[clang-tidy] Reduce false-positive ratio in misc-suspicious-missing-comma check.

Summary:
This patch is adding detection of common string literal patterns
that should not trigger warnings.

  [*] Add a limit on the number of concatenated token,
  [*] Add support for parenthese sequence of tokens,
  [*] Add detection of valid indentation.

As an example, this code will no longer trigger a warning:
```
const char* Array[] = {
  "first literal"
    "indented literal"
    "indented literal",
  "second literal",
  [...]
```

Reviewers: alexfh

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D18695

llvm-svn: 265303
This commit is contained in:
Etienne Bergeron 2016-04-04 15:46:38 +00:00
parent b3c2764f89
commit 1eec3f01f0
3 changed files with 103 additions and 7 deletions

View File

@ -19,8 +19,51 @@ namespace misc {
namespace {
AST_MATCHER(StringLiteral, isConcatenatedLiteral) {
return Node.getNumConcatenated() > 1;
bool isConcatenatedLiteralsOnPurpose(ASTContext* Ctx,
const StringLiteral* Lit) {
// String literals surrounded by parentheses are assumed to be on purpose.
// i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] };
auto Parents = Ctx->getParents(*Lit);
if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
return true;
// Appropriately indented string literals are assumed to be on purpose.
// The following frequent indentation is accepted:
// const char* Array[] = {
// "first literal"
// "indented literal"
// "indented literal",
// "second literal",
// [...]
// };
const SourceManager& SM = Ctx->getSourceManager();
bool IndentedCorrectly = true;
SourceLocation FirstToken = Lit->getStrTokenLoc(0);
FileID BaseFID = SM.getFileID(FirstToken);
unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken);
unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken);
for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++ TokNum) {
SourceLocation Token = Lit->getStrTokenLoc(TokNum);
FileID FID = SM.getFileID(Token);
unsigned int Indent = SM.getSpellingColumnNumber(Token);
unsigned int Line = SM.getSpellingLineNumber(Token);
if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
IndentedCorrectly = false;
break;
}
}
if (IndentedCorrectly)
return true;
// There is no pattern recognized by the checker, assume it's not on purpose.
return false;
}
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral,
unsigned, MaxConcatenatedTokens) {
return Node.getNumConcatenated() > 1 &&
Node.getNumConcatenated() < MaxConcatenatedTokens &&
!isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node);
}
} // namespace
@ -29,17 +72,19 @@ SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
SizeThreshold(Options.get("SizeThreshold", 5U)),
RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))) {}
RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))),
MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {}
void SuspiciousMissingCommaCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "SizeThreshold", SizeThreshold);
Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold));
Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens);
}
void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
const auto ConcatenatedStringLiteral =
stringLiteral(isConcatenatedLiteral()).bind("str");
stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str");
const auto StringsInitializerList =
initListExpr(hasType(constantArrayType()),
@ -51,9 +96,10 @@ void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
void SuspiciousMissingCommaCheck::check(
const MatchFinder::MatchResult &Result) {
const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
const auto *ConcatenatedLiteral = Result.Nodes.getNodeAs<Expr>("str");
const auto *ConcatenatedLiteral =
Result.Nodes.getNodeAs<StringLiteral>("str");
assert(InitializerList && ConcatenatedLiteral);
// Skip small arrays as they often generate false-positive.
unsigned int Size = InitializerList->getNumInits();
if (Size < SizeThreshold) return;

View File

@ -32,6 +32,8 @@ private:
const unsigned SizeThreshold;
// Maximal threshold ratio of suspicious string literals to be considered.
const double RatioThreshold;
// Maximal number of concatenated tokens.
const unsigned MaxConcatenatedTokens;
};
} // namespace misc

View File

@ -15,7 +15,8 @@ const wchar_t* Colors[] = {
L"Red", L"Yellow", L"Blue", L"Green", L"Purple", L"Rose", L"White", L"Black"
};
// The following array should not trigger any warnings.
// The following array should not trigger any warnings. There is more than 5
// elements, but they are all concatenated string literals.
const char* HttpCommands[] = {
"GET / HTTP/1.0\r\n"
"\r\n",
@ -26,9 +27,56 @@ const char* HttpCommands[] = {
"GET /favicon.ico HTTP/1.0\r\n"
"header: dummy"
"\r\n",
"GET /index.html-en HTTP/1.0\r\n"
"\r\n",
"GET /index.html-fr HTTP/1.0\r\n"
"\r\n",
"GET /index.html-es HTTP/1.0\r\n"
"\r\n",
};
// This array is too small to trigger a warning.
const char* SmallArray[] = {
"a" "b", "c"
};
// Parentheses should be enough to avoid warnings.
const char* ParentheseArray[] = {
("a" "b"), "c",
("d"
"e"
"f"),
"g", "h", "i", "j", "k", "l"
};
// Indentation should be enough to avoid warnings.
const char* CorrectlyIndentedArray[] = {
"This is a long message "
"which is spanning over multiple lines."
"And this should be fine.",
"a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l"
};
const char* IncorrectlyIndentedArray[] = {
"This is a long message "
"which is spanning over multiple lines."
"And this should be fine.",
"a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l"
};
// CHECK-MESSAGES: :[[@LINE-6]]:3: warning: suspicious string literal, probably missing a comma [misc-suspicious-missing-comma]
const char* TooManyConcatenatedTokensArray[] = {
"Dummy line",
"Dummy line",
"a" "b" "c" "d" "e" "f",
"g" "h" "i" "j" "k" "l",
"Dummy line",
"Dummy line",
"Dummy line",
"Dummy line",
};