[clang-tidy] add new checker for string literal with NUL character.
Summary: This patch adds support for detecting suspicious string literals and their //incorrect// usage. The following example shows an incorrect character escaping leading to an embedded NUL character. ``` std::string str = "\0x42"; // Should be "\x42". ``` The patch also adds detection of truncated literals when a literal is passed to a string constructor. Reviewers: hokein, alexfh Subscribers: LegalizeAdulthood, bcraig, Eugene.Zelenko, bkramer, cfe-commits Differential Revision: http://reviews.llvm.org/D18783 llvm-svn: 265691
This commit is contained in:
parent
3802c4af59
commit
a5fd19ba1e
|
@ -23,6 +23,7 @@ add_clang_library(clangTidyMiscModule
|
||||||
SizeofContainerCheck.cpp
|
SizeofContainerCheck.cpp
|
||||||
StaticAssertCheck.cpp
|
StaticAssertCheck.cpp
|
||||||
StringIntegerAssignmentCheck.cpp
|
StringIntegerAssignmentCheck.cpp
|
||||||
|
StringLiteralWithEmbeddedNulCheck.cpp
|
||||||
SuspiciousMissingCommaCheck.cpp
|
SuspiciousMissingCommaCheck.cpp
|
||||||
SuspiciousSemicolonCheck.cpp
|
SuspiciousSemicolonCheck.cpp
|
||||||
SwappedArgumentsCheck.cpp
|
SwappedArgumentsCheck.cpp
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include "SizeofContainerCheck.h"
|
#include "SizeofContainerCheck.h"
|
||||||
#include "StaticAssertCheck.h"
|
#include "StaticAssertCheck.h"
|
||||||
#include "StringIntegerAssignmentCheck.h"
|
#include "StringIntegerAssignmentCheck.h"
|
||||||
|
#include "StringLiteralWithEmbeddedNulCheck.h"
|
||||||
#include "SuspiciousMissingCommaCheck.h"
|
#include "SuspiciousMissingCommaCheck.h"
|
||||||
#include "SuspiciousSemicolonCheck.h"
|
#include "SuspiciousSemicolonCheck.h"
|
||||||
#include "SwappedArgumentsCheck.h"
|
#include "SwappedArgumentsCheck.h"
|
||||||
|
@ -89,6 +90,8 @@ public:
|
||||||
"misc-static-assert");
|
"misc-static-assert");
|
||||||
CheckFactories.registerCheck<StringIntegerAssignmentCheck>(
|
CheckFactories.registerCheck<StringIntegerAssignmentCheck>(
|
||||||
"misc-string-integer-assignment");
|
"misc-string-integer-assignment");
|
||||||
|
CheckFactories.registerCheck<StringLiteralWithEmbeddedNulCheck>(
|
||||||
|
"misc-string-literal-with-embedded-nul");
|
||||||
CheckFactories.registerCheck<SuspiciousMissingCommaCheck>(
|
CheckFactories.registerCheck<SuspiciousMissingCommaCheck>(
|
||||||
"misc-suspicious-missing-comma");
|
"misc-suspicious-missing-comma");
|
||||||
CheckFactories.registerCheck<SuspiciousSemicolonCheck>(
|
CheckFactories.registerCheck<SuspiciousSemicolonCheck>(
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
//===--- StringLiteralWithEmbeddedNulCheck.cpp - clang-tidy----------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "StringLiteralWithEmbeddedNulCheck.h"
|
||||||
|
#include "clang/AST/ASTContext.h"
|
||||||
|
#include "clang/ASTMatchers/ASTMatchFinder.h"
|
||||||
|
#include "clang/Basic/CharInfo.h"
|
||||||
|
|
||||||
|
using namespace clang::ast_matchers;
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace tidy {
|
||||||
|
namespace misc {
|
||||||
|
|
||||||
|
/// Matches a string literal in which at least one code unit is NUL.
AST_MATCHER(StringLiteral, containsNul) {
  const unsigned NumCodeUnits = Node.getLength();
  for (unsigned Index = 0; Index != NumCodeUnits; ++Index) {
    if (Node.getCodeUnit(Index) == '\0')
      return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
void StringLiteralWithEmbeddedNulCheck::registerMatchers(MatchFinder *Finder) {
  // Every literal holding a NUL code unit is a candidate; check() then looks
  // inside it for incorrectly escaped characters.
  Finder->addMatcher(stringLiteral(containsNul()).bind("strlit"), this);

  // Everything registered below is only meaningful for C++ sources.
  if (!getLangOpts().CPlusPlus)
    return;

  // A NUL-containing literal, looked at through parentheses and implicit
  // casts, reported as "truncated" when it reaches a string class.
  const auto TruncatedLiteral =
      ignoringParenImpCasts(stringLiteral(containsNul()).bind("truncated"));

  // Construction through a member named basic_string, either with a single
  // argument or with a second argument that is present only as a default
  // argument (the allocator must not be passed explicitly).
  const auto IsBasicStringCtor =
      hasDeclaration(cxxMethodDecl(hasName("basic_string")));
  const auto SingleArgCtor =
      cxxConstructExpr(argumentCountIs(1), IsBasicStringCtor);
  const auto DefaultedAllocCtor =
      cxxConstructExpr(argumentCountIs(2), IsBasicStringCtor,
                       hasArgument(1, cxxDefaultArgExpr()));
  const auto StringCtorCall = expr(anyOf(SingleArgCtor, DefaultedAllocCtor));

  // A suspicious literal handed to a string constructor,
  // e.g. std::string str = "abc\0def";
  Finder->addMatcher(
      cxxConstructExpr(StringCtorCall, hasArgument(0, TruncatedLiteral)),
      this);

  // A suspicious literal used as an operand of an overloaded operator.
  Finder->addMatcher(cxxOperatorCallExpr(hasAnyArgument(TruncatedLiteral)),
                     this);
}
|
||||||
|
|
||||||
|
void StringLiteralWithEmbeddedNulCheck::check(
|
||||||
|
const MatchFinder::MatchResult &Result) {
|
||||||
|
if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
|
||||||
|
for (size_t Offset = 0, Length = SL->getLength(); Offset < Length;
|
||||||
|
++Offset) {
|
||||||
|
// Find a sequence of character like "\0x12".
|
||||||
|
if (Offset + 3 < Length && SL->getCodeUnit(Offset) == '\0' &&
|
||||||
|
SL->getCodeUnit(Offset + 1) == 'x' &&
|
||||||
|
isDigit(SL->getCodeUnit(Offset + 2)) &&
|
||||||
|
isDigit(SL->getCodeUnit(Offset + 3))) {
|
||||||
|
diag(SL->getLocStart(), "suspicious embedded NUL character");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("truncated")) {
|
||||||
|
diag(SL->getLocStart(),
|
||||||
|
"truncated string literal with embedded NUL character");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace misc
|
||||||
|
} // namespace tidy
|
||||||
|
} // namespace clang
|
|
@ -0,0 +1,35 @@
|
||||||
|
//===--- StringLiteralWithEmbeddedNulCheck.h - clang-tidy--------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
|
||||||
|
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
|
||||||
|
|
||||||
|
#include "../ClangTidy.h"
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace tidy {
|
||||||
|
namespace misc {
|
||||||
|
|
||||||
|
/// Find suspicious string literals with embedded NUL characters.
|
||||||
|
///
|
||||||
|
/// For the user-facing documentation see:
|
||||||
|
/// http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html
|
||||||
|
class StringLiteralWithEmbeddedNulCheck : public ClangTidyCheck {
|
||||||
|
public:
|
||||||
|
StringLiteralWithEmbeddedNulCheck(StringRef Name, ClangTidyContext *Context)
|
||||||
|
: ClangTidyCheck(Name, Context) {}
|
||||||
|
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
|
||||||
|
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace misc
|
||||||
|
} // namespace tidy
|
||||||
|
} // namespace clang
|
||||||
|
|
||||||
|
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
|
|
@ -97,6 +97,12 @@ identified. The improvements since the 3.8 release include:
|
||||||
Warns when there is an explicit redundant cast of a calculation result to a
|
Warns when there is an explicit redundant cast of a calculation result to a
|
||||||
bigger type.
|
bigger type.
|
||||||
|
|
||||||
|
- New `misc-string-literal-with-embedded-nul
|
||||||
|
<http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html>`_ check
|
||||||
|
|
||||||
|
Warns about suspicious NUL character in string literals which may lead to
|
||||||
|
truncation or invalid character escaping.
|
||||||
|
|
||||||
- New `misc-suspicious-missing-comma
|
- New `misc-suspicious-missing-comma
|
||||||
<http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html>`_ check
|
<http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html>`_ check
|
||||||
|
|
||||||
|
|
|
@ -66,6 +66,7 @@ Clang-Tidy Checks
|
||||||
misc-sizeof-container
|
misc-sizeof-container
|
||||||
misc-static-assert
|
misc-static-assert
|
||||||
misc-string-integer-assignment
|
misc-string-integer-assignment
|
||||||
|
misc-string-literal-with-embedded-nul
|
||||||
misc-suspicious-missing-comma
|
misc-suspicious-missing-comma
|
||||||
misc-suspicious-semicolon
|
misc-suspicious-semicolon
|
||||||
misc-swapped-arguments
|
misc-swapped-arguments
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
.. title:: clang-tidy - misc-string-literal-with-embedded-nul
|
||||||
|
|
||||||
|
misc-string-literal-with-embedded-nul
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
Finds occurrences of string literals with embedded NUL characters and validates
|
||||||
|
their usage.
|
||||||
|
|
||||||
|
|
||||||
|
Invalid escaping
|
||||||
|
^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Special characters can be escaped within a string literal by using their
|
||||||
|
hexadecimal encoding like ``\x42``. A common mistake is to escape them
|
||||||
|
like this ``\0x42`` where the ``\0`` stands for the NUL character.
|
||||||
|
|
||||||
|
.. code:: c++
|
||||||
|
|
||||||
|
const char Example[] = "Invalid character: \0x12 should be \x12";
|
||||||
|
const char Bytes[] = "\x03\0x02\0x01\0x00\0xFF\0xFF\0xFF";
|
||||||
|
|
||||||
|
|
||||||
|
Truncated literal
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
String-like classes can manipulate strings with embedded NUL as they are
|
||||||
|
keeping track of the bytes and the length. This is not the case for a
|
||||||
|
``char*`` (NUL-terminated) string.
|
||||||
|
|
||||||
|
A common mistake is to pass a string-literal with embedded NUL to a string
|
||||||
|
constructor expecting a NUL-terminated string. The bytes after the first NUL
|
||||||
|
character are truncated.
|
||||||
|
|
||||||
|
.. code:: c++
|
||||||
|
|
||||||
|
std::string str("abc\0def"); // "def" is truncated
|
||||||
|
str += "\0"; // This statement is doing nothing
|
||||||
|
if (str == "\0abc") return; // Compares against "": the bytes after the NUL are ignored
|
|
@ -0,0 +1,85 @@
|
||||||
|
// RUN: %check_clang_tidy %s misc-string-literal-with-embedded-nul %t
|
||||||
|
|
||||||
|
// Minimal stand-in for the standard string classes. Only the constructors and
// member operators declared here are available to the cases below.
namespace std {
|
||||||
|
template <typename T>
|
||||||
|
class allocator {};
|
||||||
|
template <typename T>
|
||||||
|
class char_traits {};
|
||||||
|
template <typename C, typename T, typename A>
|
||||||
|
struct basic_string {
|
||||||
|
typedef basic_string<C, T, A> _Type;
|
||||||
|
basic_string();
|
||||||
|
// The allocator argument is defaulted, so construction from a single C
|
||||||
|
// string goes through this two-parameter constructor.
|
||||||
|
basic_string(const C *p, const A &a = A());
|
||||||
|
|
||||||
|
_Type& operator+=(const C* s);
|
||||||
|
_Type& operator=(const C* s);
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef basic_string<char, std::char_traits<char>, std::allocator<char>> string;
|
||||||
|
typedef basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>> wstring;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free comparison operators accepting a C string on either side of a
// std::string.
bool operator==(const std::string&, const char*);
|
||||||
|
bool operator==(const char*, const std::string&);
|
||||||
|
|
||||||
|
|
||||||
|
// Namespace-scope literals exercising the mis-escaped "\0xNN" detection. Only
// a declaration directly followed by an expected-diagnostic comment is
// supposed to be reported.
const char Valid[] = "This is valid \x12.";
|
||||||
|
const char Strange[] = "This is strange \0x12 and must be fixed";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: suspicious embedded NUL character [misc-string-literal-with-embedded-nul]
|
||||||
|
|
||||||
|
const char textA[] = "\0x01\0x02\0x03\0x04";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious embedded NUL character
|
||||||
|
const wchar_t textW[] = L"\0x01\0x02\0x03\0x04";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious embedded NUL character
|
||||||
|
|
||||||
|
// A through C stop short of two digits after "\0x"; only D has both.
|
||||||
|
const char A[] = "\0";
|
||||||
|
const char B[] = "\0x";
|
||||||
|
const char C[] = "\0x1";
|
||||||
|
const char D[] = "\0x11";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character
|
||||||
|
|
||||||
|
const wchar_t E[] = L"\0";
|
||||||
|
const wchar_t F[] = L"\0x";
|
||||||
|
const wchar_t G[] = L"\0x1";
|
||||||
|
const wchar_t H[] = L"\0x11";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: suspicious embedded NUL character
|
||||||
|
|
||||||
|
const char I[] = "\000\000\000\000";
|
||||||
|
const char J[] = "\0\0\0\0\0\0";
|
||||||
|
const char K[] = "";
|
||||||
|
|
||||||
|
const char L[] = "\0x12" "\0x12" "\0x12" "\0x12";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character
|
||||||
|
|
||||||
|
// Narrow-string truncation cases: a literal with an embedded NUL is used to
// construct a std::string and is passed to its +=, = and == operators.
void TestA() {
|
||||||
|
std::string str1 = "abc\0def";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal
|
||||||
|
std::string str2 = "\0";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal
|
||||||
|
std::string str3("\0");
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal
|
||||||
|
std::string str4{"\x00\x01\x02\x03"};
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal
|
||||||
|
|
||||||
|
std::string str;
|
||||||
|
str += "abc\0def";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: truncated string literal
|
||||||
|
str = "abc\0def";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: truncated string literal
|
||||||
|
|
||||||
|
if (str == "abc\0def") return;
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: truncated string literal
|
||||||
|
if ("abc\0def" == str) return;
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: truncated string literal
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wide-string truncation cases: std::wstring is constructed from L"..."
// literals that contain an embedded NUL.
void TestW() {
|
||||||
|
std::wstring str1 = L"abc\0def";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal
|
||||||
|
std::wstring str2 = L"\0";
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal
|
||||||
|
std::wstring str3(L"\0");
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal
|
||||||
|
std::wstring str4{L"\x00\x01\x02\x03"};
|
||||||
|
// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal
|
||||||
|
}
|
Loading…
Reference in New Issue