clang-tidy: Add check modernize-raw-string-literal

llvm-svn: 264539
This commit is contained in:
Richard Thomson 2016-03-27 16:43:44 +00:00
parent 569af59b14
commit 8930aab886
9 changed files with 372 additions and 1 deletions

View File

@ -7,6 +7,7 @@ add_clang_library(clangTidyModernizeModule
MakeUniqueCheck.cpp
ModernizeTidyModule.cpp
PassByValueCheck.cpp
RawStringLiteralCheck.cpp
RedundantVoidArgCheck.cpp
ReplaceAutoPtrCheck.cpp
ShrinkToFitCheck.cpp

View File

@ -14,6 +14,7 @@
#include "LoopConvertCheck.h"
#include "MakeUniqueCheck.h"
#include "PassByValueCheck.h"
#include "RawStringLiteralCheck.h"
#include "RedundantVoidArgCheck.h"
#include "ReplaceAutoPtrCheck.h"
#include "ShrinkToFitCheck.h"
@ -36,6 +37,8 @@ public:
CheckFactories.registerCheck<LoopConvertCheck>("modernize-loop-convert");
CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique");
CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
CheckFactories.registerCheck<RawStringLiteralCheck>(
"modernize-raw-string-literal");
CheckFactories.registerCheck<RedundantVoidArgCheck>(
"modernize-redundant-void-arg");
CheckFactories.registerCheck<ReplaceAutoPtrCheck>(

View File

@ -0,0 +1,140 @@
//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "RawStringLiteralCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Lex/Lexer.h"
using namespace clang::ast_matchers;
namespace clang {
namespace tidy {
namespace modernize {
namespace {
/// Reports whether every backslash escape found in \p HayStack is one of the
/// escapes listed in \p Escapes.
///
/// \param HayStack text to scan; the caller passes the spelling of a string
///        literal as it appears in the source file.
/// \param Escapes the characters that are allowed to follow a backslash.
/// \returns true only when \p HayStack holds at least one backslash and the
///          character after each scanned backslash occurs in \p Escapes.
bool containsEscapes(StringRef HayStack, StringRef Escapes) {
  bool FoundEscape = false;
  // Resume two characters past each backslash so that the second half of an
  // escape such as `\\` is not mistaken for the start of another escape.
  for (size_t BackSlash = HayStack.find('\\'); BackSlash != StringRef::npos;
       BackSlash = HayStack.find('\\', BackSlash + 2)) {
    const size_t Escaped = BackSlash + 1;
    // A backslash that ends the text has no escaped character. Treat it as an
    // unsupported escape instead of indexing past the end of the text.
    if (Escaped >= HayStack.size() ||
        Escapes.find(HayStack[Escaped]) == StringRef::npos)
      return false;
    FoundEscape = true;
  }
  return FoundEscape;
}
/// Reports whether \p Text spells a raw string literal.
///
/// \param Text the source spelling of a string literal token.
/// \returns true when the first double quote in \p Text is immediately
///          preceded by 'R'; false otherwise, including when \p Text holds no
///          double quote at all (for example when it is empty).
bool isRawStringLiteral(StringRef Text) {
  // Already a raw string literal if R comes before ".
  const size_t QuotePos = Text.find('"');
  // Without a quote there is nothing in front of it to inspect. Answer "not
  // raw" rather than relying on an assertion that vanishes in release builds
  // and would leave an out-of-bounds read behind.
  if (QuotePos == StringRef::npos || QuotePos == 0)
    return false;
  return Text[QuotePos - 1] == 'R';
}
/// Decides whether \p Literal should be rewritten as a raw string literal.
///
/// \param Result the match result; supplies the source manager and the
///        language options used to read the literal's spelling.
/// \param Literal the string literal under consideration.
/// \returns true when the literal is a narrow literal made only of printable
///          ASCII bytes, is not already a raw string literal, and its source
///          spelling contains escapes that are all drawn from the supported
///          set (\\ \' \" \? and hexadecimal or octal escapes starting with
///          x, 0 or 1).
bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
                               const StringLiteral *Literal) {
  // FIXME: Handle L"", u8"", u"" and U"" literals.
  if (!Literal->isAscii())
    return false;

  // Only printable ASCII may be pasted verbatim into a raw string literal.
  // The following bytes disqualify the literal:
  //   \000-\037  control characters, which include
  //              \a bell, \b backspace, \t horizontal tab, \n new line,
  //              \v vertical tab, \f form feed and \r carriage return
  //   \177       delete
  //   \200-\377  bytes outside ASCII, reachable through escapes such as \x80
  for (const char C : Literal->getBytes()) {
    const unsigned char Byte = static_cast<unsigned char>(C);
    if (Byte < 0x20 || Byte >= 0x7F)
      return false;
  }

  const auto &Sources = *Result.SourceManager;
  const auto &LangOpts = Result.Context->getLangOpts();
  CharSourceRange CharRange = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(Literal->getSourceRange()), Sources,
      LangOpts);
  StringRef Text = Lexer::getSourceText(CharRange, Sources, LangOpts);

  // When no spelling could be retrieved there is nothing to rewrite, and the
  // helpers below expect to see the literal's quotes.
  if (Text.empty() || isRawStringLiteral(Text))
    return false;

  return containsEscapes(Text, R"('\"?x01)");
}
/// Reports whether \p Bytes contains the sequence that would terminate a raw
/// string literal using \p Delimiter: `)"` for an empty delimiter, otherwise
/// `)` followed by the delimiter and a double quote.
bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
  if (Delimiter.empty())
    return Bytes.find(std::string(R"lit()")lit")) != StringRef::npos;

  const std::string Terminator = ")" + Delimiter + R"(")";
  return Bytes.find(Terminator) != StringRef::npos;
}
std::string asRawStringLiteral(const StringLiteral *Literal,
const std::string &DelimiterStem) {
const StringRef Bytes = Literal->getBytes();
std::string Delimiter;
for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
}
if (Delimiter.empty())
return (R"(R"()" + Bytes + R"lit()")lit").str();
return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
}
} // namespace
// Constructs the check and reads the "DelimiterStem" option, falling back to
// "lit" when the option is not configured.
RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
DelimiterStem(Options.get("DelimiterStem", "lit")) {}
void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
ClangTidyCheck::storeOptions(Options);
}
/// Registers a matcher for every string literal, bound as "lit"; the
/// filtering of candidates happens in check().
void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
  const auto AnyStringLiteral = stringLiteral().bind("lit");
  Finder->addMatcher(AnyStringLiteral, this);
}
/// Handles one matched string literal: diagnoses it and offers a raw string
/// replacement when it qualifies.
void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
  // Raw string literals require C++11 or later.
  const bool SupportsRawStrings = Result.Context->getLangOpts().CPlusPlus11;
  if (!SupportsRawStrings)
    return;

  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");

  // Literals whose start location is inside a macro expansion are left alone.
  const bool StartsInMacro = Literal->getLocStart().isMacroID();
  if (StartsInMacro || !containsEscapedCharacters(Result, Literal))
    return;

  replaceWithRawStringLiteral(Result, Literal);
}
/// Emits the warning for \p Literal and attaches a fix-it that replaces the
/// literal's source range with its raw string spelling.
void RawStringLiteralCheck::replaceWithRawStringLiteral(
    const MatchFinder::MatchResult &Result, const StringLiteral *Literal) {
  const auto &Sources = *Result.SourceManager;
  const auto &LangOpts = Result.Context->getLangOpts();

  // Character range in the file that covers the whole literal token.
  const CharSourceRange LiteralRange = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(Literal->getSourceRange()), Sources,
      LangOpts);
  const std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);

  diag(Literal->getLocStart(),
       "escaped string literal can be written as a raw string literal")
      << FixItHint::CreateReplacement(LiteralRange, Replacement);
}
} // namespace modernize
} // namespace tidy
} // namespace clang

View File

@ -0,0 +1,45 @@
//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
#include "../ClangTidy.h"
//#include <string>
namespace clang {
namespace tidy {
namespace modernize {
/// Replaces string literals that contain escaped characters with raw string
/// literals.
///
/// The check reads one option, "DelimiterStem" (default "lit"), which names
/// the raw string delimiter used when the literal's contents would otherwise
/// end the raw string early.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-raw-string-literal.html
class RawStringLiteralCheck : public ClangTidyCheck {
public:
RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context);
void storeOptions(ClangTidyOptions::OptionMap &Options) override;
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
private:
/// Emits the diagnostic for \p Literal together with a fix-it that rewrites
/// it as a raw string literal.
void replaceWithRawStringLiteral(
const ast_matchers::MatchFinder::MatchResult &Result,
const StringLiteral *Literal);
/// Value of the "DelimiterStem" option, read once in the constructor.
std::string DelimiterStem;
};
} // namespace modernize
} // namespace tidy
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H

View File

@ -63,7 +63,10 @@ Improvements to ``clang-tidy``
explain them more clearly, and provide more accurate fix-its for the issues
identified. The improvements since the 3.8 release include:
- ...
- New ``modernize-raw-string-literal`` check
This check selectively replaces string literals containing escaped
characters with raw string literals.
Improvements to ``modularize``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -78,6 +78,7 @@ Clang-Tidy Checks
modernize-loop-convert
modernize-make-unique
modernize-pass-by-value
modernize-raw-string-literal
modernize-redundant-void-arg
modernize-replace-auto-ptr
modernize-shrink-to-fit

View File

@ -0,0 +1,46 @@
.. title:: clang-tidy - modernize-raw-string-literal
modernize-raw-string-literal
============================
This check selectively replaces string literals containing escaped characters
with raw string literals.
Example:
.. code-block:: c++
const char *const Quotes{"embedded \"quotes\""};
const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"};
const char *const SingleLine{"Single line.\n"};
const char *const TrailingSpace{"Look here -> \n"};
const char *const Tab{"One\tTwo\n"};
const char *const Bell{"Hello!\a And welcome!"};
const char *const Path{"C:\\Program Files\\Vendor\\Application.exe"};
const char *const RegEx{"\\w\\([a-z]\\)"};
becomes
.. code-block:: c++
const char *const Quotes{R"(embedded "quotes")"};
const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"};
const char *const SingleLine{"Single line.\n"};
const char *const TrailingSpace{"Look here -> \n"};
const char *const Tab{"One\tTwo\n"};
const char *const Bell{"Hello!\a And welcome!"};
const char *const Path{R"(C:\Program Files\Vendor\Application.exe)"};
const char *const RegEx{R"(\w\([a-z]\))"};
The presence of any of the following escapes can cause the string to be
converted to a raw string literal: ``\\``, ``\'``, ``\"``, ``\?``,
and octal or hexadecimal escapes for printable ASCII characters.
A string literal containing only escaped newlines is a common way of
writing lines of text output. Introducing physical newlines with raw
string literals in this case is likely to impede readability. These
string literals are left unchanged.
An escaped horizontal tab, form feed, or vertical tab prevents the string
literal from being converted. The presence of a horizontal tab, form feed or
vertical tab in source code is not visually obvious.

View File

@ -0,0 +1,9 @@
// RUN: %check_clang_tidy %s modernize-raw-string-literal %t -- -config='{CheckOptions: [{key: "modernize-raw-string-literal.DelimiterStem", value: "str"}]}' -- -std=c++11
// The contents hold the sequence )" and therefore need a delimiter; the RUN
// line configures DelimiterStem as "str", so the rewrite is expected to use
// that stem instead of the default.
char const *const ContainsSentinel{"who\\ops)\""};
// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"str(who\ops)")str"};{{$}}
// Disabled case: the declaration is commented out and its expectations use
// prefixes that differ from the ones on the active case above.
// NOTE(review): the expected text names the delimiter lit1, although the
// stem configured for this file is "str" - confirm before enabling.
//char const *const ContainsDelim{"whoops)\")lit\""};
// CHECK-XMESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
// CHECK-XFIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}}

View File

@ -0,0 +1,123 @@
// RUN: %check_clang_tidy %s modernize-raw-string-literal %t
// Baseline: an escaped backslash in an otherwise plain narrow literal is
// diagnosed and rewritten.
char const *const BackSlash("goink\\frob");
// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: escaped string literal can be written as a raw string literal [modernize-raw-string-literal]
// CHECK-FIXES: {{^}}char const *const BackSlash(R"(goink\frob)");{{$}}
// A literal without any escape; no diagnostic is expected.
char const *const PlainLiteral("plain literal");
// Non-printable ASCII characters.
// Every literal in this group holds at least one non-printing byte, which
// disqualifies it from conversion; none of them carries an expected
// diagnostic, even though most also contain an escaped backslash.
char const *const Nul("goink\\\000");
char const *const Soh("goink\\\001");
char const *const Stx("goink\\\002");
char const *const Etx("goink\\\003");
char const *const Enq("goink\\\004");
char const *const Ack("goink\\\005");
char const *const Bell("goink\\\afrob");
char const *const BackSpace("goink\\\bfrob");
char const *const HorizontalTab("goink\\\tfrob");
char const *const NewLine("goink\nfrob");
char const *const VerticalTab("goink\\\vfrob");
char const *const FormFeed("goink\\\ffrob");
char const *const CarraigeReturn("goink\\\rfrob");
char const *const So("goink\\\016");
char const *const Si("goink\\\017");
char const *const Dle("goink\\\020");
char const *const Dc1("goink\\\021");
char const *const Dc2("goink\\\022");
char const *const Dc3("goink\\\023");
char const *const Dc4("goink\\\024");
char const *const Nak("goink\\\025");
char const *const Syn("goink\\\026");
char const *const Etb("goink\\\027");
char const *const Can("goink\\\030");
char const *const Em("goink\\\031");
char const *const Sub("goink\\\032");
char const *const Esc("goink\\\033");
char const *const Fs("goink\\\034");
char const *const Gs("goink\\\035");
char const *const Rs("goink\\\036");
char const *const Us("goink\\\037");
char const *const HexNonPrintable("\\\x03");
char const *const Delete("\\\177");
// A new line in the contents likewise blocks the conversion, including when
// it only terminates the text.
char const *const TrailingSpace("A line \\with space. \n");
char const *const TrailingNewLine("A single \\line.\n");
// Literals that are already raw, and literals with an encoding prefix
// (u8, u, U, L), are left unchanged: the check skips raw spellings and does
// not yet handle prefixed literals. No diagnostics are expected here.
char const *const AlreadyRaw(R"(foobie\\bletch)");
char const *const UTF8Literal(u8"foobie\\bletch");
char const *const UTF8RawLiteral(u8R"(foobie\\bletch)");
char16_t const *const UTF16Literal(u"foobie\\bletch");
char16_t const *const UTF16RawLiteral(uR"(foobie\\bletch)");
char32_t const *const UTF32Literal(U"foobie\\bletch");
char32_t const *const UTF32RawLiteral(UR"(foobie\\bletch)");
wchar_t const *const WideLiteral(L"foobie\\bletch");
wchar_t const *const WideRawLiteral(LR"(foobie\\bletch)");
// Literals whose escapes are all supported are diagnosed and rewritten.
// NOTE(review): the fix expectation for SingleQuote uses a prefix that differs
// from the one on the other cases, so it is presumably not verified - confirm
// why it is disabled.
char const *const SingleQuote("goink\'frob");
// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
// CHECK-XFIXES: {{^}}char const *const SingleQuote(R"(goink'frob)");{{$}}
char const *const DoubleQuote("goink\"frob");
// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const DoubleQuote(R"(goink"frob)");{{$}}
char const *const QuestionMark("goink\?frob");
// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const QuestionMark(R"(goink?frob)");{{$}}
char const *const RegEx("goink\\(one|two\\)\\\\\\?.*\\nfrob");
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const RegEx(R"(goink\(one|two\)\\\?.*\nfrob)");{{$}}
char const *const Path("C:\\Program Files\\Vendor\\Application\\Application.exe");
// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const Path(R"(C:\Program Files\Vendor\Application\Application.exe)");{{$}}
// Contents that include the sequence )" force a delimiter: the default stem
// "lit" first, then the stem with a numeric suffix when the stem itself would
// also terminate the literal.
char const *const ContainsSentinel("who\\ops)\"");
// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const ContainsSentinel(R"lit(who\ops)")lit");{{$}}
char const *const ContainsDelim("whoops)\")lit\"");
// CHECK-MESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const ContainsDelim(R"lit1(whoops)")lit")lit1");{{$}}
// Octal and hexadecimal escapes that denote a printable character are
// replaced by that character.
char const *const OctalPrintable("\100\\");
// CHECK-MESSAGES: :[[@LINE-1]]:34: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const OctalPrintable(R"(@\)");{{$}}
char const *const HexPrintable("\x40\\");
// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}}char const *const HexPrintable(R"(@\)");{{$}}
// String literals that come out of macro expansions: the check skips
// literals whose start location is a macro location, so no diagnostics are
// expected for any of the three declarations below.
#define TRICK(arg_) #arg_
char const *const MacroBody = TRICK(foo\\bar);
#define HAT(rabbit_) #rabbit_ "foo\\bar"
char const *const StringizedMacroArgument = HAT(foo\\bar);
#define SUBST(lit_) lit_
char const *const MacroArgument = SUBST("foo\\bar");
// FIXME: We should be able to replace this string literal macro argument
// Primary template: the literal inside the body is expected to be diagnosed
// and fixed.
template <typename T>
void fn(char const *const Arg) {
char const *const Str("foo\\bar");
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}} char const *const Str(R"(foo\bar)");{{$}}
}
// Explicit specialization: its own literal is expected to be diagnosed and
// fixed as well.
template <>
void fn<int>(char const *const Arg) {
char const *const Str("foo\\bar");
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}} char const *const Str(R"(foo\bar)");{{$}}
}
// Literals passed as call arguments, once to the explicit specialization and
// once to an instantiation of the primary template; both are expected to be
// diagnosed and fixed.
void callFn() {
fn<int>("foo\\bar");
// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}} fn<int>(R"(foo\bar)");{{$}}
fn<double>("foo\\bar");
// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: {{.*}} can be written as a raw string literal
// CHECK-FIXES: {{^}} fn<double>(R"(foo\bar)");{{$}}
}