Do not warn about format strings that are indexed string literals.

Summary:
The warning for a format string not being a string literal and therefore
being potentially insecure is overly strict for indices into string
literals. This fix checks if the index into the string literal is
precomputable. If that's the case it will check if the suffix of that
string literal is a valid format string string literal. It will still
issue the aforementioned warning for out of range indices into the
string literal.

Patch by Meike Baumgärtner (meikeb)

Reviewers: rsmith

Subscribers: srhines, cfe-commits

Differential Revision: https://reviews.llvm.org/D24584

llvm-svn: 281686
This commit is contained in:
Stephen Hines 2016-09-16 01:07:04 +00:00
parent b53b62eb69
commit 648c369ef2
2 changed files with 201 additions and 18 deletions

View File

@ -3850,7 +3850,95 @@ enum StringLiteralCheckType {
};
} // end anonymous namespace
static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
BinaryOperatorKind BinOpKind,
bool AddendIsRight) {
unsigned BitWidth = Offset.getBitWidth();
unsigned AddendBitWidth = Addend.getBitWidth();
// There might be negative interim results.
if (Addend.isUnsigned()) {
Addend = Addend.zext(++AddendBitWidth);
Addend.setIsSigned(true);
}
// Adjust the bit width of the APSInts.
if (AddendBitWidth > BitWidth) {
Offset = Offset.sext(AddendBitWidth);
BitWidth = AddendBitWidth;
} else if (BitWidth > AddendBitWidth) {
Addend = Addend.sext(BitWidth);
}
bool Ov = false;
llvm::APSInt ResOffset = Offset;
if (BinOpKind == BO_Add)
ResOffset = Offset.sadd_ov(Addend, Ov);
else {
assert(AddendIsRight && BinOpKind == BO_Sub &&
"operator must be add or sub with addend on the right");
ResOffset = Offset.ssub_ov(Addend, Ov);
}
// We add an offset to a pointer here so we should support an offset as big as
// possible.
if (Ov) {
assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
Offset.sext(2 * BitWidth);
sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
return;
}
Offset = ResOffset;
}
namespace {
// This is a wrapper class around StringLiteral to support offsetted string
// literals as format strings. It takes the offset into account when returning
// the string and its length or the source locations to display notes correctly.
class FormatStringLiteral {
const StringLiteral *FExpr;
int64_t Offset;
public:
FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
: FExpr(fexpr), Offset(Offset) {}
StringRef getString() const {
return FExpr->getString().drop_front(Offset);
}
unsigned getByteLength() const {
return FExpr->getByteLength() - getCharByteWidth() * Offset;
}
unsigned getLength() const { return FExpr->getLength() - Offset; }
unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
QualType getType() const { return FExpr->getType(); }
bool isAscii() const { return FExpr->isAscii(); }
bool isWide() const { return FExpr->isWide(); }
bool isUTF8() const { return FExpr->isUTF8(); }
bool isUTF16() const { return FExpr->isUTF16(); }
bool isUTF32() const { return FExpr->isUTF32(); }
bool isPascal() const { return FExpr->isPascal(); }
SourceLocation getLocationOfByte(
unsigned ByteNo, const SourceManager &SM, const LangOptions &Features,
const TargetInfo &Target, unsigned *StartToken = nullptr,
unsigned *StartTokenByteOffset = nullptr) const {
return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target,
StartToken, StartTokenByteOffset);
}
SourceLocation getLocStart() const LLVM_READONLY {
return FExpr->getLocStart().getLocWithOffset(Offset);
}
SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); }
};
} // end anonymous namespace
static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,
@ -3871,8 +3959,11 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
unsigned firstDataArg, Sema::FormatStringType Type,
Sema::VariadicCallType CallType, bool InFunctionCall,
llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg) {
UncoveredArgHandler &UncoveredArg,
llvm::APSInt Offset) {
tryAgain:
assert(Offset.isSigned() && "invalid offset");
if (E->isTypeDependent() || E->isValueDependent())
return SLCT_NotALiteral;
@ -3906,6 +3997,10 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
CheckLeft = false;
}
// We need to maintain the offsets for the right and the left hand side
// separately to check if every possible indexed expression is a valid
// string literal. They might have different offsets for different string
// literals in the end.
StringLiteralCheckType Left;
if (!CheckLeft)
Left = SLCT_UncheckedLiteral;
@ -3913,16 +4008,17 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
CheckedVarArgs, UncoveredArg);
if (Left == SLCT_NotALiteral || !CheckRight)
CheckedVarArgs, UncoveredArg, Offset);
if (Left == SLCT_NotALiteral || !CheckRight) {
return Left;
}
}
StringLiteralCheckType Right =
checkFormatStringExpr(S, C->getFalseExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall, CheckedVarArgs,
UncoveredArg);
UncoveredArg, Offset);
return (CheckLeft && Left < Right) ? Left : Right;
}
@ -3975,8 +4071,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Init, Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
/*InFunctionCall*/false, CheckedVarArgs,
UncoveredArg);
/*InFunctionCall*/ false, CheckedVarArgs,
UncoveredArg, Offset);
}
}
@ -4031,7 +4127,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Arg, Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
CheckedVarArgs, UncoveredArg);
CheckedVarArgs, UncoveredArg, Offset);
} else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
@ -4041,7 +4137,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
InFunctionCall, CheckedVarArgs,
UncoveredArg);
UncoveredArg, Offset);
}
}
}
@ -4058,7 +4154,13 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
StrE = cast<StringLiteral>(E);
if (StrE) {
CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
if (Offset.isNegative() || Offset > StrE->getLength()) {
// TODO: It would be better to have an explicit warning for out of
// bounds literals.
return SLCT_NotALiteral;
}
FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue());
CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx,
firstDataArg, Type, InFunctionCall, CallType,
CheckedVarArgs, UncoveredArg);
return SLCT_CheckedLiteral;
@ -4066,6 +4168,50 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return SLCT_NotALiteral;
}
case Stmt::BinaryOperatorClass: {
llvm::APSInt LResult;
llvm::APSInt RResult;
const BinaryOperator *BinOp = cast<BinaryOperator>(E);
// A string literal + an int offset is still a string literal.
if (BinOp->isAdditiveOp()) {
bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
if (LIsInt != RIsInt) {
BinaryOperatorKind BinOpKind = BinOp->getOpcode();
if (LIsInt) {
if (BinOpKind == BO_Add) {
sumOffsets(Offset, LResult, BinOpKind, RIsInt);
E = BinOp->getRHS();
goto tryAgain;
}
} else {
sumOffsets(Offset, RResult, BinOpKind, RIsInt);
E = BinOp->getLHS();
goto tryAgain;
}
}
return SLCT_NotALiteral;
}
}
case Stmt::UnaryOperatorClass: {
const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
auto ASE = dyn_cast<ArraySubscriptExpr>(UnaOp->getSubExpr());
if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) {
llvm::APSInt IndexResult;
if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) {
sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true);
E = ASE->getBase();
goto tryAgain;
}
}
return SLCT_NotALiteral;
}
default:
return SLCT_NotALiteral;
@ -4132,8 +4278,9 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
StringLiteralCheckType CT =
checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,
format_idx, firstDataArg, Type, CallType,
/*IsFunctionCall*/true, CheckedVarArgs,
UncoveredArg);
/*IsFunctionCall*/ true, CheckedVarArgs,
UncoveredArg,
/*no string offset*/ llvm::APSInt(64, false) = 0);
// Generate a diagnostic where an uncovered argument is detected.
if (UncoveredArg.hasUncoveredArg()) {
@ -4189,7 +4336,7 @@ namespace {
class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
protected:
Sema &S;
const StringLiteral *FExpr;
const FormatStringLiteral *FExpr;
const Expr *OrigFormatExpr;
const unsigned FirstDataArg;
const unsigned NumDataArgs;
@ -4206,7 +4353,7 @@ protected:
UncoveredArgHandler &UncoveredArg;
public:
CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,
@ -4306,7 +4453,8 @@ getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
}
SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(),
S.getLangOpts(), S.Context.getTargetInfo());
}
void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
@ -4645,7 +4793,7 @@ class CheckPrintfHandler : public CheckFormatHandler {
bool ObjCContext;
public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, bool isObjC,
const char *beg, bool hasVAListArg,
@ -5432,7 +5580,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
namespace {
class CheckScanfHandler : public CheckFormatHandler {
public:
CheckScanfHandler(Sema &s, const StringLiteral *fexpr,
CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,
@ -5603,7 +5751,7 @@ bool CheckScanfHandler::HandleScanfSpecifier(
return true;
}
static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,

View File

@ -652,3 +652,38 @@ void test_format_security_pos(char* string) {
// expected-note@-1{{treat the string as an argument to avoid this}}
}
#pragma GCC diagnostic warning "-Wformat-nonliteral"
void test_char_pointer_arithmetic(int b) {
const char s1[] = "string";
const char s2[] = "%s string";
printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}}
// expected-note@-1{{treat the string as an argument to avoid this}}
printf(s1 + 2); // no-warning
printf(s2 + 2); // no-warning
const char s3[] = "%s string";
printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
printf(2 + s2); // no-warning
printf(6 + s2 - 2); // no-warning
printf(2 + (b ? s1 : s2)); // no-warning
const char s5[] = "string %s";
printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
printf(2 + (b ? s2 : s5), ""); // no-warning
printf(2 + (b ? s1 : s2 - 2), ""); // no-warning
const char s6[] = "%s string";
printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
printf(1 ? s2 + 2 : s2); // no-warning
printf(0 ? s2 : s2 + 2); // no-warning
printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}}
const char s7[] = "%s string %s %s";
printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
}