[sanitizer] Improve scanf interceptor

This a rewrite of the scanf parser. The new implementation is pretty close to
the spec, with a few shortcuts taken here and there. It is conservative, i.e.
it gives up parsing if it does not understand some part of the format string,
or runs into an ambiguous % spec. It does not handle some rarely used parts of
the spec, like %n$ - for now.

I'm also moving parser call to after the original *scanf function completes,
so that we can find out the store size of %s directive by the use of strlen()
on the target buffer.

llvm-svn: 174704
This commit is contained in:
Evgeniy Stepanov 2013-02-08 11:17:20 +00:00
parent acc33666f6
commit 87c43537df
3 changed files with 269 additions and 148 deletions

View File

@ -150,8 +150,11 @@ INTERCEPTOR(int, prctl, int option,
INTERCEPTOR(int, vscanf, const char *format, va_list ap) { // NOLINT
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, vscanf, format, ap);
scanf_common(ctx, format, ap);
va_list aq;
va_copy(aq, ap);
int res = REAL(vscanf)(format, ap); // NOLINT
scanf_common(ctx, format, aq);
va_end(aq);
return res;
}
@ -159,8 +162,11 @@ INTERCEPTOR(int, vsscanf, const char *str, const char *format, // NOLINT
va_list ap) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, vsscanf, str, format, ap);
scanf_common(ctx, format, ap);
va_list aq;
va_copy(aq, ap);
int res = REAL(vsscanf)(str, format, ap); // NOLINT
scanf_common(ctx, format, aq);
va_end(aq);
// FIXME: read of str
return res;
}
@ -169,8 +175,11 @@ INTERCEPTOR(int, vfscanf, void *stream, const char *format, // NOLINT
va_list ap) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, vfscanf, stream, format, ap);
scanf_common(ctx, format, ap);
va_list aq;
va_copy(aq, ap);
int res = REAL(vfscanf)(stream, format, ap); // NOLINT
scanf_common(ctx, format, aq);
va_end(aq);
return res;
}

View File

@ -8,90 +8,43 @@
//===----------------------------------------------------------------------===//
//
// Scanf implementation for use in *Sanitizer interceptors.
// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
// with a few common GNU extensions.
//
//===----------------------------------------------------------------------===//
#include <stdarg.h>
#ifdef _WIN32
#define va_copy(dst, src) ((dst) = (src))
#endif // _WIN32
#endif // _WIN32
struct ScanfSpec {
char c;
unsigned size;
struct ScanfDirective {
int argIdx; // argument index, or -1 of not specified ("%n$")
bool suppressed; // suppress assignment ("*")
int fieldWidth;
bool allocate; // allocate space ("m")
char lengthModifier[2];
char convSpecifier;
};
// One-letter specs.
static const ScanfSpec scanf_specs[] = {
{'s', 1}, // FIXME: This is incorrect, we should check the actual number
// of bytes written to the string.
{'c', sizeof(char)},
{'p', sizeof(void *)},
{'e', sizeof(float)},
{'E', sizeof(float)},
{'a', sizeof(float)},
{'f', sizeof(float)},
{'g', sizeof(float)},
{'d', sizeof(int)},
{'i', sizeof(int)},
{'o', sizeof(int)},
{'u', sizeof(int)},
{'x', sizeof(int)},
{'X', sizeof(int)},
{'n', sizeof(int)},
{'t', sizeof(PTRDIFF_T)},
{'z', sizeof(SIZE_T)},
{'j', sizeof(INTMAX_T)},
{'h', sizeof(short)}
};
static const unsigned scanf_specs_cnt =
sizeof(scanf_specs) / sizeof(scanf_specs[0]);
// %ll?, %L?, %q? specs
static const ScanfSpec scanf_llspecs[] = {
{'e', sizeof(long double)},
{'f', sizeof(long double)},
{'g', sizeof(long double)},
{'d', sizeof(long long)},
{'i', sizeof(long long)},
{'o', sizeof(long long)},
{'u', sizeof(long long)},
{'x', sizeof(long long)}
};
static const unsigned scanf_llspecs_cnt =
sizeof(scanf_llspecs) / sizeof(scanf_llspecs[0]);
// %l? specs
static const ScanfSpec scanf_lspecs[] = {
{'e', sizeof(double)},
{'f', sizeof(double)},
{'g', sizeof(double)},
{'d', sizeof(long)},
{'i', sizeof(long)},
{'o', sizeof(long)},
{'u', sizeof(long)},
{'x', sizeof(long)},
{'X', sizeof(long)},
};
static const unsigned scanf_lspecs_cnt =
sizeof(scanf_lspecs) / sizeof(scanf_lspecs[0]);
static unsigned match_spec(const struct ScanfSpec *spec, unsigned n, char c) {
for (unsigned i = 0; i < n; ++i)
if (spec[i].c == c)
return spec[i].size;
return 0;
static const char *parse_number(const char *p, int *out) {
*out = internal_atoll(p);
while (*p >= '0' && *p <= '9')
++p;
return p;
}
static void scanf_common(void *ctx, const char *format, va_list ap_const) {
va_list aq;
va_copy(aq, ap_const);
static bool char_is_one_of(char c, const char *s) {
return !!internal_strchr(s, c);
}
const char *p = format;
unsigned size;
// Parse scanf format string. If a valid directive in encountered, it is
// returned in dir. This function returns the pointer to the first
// unprocessed character, or 0 in case of error.
// In case of the end-of-string, a pointer to the closing \0 is returned.
static const char *scanf_parse_next(const char *p, ScanfDirective *dir) {
internal_memset(dir, 0, sizeof(*dir));
dir->argIdx = -1;
while (*p) {
if (*p != '%') {
@ -99,81 +52,224 @@ static void scanf_common(void *ctx, const char *format, va_list ap_const) {
continue;
}
++p;
if (*p == '*' || *p == '%' || *p == '\0') {
// FIXME: Bailing out for (p == "*") is wrong, we should parse the
// directive to the end.
if (*p != '\0')
++p;
continue;
}
unsigned field_width = 0;
if (*p >= '0' && *p <= '9') {
field_width = internal_atoll(p);
while (*p >= '0' && *p <= '9')
p++;
}
if (field_width > 0) {
// +1 for the \0 at the end.
if (*p == 's')
field_width++;
if (*p == 's' || *p == 'c') {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void*), field_width);
++p;
continue;
}
}
if (*p == '[') {
// Search for the closing bracket. It is ignored if it goes right after
// the opening bracket or after ^.
p++;
if (*p == ']') {
p++;
} else if (*p == '^' && *(p+1) == ']') {
p += 2;
}
while (*p != ']')
p++;
// +1 for the \0 at the end.
field_width++;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void*), field_width);
continue;
}
if (*p == 'L' || *p == 'q') {
// %%
if (*p == '%') {
++p;
size = match_spec(scanf_llspecs, scanf_llspecs_cnt, *p);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
continue;
}
if (*p == 'l') {
if (*p == '\0') {
return 0;
}
// %n$
if (*p >= '0' && *p <= '9') {
int number;
const char *q = parse_number(p, &number);
if (*q == '$') {
dir->argIdx = number;
p = q + 1;
}
// Otherwise, do not change p. This will be re-parsed later as the field
// width.
}
// *
if (*p == '*') {
dir->suppressed = true;
++p;
}
// Field width.
if (*p >= '0' && *p <= '9') {
p = parse_number(p, &dir->fieldWidth);
if (dir->fieldWidth <= 0)
return 0;
}
// m
if (*p == 'm') {
dir->allocate = true;
++p;
}
// Length modifier.
if (char_is_one_of(*p, "jztLq")) {
dir->lengthModifier[0] = *p;
++p;
} else if (*p == 'h') {
dir->lengthModifier[0] = 'h';
++p;
if (*p == 'h') {
dir->lengthModifier[1] = 'h';
++p;
}
} else if (*p == 'l') {
dir->lengthModifier[0] = 'l';
++p;
if (*p == 'l') {
dir->lengthModifier[1] = 'l';
++p;
size = match_spec(scanf_llspecs, scanf_llspecs_cnt, *p);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
continue;
} else {
size = match_spec(scanf_lspecs, scanf_lspecs_cnt, *p);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
continue;
}
}
if (*p == 'h' && *(p + 1) == 'h') {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), sizeof(char));
p += 2;
continue;
}
size = match_spec(scanf_specs, scanf_specs_cnt, *p);
if (size) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
// Conversion specifier.
dir->convSpecifier = *p++;
// Consume %[...] expression.
if (dir->convSpecifier == '[') {
if (*p == '^')
++p;
if (*p == ']')
++p;
while (*p && *p != ']')
++p;
if (*p == 0)
return 0; // unexpected end of string
// Consume the closing ']'.
++p;
continue;
}
break;
}
return p;
}
// Returns true if the character is an integer conversion specifier.
static bool scanf_is_integer_conv(char c) {
return char_is_one_of(c, "diouxXn");
}
// Returns true if the character is an floating point conversion specifier.
static bool scanf_is_float_conv(char c) {
return char_is_one_of(c, "AeEfFgG");
// NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
// unsupported.
}
// Returns string output character size for string-like conversions,
// or 0 if the conversion is invalid.
static int scanf_get_char_size(ScanfDirective *dir) {
if (char_is_one_of(dir->convSpecifier, "CS")) {
// wchar_t
return 0;
}
if (char_is_one_of(dir->convSpecifier, "cs[")) {
if (dir->lengthModifier[0] == 'l')
// wchar_t
return 0;
else if (dir->lengthModifier[0] == 0)
return sizeof(char);
else
return 0;
}
return 0;
}
enum ScanfStoreSize {
// Store size not known in advance; can be calculated as strlen() of the
// destination buffer.
SSS_STRLEN = -1,
// Invalid conversion specifier.
SSS_INVALID = 0
};
// Returns the store size of a scanf directive (if >0), or a value of
// ScanfStoreSize.
static int scanf_get_store_size(ScanfDirective *dir) {
if (scanf_is_integer_conv(dir->convSpecifier)) {
switch (dir->lengthModifier[0]) {
case 'h':
return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
case 'l':
return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
case 'L':
return sizeof(long long);
case 'j':
return sizeof(INTMAX_T);
case 'z':
return sizeof(SIZE_T);
case 't':
return sizeof(PTRDIFF_T);
case 0:
return sizeof(int);
default:
return SSS_INVALID;
}
}
va_end(aq);
if (scanf_is_float_conv(dir->convSpecifier)) {
switch (dir->lengthModifier[0]) {
case 'L':
case 'q':
return sizeof(long double);
case 'l':
return dir->lengthModifier[1] == 'l' ? sizeof(long double)
: sizeof(double);
case 0:
return sizeof(float);
default:
return SSS_INVALID;
}
}
if (char_is_one_of(dir->convSpecifier, "sS[")) {
unsigned charSize = scanf_get_char_size(dir);
if (charSize == 0)
return SSS_INVALID;
if (dir->fieldWidth == 0)
return SSS_STRLEN;
return (dir->fieldWidth + 1) * charSize;
}
if (char_is_one_of(dir->convSpecifier, "cC")) {
unsigned charSize = scanf_get_char_size(dir);
if (charSize == 0)
return SSS_INVALID;
if (dir->fieldWidth == 0)
return charSize;
return dir->fieldWidth * charSize;
}
if (dir->convSpecifier == 'p') {
if (dir->lengthModifier[1] != 0)
return SSS_INVALID;
return sizeof(void *);
}
return SSS_INVALID;
}
// Common part of *scanf interceptors.
// Process format string and va_list, and report all store ranges.
static void scanf_common(void *ctx, const char *format, va_list ap_const) {
va_list aq;
va_copy(aq, ap_const);
const char *p = format;
while (p) {
ScanfDirective dir;
p = scanf_parse_next(p, &dir);
if (!p)
break;
if (dir.convSpecifier == 0) {
// This can only happen at the end of the format string.
CHECK_EQ(*p, 0);
break;
}
// Here the directive is valid. Do what it says.
if (dir.argIdx != -1) {
// Unsupported.
break;
}
if (dir.suppressed)
continue;
if (dir.allocate) {
// Unsupported;
continue;
}
int size = scanf_get_store_size(&dir);
if (size == SSS_INVALID)
break;
void *p = va_arg(aq, void *);
if (size == SSS_STRLEN) {
size = internal_strlen((const char *)p) + 1;
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, size);
}
}

View File

@ -31,14 +31,17 @@ static void testScanf2(void *ctx, const char *format, ...) {
va_end(ap);
}
static const char scanf_buf[] = "Test string.";
static size_t scanf_buf_size = sizeof(scanf_buf);
static void testScanf(const char *format, unsigned n, ...) {
std::vector<unsigned> scanf_sizes;
// 16 args should be enough.
testScanf2((void *)&scanf_sizes, format,
(void*)0, (void*)0, (void*)0, (void*)0,
(void*)0, (void*)0, (void*)0, (void*)0,
(void*)0, (void*)0, (void*)0, (void*)0,
(void*)0, (void*)0, (void*)0, (void*)0);
scanf_buf, scanf_buf, scanf_buf, scanf_buf,
scanf_buf, scanf_buf, scanf_buf, scanf_buf,
scanf_buf, scanf_buf, scanf_buf, scanf_buf,
scanf_buf, scanf_buf, scanf_buf, scanf_buf);
ASSERT_EQ(n, scanf_sizes.size()) <<
"Unexpected number of format arguments: '" << format << "'";
va_list ap;
@ -86,11 +89,24 @@ TEST(SanitizerCommonInterceptors, Scanf) {
testScanf("%*d", 0);
testScanf("%4d%8f%c", 3, I, F, C);
testScanf("%s%d", 2, 1, I);
testScanf("%[abc]", 1, 1);
testScanf("%s%d", 2, scanf_buf_size, I);
testScanf("%[abc]", 1, scanf_buf_size);
testScanf("%4[bcdef]", 1, 5);
testScanf("%[]]", 1, 1);
testScanf("%[]]", 1, scanf_buf_size);
testScanf("%8[^]%d0-9-]%c", 2, 9, C);
testScanf("%*[^:]%n:%d:%1[ ]%n", 4, I, I, 2, I);
testScanf("%*d%u", 1, I);
// Unsupported stuff.
testScanf("%a[", 0);
testScanf("%as", 0);
testScanf("%aS", 0);
testScanf("%a13S", 0);
testScanf("%alS", 0);
testScanf("%5$d", 0);
testScanf("%md", 0);
testScanf("%m10s", 0);
}