Bug #:

Submitted by: Reviewed by: Misc. cleanup/polish of NumericLiteralParser and its two clients, the C preprocessor and AST builder... llvm-svn: 39353
2007-03-13 20:29:44 +00:00 · 2007-03-13 20:29:44 +00:00 · f2fb89e759
parent 451d8f1626
commit f2fb89e759
6 changed files with 73 additions and 55 deletions
--- a/clang/AST/SemaExpr.cpp
+++ b/clang/AST/SemaExpr.cpp
@ -17,6 +17,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
@ -287,6 +288,12 @@ Sema::ExprResult Sema::ParseSimplePrimaryExpr(SourceLocation Loc,
 }

 Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) {
+  // fast path for a single digit (which is quite common). A single digit 
+  // cannot have a trigraph, escaped newline, radix prefix, or type suffix.
+  if (Tok.getLength() == 1) {
+    const char *t = PP.getSourceManager().getCharacterData(Tok.getLocation());
+    return ExprResult(new IntegerLiteral(*t-'0', Context.IntTy));
+  }
  SmallString<512> IntegerBuffer;
  IntegerBuffer.resize(Tok.getLength());
  const char *ThisTokBegin = &IntegerBuffer[0];
@ -299,15 +306,11 @@ Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) {
  //   a pointer to a *constant* buffer (avoiding a copy). 
  
  unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin);
-
-  // This is an optimization for single digits (which are very common).
-  if (ActualLength == 1)
-    return ExprResult(new IntegerLiteral(atoi(ThisTokBegin)));
-    
  NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, 
                               Tok.getLocation(), PP);
-  Expr *literal_expr = 0;
-    
+  if (Literal.hadError)
+    return ExprResult(true);
+
  if (Literal.isIntegerLiteral()) {
    TypeRef t;
    if (Literal.hasSuffix()) {
@ -322,12 +325,12 @@ Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) {
    }
    uintmax_t val;
    if (Literal.GetIntegerValue(val)) {
-      literal_expr = new IntegerLiteral(val, t);
+      return new IntegerLiteral(val, t);
    } 
  } else if (Literal.isFloatingLiteral()) {
    // TODO: add floating point processing...
  }
-  return literal_expr ? ExprResult(literal_expr) : ExprResult(true);
+  return ExprResult(true);
 }

 Action::ExprResult Sema::ParseParenExpr(SourceLocation L, SourceLocation R,
--- a/clang/Lex/LiteralSupport.cpp
+++ b/clang/Lex/LiteralSupport.cpp
@ -217,8 +217,8 @@ NumericLiteralParser(const char *begin, const char *end,
 }

 bool NumericLiteralParser::GetIntegerValue(uintmax_t &val) {
-  uintmax_t cutoff = UINTMAX_MAX / radix;
-  int cutlim = UINTMAX_MAX % radix;
+  uintmax_t max_value = UINTMAX_MAX / radix;
+  int max_digit = UINTMAX_MAX % radix;
  char c;
  
  val = 0;
@ -227,12 +227,12 @@ bool NumericLiteralParser::GetIntegerValue(uintmax_t &val) {
    c = *s++;
    if (c >= '0' && c <= '9')
      c -= '0';
-    else if (c >= 'A' && c <= 'F') // 10...15
-      c -= 'A' - 10;
-    else if (c >= 'a' && c <= 'f') // 10...15
+    else if (c >= 'A' && c <= 'F') 
+      c -= 'A' - 10; 
+    else if (c >= 'a' && c <= 'f') 
      c -= 'a' - 10;
    
-    if (val > cutoff || (val == cutoff && c > cutlim)) {
+    if (val > max_value || (val == max_value && c > max_digit)) {
      return false; // Overflow!
    } else {
      val *= radix;
@ -243,8 +243,8 @@ bool NumericLiteralParser::GetIntegerValue(uintmax_t &val) {
 }

 bool NumericLiteralParser::GetIntegerValue(int &val) {
-  intmax_t cutoff = INT_MAX / radix;
-  int cutlim = INT_MAX % radix;
+  intmax_t max_value = INT_MAX / radix;
+  int max_digit = INT_MAX % radix;
  char c;
  
  val = 0;
@ -253,12 +253,12 @@ bool NumericLiteralParser::GetIntegerValue(int &val) {
    c = *s++;
    if (c >= '0' && c <= '9')
      c -= '0';
-    else if (c >= 'A' && c <= 'F') // 10...15
+    else if (c >= 'A' && c <= 'F') 
      c -= 'A' - 10;
-    else if (c >= 'a' && c <= 'f') // 10...15
+    else if (c >= 'a' && c <= 'f')
      c -= 'a' - 10;
    
-    if (val > cutoff || (val == cutoff && c > cutlim)) {
+    if (val > max_value || (val == max_value && c > max_digit)) {
      return false; // Overflow!
    } else {
      val *= radix;
@ -267,3 +267,9 @@ bool NumericLiteralParser::GetIntegerValue(int &val) {
  }
  return true;
 }
+
+void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID, 
+          const std::string &M) {
+  PP.Diag(Loc, DiagID, M);
+  hadError = true;
+}
--- a/clang/Lex/PPExpressions.cpp
+++ b/clang/Lex/PPExpressions.cpp
@ -145,20 +145,33 @@ static bool EvaluateValue(int &Result, LexerToken &PeekTok, DefinedTracker &DT,
    PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
    return true;
  case tok::numeric_constant: {
-    // FIXME: track signs. ?? snaroff: talk to Chris...
    SmallString<512> IntegerBuffer;
    IntegerBuffer.resize(PeekTok.getLength());
    const char *ThisTokBegin = &IntegerBuffer[0];
    unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin);
    NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, 
                                 PeekTok.getLocation(), PP);
-    if (Literal.isIntegerLiteral()) {
-      Literal.GetIntegerValue(Result);
-    } else if (Literal.isFloatingLiteral()) {
+    if (Literal.hadError) 
+      return true; // a diagnostic was already reported.
+    else if (Literal.isIntegerLiteral()) {
+      if (!Literal.GetIntegerValue(Result)) {
+        // FIXME: C99 (6.10.1) dictates that all preprocessor arithmetic be
+        // performed using the largest integer type found on the target 
+        // computer, which is intmax_t (the default) or uintmax_t (if the 
+        // literal contains an unsigned suffix) defined in stdint.h.
+        // Since "Result" is typed as "int", the maximum legal integer 
+        // literal is currently INT32_MAX (or 2147483647). If the literal
+        // value is larger, we will overflow and trigger this assert.
+        assert(0 && "Integer Overflow in preprocessor expression"); 
+        return true;
+      }
+      PP.LexNonComment(PeekTok);
+      return false;
+    } else {
+      assert(Literal.isFloatingLiteral() && "Unknown ppnumber");
      PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
+      return true;
    }
-    PP.LexNonComment(PeekTok);
-    return false;
  }
  case tok::l_paren:
    PP.LexNonComment(PeekTok);  // Eat the (.
--- a/clang/Sema/SemaExpr.cpp
+++ b/clang/Sema/SemaExpr.cpp
@ -17,6 +17,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
@ -287,6 +288,12 @@ Sema::ExprResult Sema::ParseSimplePrimaryExpr(SourceLocation Loc,
 }

 Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) {
+  // fast path for a single digit (which is quite common). A single digit 
+  // cannot have a trigraph, escaped newline, radix prefix, or type suffix.
+  if (Tok.getLength() == 1) {
+    const char *t = PP.getSourceManager().getCharacterData(Tok.getLocation());
+    return ExprResult(new IntegerLiteral(*t-'0', Context.IntTy));
+  }
  SmallString<512> IntegerBuffer;
  IntegerBuffer.resize(Tok.getLength());
  const char *ThisTokBegin = &IntegerBuffer[0];
@ -299,15 +306,11 @@ Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) {
  //   a pointer to a *constant* buffer (avoiding a copy). 
  
  unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin);
-
-  // This is an optimization for single digits (which are very common).
-  if (ActualLength == 1)
-    return ExprResult(new IntegerLiteral(atoi(ThisTokBegin)));
-    
  NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, 
                               Tok.getLocation(), PP);
-  Expr *literal_expr = 0;
-    
+  if (Literal.hadError)
+    return ExprResult(true);
+
  if (Literal.isIntegerLiteral()) {
    TypeRef t;
    if (Literal.hasSuffix()) {
@ -322,12 +325,12 @@ Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) {
    }
    uintmax_t val;
    if (Literal.GetIntegerValue(val)) {
-      literal_expr = new IntegerLiteral(val, t);
+      return new IntegerLiteral(val, t);
    } 
  } else if (Literal.isFloatingLiteral()) {
    // TODO: add floating point processing...
  }
-  return literal_expr ? ExprResult(literal_expr) : ExprResult(true);
+  return ExprResult(true);
 }

 Action::ExprResult Sema::ParseParenExpr(SourceLocation L, SourceLocation R,
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@ -71,10 +71,6 @@ public:
  IntegerLiteral() : Expr(IntegerLiteralClass),
     Type(0), Value(0) {
  }
-  // constructor for the single digit case
-  IntegerLiteral(intmax_t value) : Expr(IntegerLiteralClass), 
-     Type(0), Value(value) {
-  }
  IntegerLiteral(intmax_t value, TypeRef type)
    : Expr(IntegerLiteralClass), Type(type), Value(value) {
 #if 0
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@ -14,19 +14,17 @@
 #ifndef LLVM_CLANG_LITERALSUPPORT_H
 #define LLVM_CLANG_LITERALSUPPORT_H

-#include "clang/Lex/Preprocessor.h"
+#include <string>

 namespace llvm {
 namespace clang {

 class Diagnostic;
 class Preprocessor;
+class SourceLocation;
 class TargetInfo;
    
-struct NumericLiteralParser {
-  NumericLiteralParser(const char *begin, const char *end,
-                       SourceLocation Loc, Preprocessor &PP);
-private:
+class NumericLiteralParser {
  Preprocessor &PP; // needed for diagnostics
  
  const char *const ThisTokBegin;
@ -40,35 +38,34 @@ private:
  bool saw_float_suffix;
  
 public:
+  NumericLiteralParser(const char *begin, const char *end,
+                       SourceLocation Loc, Preprocessor &PP);
  bool hadError;
  bool isUnsigned;
  bool isLong;
  bool isLongLong;
  
-  bool isIntegerLiteral() { 
-    return !saw_period && !saw_exponent && !hadError ? true : false;
+  bool isIntegerLiteral() const { 
+    return !saw_period && !saw_exponent ? true : false;
  }
-  bool isFloatingLiteral() {
-    return saw_period || saw_exponent && !hadError ? true : false;
+  bool isFloatingLiteral() const {
+    return saw_period || saw_exponent ? true : false;
  }
-  bool hasSuffix() {
+  bool hasSuffix() const {
    return SuffixBegin != ThisTokEnd;
  }
  /// getIntegerValue - Convert the string into a number. At this point, we 
  /// know the digit characters are valid (0...9, a...f, A...F). We don't know
  /// how many bits are needed to store the number. Sizing of the integer
  /// type (int, unsigned, long, unsigned long, long long, unsigned long long) 
-  /// will be done elsewhere - the computation is target dependent. We return 
-  /// true if the value fit into uintmax_t, false otherwise. 
+  /// will be done elsewhere - the size computation is target dependent. We  
+  /// return true if the value fit into "val", false otherwise. 
  bool GetIntegerValue(uintmax_t &val);
  bool GetIntegerValue(int &val);

 private:  
  void Diag(SourceLocation Loc, unsigned DiagID, 
-            const std::string &M = std::string()) {
-    PP.Diag(Loc, DiagID, M);
-    hadError = true;
-  }
+            const std::string &M = std::string());
  
  /// SkipHexDigits - Read and skip over any hex digits, up to End.
  /// Return a pointer to the first non-hex digit or End.