[FIRParser] Parse RawString ExtModule Parameters (#1340)

2021-06-30 09:24:37 -07:00 · 2021-06-30 09:24:37 -07:00 · e5dd5eede6
parent f5febe3f5f
commit e5dd5eede6
6 changed files with 85 additions and 13 deletions
--- a/docs/RationaleFIRRTL.md
+++ b/docs/RationaleFIRRTL.md
@ -46,10 +46,9 @@ support is planned, but not implemented.

 There are some exceptions to the above:

-1) We don't support the `'raw string'` syntax for strings.
-2) We don't support the `Fixed` types for fixed point numbers, and some
+1) We don't support the `Fixed` types for fixed point numbers, and some
   primitives associated with them.
-3) We don't support `Interval` types
+2) We don't support `Interval` types.

 Some of these may be research efforts that didn't gain broad adoption, in which
 case we don't want to support them.  However, if there is a good reason and a
--- a/lib/Dialect/FIRRTL/Import/FIRLexer.cpp
+++ b/lib/Dialect/FIRRTL/Import/FIRLexer.cpp
@ -66,7 +66,7 @@ std::string FIRToken::getStringValue(StringRef spelling) {

  std::string result;
  result.reserve(bytes.size());
-  for (unsigned i = 0, e = bytes.size(); i != e;) {
+  for (size_t i = 0, e = bytes.size(); i != e;) {
    auto c = bytes[i++];
    if (c != '\\') {
      result.push_back(c);
@ -76,17 +76,27 @@ std::string FIRToken::getStringValue(StringRef spelling) {
    assert(i + 1 <= e && "invalid string should be caught by lexer");
    auto c1 = bytes[i++];
    switch (c1) {
-    case '"':
    case '\\':
+    case '"':
+    case '\'':
      result.push_back(c1);
      continue;
+    case 'b':
+      result.push_back('\b');
+      continue;
    case 'n':
      result.push_back('\n');
      continue;
    case 't':
      result.push_back('\t');
      continue;
-      // TODO: Handle the rest of the escapes.
+    case 'f':
+      result.push_back('\f');
+      continue;
+    case 'r':
+      result.push_back('\r');
+      continue;
+      // TODO: Handle the rest of the escapes (octal and unicode).
    default:
      break;
    }
@ -101,6 +111,38 @@ std::string FIRToken::getStringValue(StringRef spelling) {
  return result;
 }

+/// Given a raw_string token, return its value: the enclosing single quotes are
+/// dropped and a backslash before a single quote is removed; every other byte
+/// is kept verbatim. The lexer has already verified that this token is valid.
+std::string FIRToken::getRawStringValue() const {
+  assert(getKind() == raw_string);
+  return getRawStringValue(getSpelling());
+}
+
+std::string FIRToken::getRawStringValue(StringRef spelling) {
+  // Drop the opening and closing quote; the loop below unescapes the payload.
+  StringRef bytes = spelling.drop_front().drop_back();
+
+  std::string result;
+  result.reserve(bytes.size());
+  for (size_t i = 0, e = bytes.size(); i != e;) { // i advances in the body
+    auto c = bytes[i++];
+    if (c != '\\') { // ordinary byte: copy it through unchanged
+      result.push_back(c);
+      continue;
+    }
+
+    assert(i + 1 <= e && "invalid string should be caught by lexer");
+    auto c1 = bytes[i++]; // the byte that follows the backslash
+    if (c1 != '\'') { // not an escaped single quote: keep the backslash too
+      result.push_back(c);
+    }
+    result.push_back(c1); // the following byte is always emitted
+  }
+
+  return result;
+}
+
 //===----------------------------------------------------------------------===//
 // FIRLexer
 //===----------------------------------------------------------------------===//
@ -243,7 +285,9 @@ FIRToken FIRLexer::lexTokenImpl() {
      continue;

    case '"':
-      return lexString(tokStart);
+      return lexString(tokStart, /*isRaw=*/false);
+    case '\'':
+      return lexString(tokStart, /*isRaw=*/true);

    case '+':
    case '-':
@ -390,16 +434,23 @@ void FIRLexer::skipComment() {
 }

 /// StringLit      ::= '"' UnquotedString? '"'
+/// RawString      ::= '\'' UnquotedString? '\''
 /// UnquotedString ::= ( '\\\'' | '\\"' | ~[\r\n] )+?
 ///
-FIRToken FIRLexer::lexString(const char *tokStart) {
+FIRToken FIRLexer::lexString(const char *tokStart, bool isRaw) {
  while (1) {
    switch (*curPtr++) {
    case '"': // This is the end of the string literal.
+      if (isRaw)
+        break;
      return formToken(FIRToken::string, tokStart);
+    case '\'': // This is the end of the raw string.
+      if (!isRaw)
+        break;
+      return formToken(FIRToken::raw_string, tokStart);
    case '\\':
-      // Ignore escaped '"'
-      if (*curPtr == '"')
+      // Ignore escaped '\'' or '"'
+      if (*curPtr == '\'' || *curPtr == '"')
        ++curPtr;
      break;
    case 0:
@ -409,6 +460,7 @@ FIRToken FIRLexer::lexString(const char *tokStart) {
        break;
      LLVM_FALLTHROUGH;
    case '\n': // Vertical whitespace isn't allowed in a string.
+    case '\r':
    case '\v':
    case '\f':
      return emitError(tokStart, "unterminated string");
--- a/lib/Dialect/FIRRTL/Import/FIRLexer.h
+++ b/lib/Dialect/FIRRTL/Import/FIRLexer.h
@ -74,6 +74,12 @@ public:
  std::string getStringValue() const;
  static std::string getStringValue(StringRef spelling);

+  /// Return the value of a raw_string token: the enclosing single quotes are
+  /// dropped, a backslash before a single quote is removed, and all else is
+  /// kept verbatim. The lexer has already verified that this token is valid.
+  std::string getRawStringValue() const;
+  static std::string getRawStringValue(StringRef spelling);
+
  // Location processing.
  llvm::SMLoc getLoc() const;
  llvm::SMLoc getEndLoc() const;
@ -128,7 +134,7 @@ private:
  FIRToken lexNumber(const char *tokStart);
  FIRToken lexFloatingPoint(const char *tokStart);
  void skipComment();
-  FIRToken lexString(const char *tokStart);
+  FIRToken lexString(const char *tokStart, bool isRaw);

  const llvm::SourceMgr &sourceMgr;
  mlir::MLIRContext *const context;
--- a/lib/Dialect/FIRRTL/Import/FIRParser.cpp
+++ b/lib/Dialect/FIRRTL/Import/FIRParser.cpp
@ -3295,11 +3295,17 @@ ParseResult FIRCircuitParser::parseModule(CircuitOp circuit,
      break;
    }
    case FIRToken::string: {
-      // Drop the quotes and unescape.
+      // Drop the double quotes and unescape.
      value = builder.getStringAttr(getToken().getStringValue());
      consumeToken(FIRToken::string);
      break;
    }
+    case FIRToken::raw_string: {
+      // Drop the single quotes and unescape the ones inside.
+      value = builder.getStringAttr(getToken().getRawStringValue());
+      consumeToken(FIRToken::raw_string);
+      break;
+    }

    case FIRToken::floatingpoint:
      double v;
--- a/lib/Dialect/FIRRTL/Import/FIRTokenKinds.def
+++ b/lib/Dialect/FIRRTL/Import/FIRTokenKinds.def
@ -52,7 +52,7 @@ TOK_LITERAL(integer)        // 42
 TOK_LITERAL(signed_integer) // -42 and +42
 TOK_LITERAL(floatingpoint)  // 42.0
 TOK_LITERAL(string)         // "foo"
-// TOK_LITERAL(raw_string)    // 'foo'
+TOK_LITERAL(raw_string)     // 'foo'

 TOK_LITERAL(fileinfo)
 TOK_LITERAL(inlineannotation) // %[{"foo":"bar"}]
--- a/test/Dialect/FIRRTL/parse-basic.fir
+++ b/test/Dialect/FIRRTL/parse-basic.fir
@ -857,3 +857,12 @@ circuit MyModule :     ; CHECK: firrtl.circuit "MyModule" {
    output c: UInt<8>
    c <= mux(sel, a, b)

+  ; CHECK-LABEL: firrtl.extmodule @RawStringParam
+  ; CHECK: parameters = {FORMAT = "xyz_timeout=%d\\n",
+  ; CHECK:               MIXED_QUOTES = "\22'\\\22",
+  ; CHECK:               TYPE = "bit"}}
+  extmodule RawStringParam :
+    parameter TYPE = 'bit'
+    parameter FORMAT = 'xyz_timeout=%d\n'
+    parameter MIXED_QUOTES = '"\'\"'
+