From 4bda28e7651c9d5b12b296b6cde6de486d98e6d2 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 6 Apr 2009 21:50:39 +0000 Subject: [PATCH] add a new Blob encoding abbreviation for bitcode files that emits elements in a form that is efficient for the reader to just get a pointer in memory and start reading. APIs to do efficient reading and writing are still todo. llvm-svn: 68465 --- llvm/docs/BitCodeFormat.html | 7 +++++ llvm/include/llvm/Bitcode/BitCodes.h | 4 ++- llvm/include/llvm/Bitcode/BitstreamReader.h | 29 ++++++++++++++++++--- llvm/include/llvm/Bitcode/BitstreamWriter.h | 26 ++++++++++++++---- 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/llvm/docs/BitCodeFormat.html b/llvm/docs/BitCodeFormat.html index 8a53f27abe18..54b9361c3798 100644 --- a/llvm/docs/BitCodeFormat.html +++ b/llvm/docs/BitCodeFormat.html @@ -478,6 +478,13 @@ emitted as their code, followed by the extra data.
  • Char6: This field should be emitted as a char6-encoded value. This operand type takes no extra data.
  • +
  • Blob: This field is emitted as a vbr6 giving the number of bytes, followed by padding to a + 32-bit boundary (for alignment) and an array of 8-bit objects. The array of + bytes is further followed by tail padding to ensure that its total length is + a multiple of 4 bytes. This makes it very efficient for the reader to + decode the data without having to make a copy of it: it can use a pointer to + the data in the memory-mapped file and access it directly. A blob may only + occur as the last operand of an abbreviation.
  • diff --git a/llvm/include/llvm/Bitcode/BitCodes.h b/llvm/include/llvm/Bitcode/BitCodes.h index 277ae9e1d41c..2be9e530f26e 100644 --- a/llvm/include/llvm/Bitcode/BitCodes.h +++ b/llvm/include/llvm/Bitcode/BitCodes.h @@ -88,7 +88,8 @@ public: Fixed = 1, // A fixed width field, Val specifies number of bits. VBR = 2, // A VBR field where Val specifies the width of each chunk. Array = 3, // A sequence of fields, next field species elt encoding. - Char6 = 4 // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + Char6 = 4, // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + Blob = 5 // 8-bit aligned array of 8-bit characters. }; explicit BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} @@ -117,6 +118,7 @@ public: return true; case Array: case Char6: + case Blob: return false; } } diff --git a/llvm/include/llvm/Bitcode/BitstreamReader.h b/llvm/include/llvm/Bitcode/BitstreamReader.h index da78731f2d6c..281f8a412beb 100644 --- a/llvm/include/llvm/Bitcode/BitstreamReader.h +++ b/llvm/include/llvm/Bitcode/BitstreamReader.h @@ -149,7 +149,7 @@ public: } // If we run out of data, stop at the end of the stream. - if (LastChar == NextChar) { + if (NextChar == LastChar) { CurWord = 0; BitsInCurWord = 0; return 0; @@ -380,9 +380,7 @@ public: const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { ReadAbbreviatedLiteral(Op, Vals); - } else if (Op.getEncoding() != BitCodeAbbrevOp::Array) { - ReadAbbreviatedField(Op, Vals); - } else { + } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. Read the number of elements as a vbr6. unsigned NumElts = ReadVBR(6); @@ -393,6 +391,29 @@ public: // Read all the elements. for (; NumElts; --NumElts) ReadAbbreviatedField(EltEnc, Vals); + } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { + // Blob case. Read the number of bytes as a vbr6. + unsigned NumElts = ReadVBR(6); + SkipToWord(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. 
+ const unsigned char *NewEnd = NextChar+((NumElts+3)&~3); + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (NewEnd > LastChar) { + Vals.append(NumElts, 0); + NextChar = LastChar; + break; + } + + // Otherwise, read the number of bytes. + for (; NumElts; ++NextChar, --NumElts) + Vals.push_back(*NextChar); + // Skip over tail padding. + NextChar = NewEnd; + } else { + ReadAbbreviatedField(Op, Vals); } } diff --git a/llvm/include/llvm/Bitcode/BitstreamWriter.h b/llvm/include/llvm/Bitcode/BitstreamWriter.h index 49fe4076c2e6..66384f8b31c5 100644 --- a/llvm/include/llvm/Bitcode/BitstreamWriter.h +++ b/llvm/include/llvm/Bitcode/BitstreamWriter.h @@ -319,11 +319,7 @@ public: assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); ++RecordIdx; - } else if (Op.getEncoding() != BitCodeAbbrevOp::Array) { - assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); - EmitAbbreviatedField(Op, Vals[RecordIdx]); - ++RecordIdx; - } else { + } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. assert(i+2 == e && "array op not second to last?"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); @@ -334,6 +330,26 @@ public: // Emit each field. for (; RecordIdx != Vals.size(); ++RecordIdx) EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); + } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(static_cast(Vals.size()-RecordIdx), 6); + // Flush to a 32-bit alignment boundary. + FlushToWord(); + assert((Out.size() & 3) == 0 && "Not 32-bit aligned"); + + // Emit each field as a literal byte. + for (; RecordIdx != Vals.size(); ++RecordIdx) { + assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob"); + Out.push_back((unsigned char)Vals[RecordIdx]); + } + // Align end to 32-bits. + while (Out.size() & 3) + Out.push_back(0); + + } else { // Single scalar field. 
+ assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedField(Op, Vals[RecordIdx]); + ++RecordIdx; } } assert(RecordIdx == Vals.size() && "Not all record operands emitted!");