Adds a JSON parser and a benchmark (json-bench) to catch performance regressions.

llvm-svn: 146735
This commit is contained in:
Manuel Klimek 2011-12-16 13:09:10 +00:00
parent fd1f79871a
commit 2c899a181c
12 changed files with 996 additions and 2 deletions

View File

@ -353,6 +353,7 @@ add_subdirectory(utils/FileUpdate)
add_subdirectory(utils/count)
add_subdirectory(utils/not)
add_subdirectory(utils/llvm-lit)
add_subdirectory(utils/json-bench)
add_subdirectory(projects)

View File

@ -0,0 +1,444 @@
//===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a JSON parser.
//
// See http://www.json.org/ for an overview.
// See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
//
// FIXME: Currently this supports a subset of JSON. Specifically, support
// for numbers, booleans and null for values is missing.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_JSON_PARSER_H
#define LLVM_CLANG_TOOLING_JSON_PARSER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include <string>
namespace llvm {
class JSONString;
class JSONValue;
class JSONKeyValuePair;
/// \brief Base class for a parsable JSON atom.
///
/// This class has no semantics other than being a unit of JSON data which can
/// be parsed out of a JSON document. The kind of the concrete atom is fixed at
/// construction time and is what the classof() helpers of the subclasses and
/// the dispatch in skip() are based on.
class JSONAtom {
public:
/// \brief Possible types of JSON objects.
enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };
/// \brief Returns the type of this value.
Kind getKind() const { return MyKind; }
/// \brief dyn_cast helper; every atom is a JSONAtom, so this is always true.
static bool classof(const JSONAtom *Atom) { return true; }
protected:
/// \brief Creates an atom of the given kind; only usable by subclasses.
JSONAtom(Kind MyKind) : MyKind(MyKind) {}
private:
/// \brief Parses to the end of the object and returns whether parsing
/// was successful.
///
/// Dispatches on the stored kind to the skip() of the concrete subclass.
bool skip() const;
/// \brief The kind of the concrete atom, as passed to the constructor.
Kind MyKind;
// The parser and the node classes call skip() on atoms they hold.
friend class JSONParser;
friend class JSONKeyValuePair;
template <typename, char, char, JSONAtom::Kind> friend class JSONContainer;
};
/// \brief A parser for JSON text.
///
/// Use an object of JSONParser to iterate over the values of a JSON text.
/// All objects are parsed during the iteration, so you can only iterate once
/// over the JSON text, but the cost of partial iteration is minimized.
/// Create a new JSONParser if you want to iterate multiple times.
class JSONParser {
public:
/// \brief Create a JSONParser for the given input.
///
/// Parsing is started via parseRoot(). Access to the object returned from
/// parseRoot() will parse the input lazily.
///
/// The parser stores the given StringRef and reads from it while parsing.
JSONParser(StringRef Input);
/// \brief Returns the outermost JSON value (either an array or an object).
///
/// Can return NULL if the input does not start with an array or an object.
/// The object is not parsed yet - the caller must either iterate over the
/// returned object or call 'skip' to trigger parsing.
///
/// A JSONValue can be either a JSONString, JSONObject or JSONArray.
JSONValue *parseRoot();
/// \brief Parses the JSON text and returns whether it is valid JSON.
///
/// In case validate() returns false, failed() will return true and
/// getErrorMessage() will return the parsing error.
bool validate();
/// \brief Returns true if an error occurred during parsing.
///
/// If there was an error while parsing an object that was created by
/// iterating over the result of 'parseRoot', 'failed' will return true.
bool failed() const;
/// \brief Returns an error message when 'failed' returns true.
std::string getErrorMessage() const;
private:
/// \brief These methods manage the implementation details of parsing new JSON
/// atoms.
/// @{
JSONString *parseString();
JSONValue *parseValue();
JSONKeyValuePair *parseKeyValuePair();
/// @}
/// \brief Templated helpers to parse the elements out of both forms of JSON
/// containers.
/// @{
template <typename AtomT> AtomT *parseElement();
template <typename AtomT, char StartChar, char EndChar>
StringRef::iterator parseFirstElement(const AtomT *&Element);
template <typename AtomT, char EndChar>
StringRef::iterator parseNextElement(const AtomT *&Element);
/// @}
/// \brief Whitespace parsing.
/// @{
void nextNonWhitespace();
bool isWhitespace();
/// @}
/// \brief These methods are used for error handling.
/// @{
void setExpectedError(StringRef Expected, StringRef Found);
void setExpectedError(StringRef Expected, char Found);
bool errorIfAtEndOfFile(StringRef Message);
bool errorIfNotAt(char C, StringRef Message);
/// @}
/// All nodes are allocated by the parser and will be deallocated when the
/// parser is destroyed.
BumpPtrAllocator ValueAllocator;
/// \brief The original input to the parser.
const StringRef Input;
/// \brief The current position in the parse stream.
StringRef::iterator Position;
/// \brief If non-empty, an error has occurred.
std::string ErrorMessage;
// Containers call back into the private parsing helpers while iterating.
template <typename AtomT, char StartChar, char EndChar,
JSONAtom::Kind ContainerKind>
friend class JSONContainer;
};
/// \brief Common base of all atoms that may appear as a JSON value.
///
/// Strings, arrays and objects derive from this class; key-value pairs do not.
/// The class carries no API of its own and only serves as a node in the type
/// hierarchy that dyn_cast and friends can test against.
class JSONValue : public JSONAtom {
public:
  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    switch (Atom->getKind()) {
    case JK_KeyValuePair:
      // The only atom kind that is not a value.
      return false;
    case JK_String:
    case JK_Object:
    case JK_Array:
      return true;
    }
    llvm_unreachable("Invalid JSONAtom kind");
  }
  static bool classof(const JSONValue *Value) { return true; }
  ///@}

protected:
  /// \brief Forwards the kind of the concrete value to the atom base class.
  JSONValue(Kind MyKind) : JSONAtom(MyKind) {}
};
/// \brief Gives access to the text of a JSON string.
///
/// FIXME: Implement a method to return the unescaped text.
class JSONString : public JSONValue {
public:
  /// \brief Returns the text of the string exactly as it was stored at
  /// construction.
  ///
  /// No unescaping is performed by this class, so any escape sequences are
  /// still present in the returned text.
  /// See http://www.ietf.org/rfc/rfc4627.txt for details.
  StringRef getRawText() const { return RawText; }

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == JK_String;
  }
  static bool classof(const JSONString *String) { return true; }
  ///@}

private:
  /// \brief Wraps the given text in a string node.
  JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}

  /// \brief Skipping a string does no work and always succeeds.
  bool skip() const { return true; }

  /// \brief The text handed to the constructor.
  StringRef RawText;

  friend class JSONAtom;
  friend class JSONParser;
};
/// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
///
/// A JSONKeyValuePair derives directly from JSONAtom and is therefore not a
/// JSONValue. Pairs are the element type of JSONObject; they are neither
/// elements of a JSONArray nor viable as top-level values.
class JSONKeyValuePair : public JSONAtom {
public:
  /// \brief The key of the pair.
  const JSONString * const Key;
  /// \brief The value associated with the key.
  const JSONValue * const Value;

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == JK_KeyValuePair;
  }
  static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; }
  ///@}

private:
  /// \brief Creates a pair from an already parsed key and value.
  JSONKeyValuePair(const JSONString *Key, const JSONValue *Value)
      : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {}

  /// \brief Skips the pair by skipping its value and reports the outcome.
  bool skip() const { return Value->skip(); }

  friend class JSONAtom;
  friend class JSONParser;
  template <typename, char, char, JSONAtom::Kind> friend class JSONContainer;
};
/// \brief Implementation of JSON containers (arrays and objects).
///
/// JSONContainers drive the lazy parsing of JSON arrays and objects via
/// forward iterators. Call 'skip' to validate parsing of all elements of the
/// container and to position the parse stream behind the container.
///
/// \tparam AtomT The element type (JSONValue for arrays, JSONKeyValuePair for
///         objects).
/// \tparam StartChar The character that opens the container.
/// \tparam EndChar The character that closes the container.
/// \tparam ContainerKind The JSONAtom kind reported by the container.
template <typename AtomT, char StartChar, char EndChar,
          JSONAtom::Kind ContainerKind>
class JSONContainer : public JSONValue {
public:
  /// \brief An iterator that parses the underlying container during iteration.
  ///
  /// Iterators on the same collection use shared state, so when multiple copies
  /// of an iterator exist, only one is allowed to be used for iteration;
  /// iterating multiple copies of an iterator of the same collection will lead
  /// to undefined behavior.
  class const_iterator : public std::iterator<std::forward_iterator_tag,
                                              const AtomT*> {
  public:
    const_iterator(const const_iterator &I) : Container(I.Container) {}

    /// \brief Two iterators are equal if both are at the end, or if neither is
    /// and their containers are at the same parse position.
    bool operator==(const const_iterator &I) const {
      if (isEnd() || I.isEnd())
        return isEnd() == I.isEnd();
      return Container->Position == I.Container->Position;
    }
    bool operator!=(const const_iterator &I) const { return !(*this == I); }

    /// \brief Advances the shared parse state of the container by one element.
    const_iterator &operator++() {
      Container->parseNextElement();
      return *this;
    }

    /// \brief Returns the element the container currently points at.
    const AtomT *operator*() { return Container->Current; }

  private:
    /// \brief Create an iterator for which 'isEnd' returns true.
    const_iterator() : Container(0) {}

    /// \brief Create an iterator for the given container.
    const_iterator(const JSONContainer *Container) : Container(Container) {}

    /// \brief An iterator is at the end if it has no container, or if the
    /// container's position is the default constructed iterator.
    bool isEnd() const {
      return Container == 0 || Container->Position == StringRef::iterator();
    }

    const JSONContainer * const Container;

    friend class JSONContainer;
  };

  /// \brief Returns a lazy parsing iterator over the container.
  ///
  /// As the iterator drives the parse stream, begin() must only be called
  /// once per container.
  const_iterator begin() const {
    if (Started)
      report_fatal_error("Cannot parse container twice.");
    Started = true;
    // Set up the position and current element when we begin iterating over the
    // container.
    Position = Parser->parseFirstElement<AtomT, StartChar, EndChar>(Current);
    return const_iterator(this);
  }

  /// \brief Returns the iterator that marks the end of every container.
  const_iterator end() const {
    return const_iterator();
  }

private:
  JSONContainer(JSONParser *Parser)
    : JSONValue(ContainerKind), Parser(Parser),
      Position(), Current(0), Started(false) {}

  /// \brief Returns an iterator at the current parse state, starting the
  /// iteration if that has not happened yet.
  const_iterator current() const {
    if (!Started)
      return begin();
    return const_iterator(this);
  }

  /// \brief Skips to the next position in the parse stream.
  ///
  /// Returns false as soon as an element cannot be skipped, and otherwise
  /// whether the parser is free of errors after the last element.
  bool skip() const {
    for (const_iterator I = current(), E = end(); I != E; ++I) {
      assert(*I != 0);
      if (!(*I)->skip())
        return false;
    }
    return !Parser->failed();
  }

  /// \brief Parse the next element in the container into the Current element.
  ///
  /// This routine is called as an iterator into this container walks through
  /// its elements. It mutates the container's internal current node to point to
  /// the next atom of the container.
  void parseNextElement() const {
    // The current element has to be parsed completely before the stream is
    // positioned behind it. If that fails, the stream may already be at the
    // end of the input, so continuing would step past the buffer; end the
    // iteration instead.
    if (!Current->skip()) {
      Current = 0;
      Position = StringRef::iterator();
      return;
    }
    Position = Parser->parseNextElement<AtomT, EndChar>(Current);
  }

  // For parsing, JSONContainers call back into the JSONParser.
  JSONParser * const Parser;

  // 'Position', 'Current' and 'Started' store the state of the parse stream
  // for iterators on the container, they don't change the container's elements
  // and are thus marked as mutable.
  mutable StringRef::iterator Position;
  mutable const AtomT *Current;
  mutable bool Started;

  friend class JSONAtom;
  friend class JSONParser;
  friend class const_iterator;

public:
  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == ContainerKind;
  }
  static bool classof(const JSONContainer *Container) { return true; }
  ///@}
};
/// \brief A simple JSON array: a container of JSONValues delimited by '[' and
/// ']'.
typedef JSONContainer<JSONValue, '[', ']', JSONAtom::JK_Array> JSONArray;
/// \brief A JSON object: an iterable list of JSON key-value pairs delimited by
/// '{' and '}'.
typedef JSONContainer<JSONKeyValuePair, '{', '}', JSONAtom::JK_Object>
JSONObject;
/// \brief Template adaptor to dispatch element parsing for values.
///
/// Declared here so the templated helpers below use the explicit
/// specialization.
template <> JSONValue *JSONParser::parseElement();
/// \brief Template adaptor to dispatch element parsing for key value pairs.
template <> JSONKeyValuePair *JSONParser::parseElement();
/// \brief Parses the first element of a JSON array or object, or detects that
/// the container is empty.
///
/// On entry the current position must be on the container's start character.
/// The parser moves to the next non-whitespace character and, unless that is
/// the end character, parses one element into \p Element. On success the new
/// parse position is returned. If the container is empty, the input ends, or
/// the element fails to parse, \p Element is NULL and a default constructed
/// StringRef::iterator is returned.
template <typename AtomT, char StartChar, char EndChar>
StringRef::iterator JSONParser::parseFirstElement(const AtomT *&Element) {
  assert(*Position == StartChar);
  const StringRef::iterator NoElement = StringRef::iterator();
  Element = 0;
  nextNonWhitespace();
  // The end-of-file check must come first: it guards the dereference.
  if (errorIfAtEndOfFile("value or end of container at start of container") ||
      *Position == EndChar)
    return NoElement;
  Element = parseElement<AtomT>();
  return Element != 0 ? Position : NoElement;
}
/// \brief Parses the next element of a JSON array or object, or closes the
/// container.
///
/// The method assumes that the current position is before the ',' which
/// separates the next element from the current element. When successful, it
/// stores the parsed element in \p Element and returns the new position after
/// parsing the element. Otherwise - at the end of the container, at the end of
/// the input, or on a parse error - \p Element is NULL and a default
/// constructed StringRef::iterator is returned.
template <typename AtomT, char EndChar>
StringRef::iterator JSONParser::parseNextElement(const AtomT *&Element) {
  Element = 0;
  nextNonWhitespace();
  // Every "no element" path returns the same default constructed iterator, so
  // that callers can compare against StringRef::iterator() uniformly.
  if (errorIfAtEndOfFile("',' or end of container for next element"))
    return StringRef::iterator();
  switch (*Position) {
    case ',':
      nextNonWhitespace();
      if (errorIfAtEndOfFile("element in container"))
        return StringRef::iterator();
      Element = parseElement<AtomT>();
      if (Element == 0)
        return StringRef::iterator();
      return Position;
    case EndChar:
      // Regular end of the container; no error is recorded.
      return StringRef::iterator();
    default:
      setExpectedError("',' or end of container for next element", *Position);
      return StringRef::iterator();
  }
}
} // end namespace llvm
#endif // LLVM_CLANG_TOOLING_JSON_PARSER_H

View File

@ -29,6 +29,7 @@ add_llvm_library(LLVMSupport
IntervalMap.cpp
IsInf.cpp
IsNAN.cpp
JSONParser.cpp
ManagedStatic.cpp
MemoryBuffer.cpp
MemoryObject.cpp

View File

@ -0,0 +1,221 @@
//===--- JSONParser.cpp - Simple JSON parser ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a JSON parser.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/JSONParser.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
namespace llvm {
// Stores the input and places the parse position at its first character.
JSONParser::JSONParser(StringRef Input)
: Input(Input), Position(Input.begin()) {}
// Returns the (not yet parsed) outermost array or object of the input.
//
// Leading whitespace is skipped. If the input is exhausted, or the first
// non-whitespace character is neither '[' nor '{', an error is recorded and
// NULL is returned. It is a fatal error to call this after the parse position
// has moved away from the start of the input.
JSONValue *JSONParser::parseRoot() {
  if (Position != Input.begin())
    report_fatal_error("Cannot reuse JSONParser.");
  if (isWhitespace())
    nextNonWhitespace();
  if (errorIfAtEndOfFile("'[' or '{' at start of JSON text"))
    return 0;
  switch (*Position) {
    case '[':
      return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
    case '{':
      return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
    default:
      setExpectedError("'[' or '{' at start of JSON text", *Position);
      return 0;
  }
}
bool JSONParser::validate() {
return parseRoot()->skip();
}
// Records a parse error of the form
// "Error while parsing JSON: expected <Expected>, but found <Found>.",
// replacing any previously recorded error.
void JSONParser::setExpectedError(StringRef Expected, StringRef Found) {
  std::string Text("Error while parsing JSON: expected ");
  Text += Expected.str();
  Text += ", but found ";
  Text += Found.str();
  Text += ".";
  ErrorMessage = Text;
}
// Single-character convenience overload: records
// "Error while parsing JSON: expected <Expected>, but found <Found>."
// with the one character 'Found' as the text that was found.
void JSONParser::setExpectedError(StringRef Expected, char Found) {
  const StringRef FoundText(&Found, 1);
  setExpectedError(Expected, FoundText);
}
// Returns false while there is still input left at the current position.
// Otherwise records the error
// "Error while parsing JSON: expected <Expected>, but found EOF."
// and returns true.
bool JSONParser::errorIfAtEndOfFile(StringRef Expected) {
  if (Position != Input.end())
    return false;
  setExpectedError(Expected, "EOF");
  return true;
}
// Sets the current error if the current character is not C to:
// "Error while parsing JSON: expected 'C', but got <current character>".
bool JSONParser::errorIfNotAt(char C, StringRef Message) {
if (Position == Input.end() || *Position != C) {
std::string Expected =
("'" + StringRef(&C, 1) + "' " + Message).str();
if (Position == Input.end())
setExpectedError(Expected, "EOF");
else
setExpectedError(Expected, *Position);
return true;
}
return false;
}
// Forbidding inlining improves performance by roughly 20%.
// FIXME: Remove once llvm optimizes this to the faster version without hints.
LLVM_ATTRIBUTE_NOINLINE static bool
wasEscaped(StringRef::iterator First, StringRef::iterator Position);

// Returns whether the character at 'Position' was escaped with a leading '\'.
// 'First' specifies the position of the first character in the string.
static bool wasEscaped(StringRef::iterator First,
                       StringRef::iterator Position) {
  assert(Position - 1 >= First);
  // Walk backwards over the run of consecutive '\'s that directly precedes
  // 'Position', without leaving the string.
  StringRef::iterator RunStart = Position;
  while (RunStart > First && *(RunStart - 1) == '\\')
    --RunStart;
  // (Position - RunStart) is the length of that run. Backslashes escape each
  // other pairwise, so the character at 'Position' is escaped exactly when the
  // run has odd length.
  return (Position - RunStart) % 2 == 1;
}
// Parses a JSONString, assuming that the current position is on a quote.
//
// On success the position is left on the closing quote and the returned node
// refers to the input between the two quotes. Returns NULL, with an error
// recorded, if the opening quote is missing or the input ends before the
// closing quote.
JSONString *JSONParser::parseString() {
  assert(Position != Input.end());
  assert(!isWhitespace());
  if (errorIfNotAt('"', "at start of string"))
    return 0;
  const StringRef::iterator First = Position + 1;

  // Benchmarking shows that this loop is the hot path of the application with
  // about 2/3rd of the runtime cycles. Escaped quotes are uncommon, and quotes
  // preceded by several backslashes are rarer still, so the common case gets
  // the tightest possible inner loop: scan for the next quote and only then
  // look at the character before it. If that character is not a '\', the
  // quote terminates the string. If it is, scan backwards to find out whether
  // the backslash escapes the quote or is itself an escaped backslash; for an
  // escaped quote, resume scanning for the next quote.
  // This optimization brings around 30% performance improvements.
  for (;;) {
    // Step over the current quote.
    ++Position;
    // Find the next quote.
    while (Position != Input.end() && *Position != '"')
      ++Position;
    if (errorIfAtEndOfFile("\" at end of string"))
      return 0;
    // Stop unless the quote that was found is escaped.
    if (*(Position - 1) != '\\' || !wasEscaped(First, Position))
      break;
  }
  return new (ValueAllocator.Allocate<JSONString>())
      JSONString(StringRef(First, Position - First));
}
// Advances the position to the next non-whitespace position.
void JSONParser::nextNonWhitespace() {
do {
++Position;
} while (isWhitespace());
}
// Returns true if the input is not exhausted and the current character is a
// space, tab, line feed or carriage return.
bool JSONParser::isWhitespace() {
  if (Position == Input.end())
    return false;
  switch (*Position) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
// Returns true once an error message has been recorded.
bool JSONParser::failed() const {
return !ErrorMessage.empty();
}
// Returns a copy of the recorded error message; empty if no error occurred.
std::string JSONParser::getErrorMessage() const {
return ErrorMessage;
}
bool JSONAtom::skip() const {
switch (MyKind) {
case JK_Array: return cast<JSONArray>(this)->skip();
case JK_Object: return cast<JSONObject>(this)->skip();
case JK_String: return cast<JSONString>(this)->skip();
case JK_KeyValuePair: return cast<JSONKeyValuePair>(this)->skip();
}
llvm_unreachable("Impossible enum value.");
}
// Parses a JSONValue, assuming that the current position is at the first
// character of the value.
//
// Arrays and objects are only created here; their content is parsed lazily
// when they are iterated or skipped. Strings are parsed immediately. Any other
// first character records an error and yields NULL.
JSONValue *JSONParser::parseValue() {
  assert(Position != Input.end());
  assert(!isWhitespace());
  const char First = *Position;
  if (First == '"')
    return parseString();
  if (First == '[')
    return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
  if (First == '{')
    return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
  setExpectedError("'[', '{' or '\"' at start of value", First);
  return 0;
}
// Parses a JSONKeyValuePair, assuming that the current position is at the first
// character of the key, value pair.
//
// Returns NULL, with an error recorded, if the key is not a string, the ':'
// separator is missing, the input ends after the ':', or the value cannot be
// parsed.
JSONKeyValuePair *JSONParser::parseKeyValuePair() {
  assert(Position != Input.end());
  assert(!isWhitespace());
  JSONString *Key = parseString();
  if (Key == 0)
    return 0;
  nextNonWhitespace();
  if (errorIfNotAt(':', "between key and value"))
    return 0;
  nextNonWhitespace();
  // parseValue() requires a character to look at; input that ends right after
  // the ':' has to be diagnosed here.
  if (errorIfAtEndOfFile("'[', '{' or '\"' at start of value"))
    return 0;
  const JSONValue *Value = parseValue();
  if (Value == 0)
    return 0;
  return new (ValueAllocator.Allocate<JSONKeyValuePair>(1))
      JSONKeyValuePair(Key, Value);
}
// Element parsing for containers of JSONValues: forwards to parseValue().
template <> JSONValue *JSONParser::parseElement() {
return parseValue();
}
// Element parsing for containers of JSONKeyValuePairs: forwards to
// parseKeyValuePair().
template <> JSONKeyValuePair *JSONParser::parseElement() {
return parseKeyValuePair();
}
} // end namespace llvm

View File

@ -77,5 +77,5 @@ add_dependencies(check.deps
BugpointPasses LLVMHello
llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump
llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump macho-dump opt
FileCheck count not)
FileCheck count not json-bench)
set_target_properties(check.deps PROPERTIES FOLDER "Tests")

View File

@ -0,0 +1,5 @@
; RUN: json-bench --verify --info-output-file=- | FileCheck %s
; CHECK: Fast: Parsing
; CHECK: Fast: Loop

View File

@ -133,6 +133,7 @@ add_llvm_unittest(Support
Support/CommandLineTest.cpp
Support/ConstantRangeTest.cpp
Support/EndianTest.cpp
Support/JSONParserTest.cpp
Support/LeakDetectorTest.cpp
Support/MathExtrasTest.cpp
Support/Path.cpp

View File

@ -0,0 +1,218 @@
//===- unittest/Support/JSONParserTest ------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Casting.h"
#include "llvm/Support/JSONParser.h"
#include "llvm/ADT/Twine.h"
#include "gtest/gtest.h"
namespace llvm {
// Returns a buffer that contains the content of the given string without
// the trailing zero, in order to get valgrind to catch out-of-bound reads.
//
// The vector is built directly from the character range, which is also well
// defined for an empty string.
static std::vector<char> CutTrailingZero(StringRef String) {
  return std::vector<char>(String.begin(), String.end());
}
// Expects that validating the given input fails, that the parser reports the
// failure and that a non-empty error message is available. 'Message' and the
// input are attached to every expectation.
static void ExpectParseError(StringRef Message,
                             const std::vector<char> &InputWithoutZero) {
  const StringRef Input(&InputWithoutZero[0], InputWithoutZero.size());
  JSONParser Parser(Input);
  const bool Valid = Parser.validate();
  EXPECT_FALSE(Valid) << Message << ": " << Input;
  EXPECT_TRUE(Parser.failed()) << Message << ": " << Input;
  EXPECT_FALSE(Parser.getErrorMessage().empty()) << Message << ": " << Input;
}
// Convenience overload for string inputs: copies the text into a buffer
// without a trailing zero and runs the check above on that buffer.
static void ExpectParseError(StringRef Message, StringRef Input) {
  const std::vector<char> Buffer = CutTrailingZero(Input);
  ExpectParseError(Message, Buffer);
}
// Expects that validating the given input succeeds. On failure, 'Message', the
// input and the parser's error message are reported.
static void ExpectParseSuccess(StringRef Message,
                               const std::vector<char> &InputWithoutZero) {
  const StringRef Input(&InputWithoutZero[0], InputWithoutZero.size());
  JSONParser Parser(Input);
  const bool Valid = Parser.validate();
  EXPECT_TRUE(Valid)
      << Message << ": " << Input << " - " << Parser.getErrorMessage();
}
// Convenience overload for string inputs: copies the text into a buffer
// without a trailing zero and runs the check above on that buffer.
static void ExpectParseSuccess(StringRef Message, StringRef Input) {
  const std::vector<char> Buffer = CutTrailingZero(Input);
  ExpectParseSuccess(Message, Buffer);
}
// An empty input has no root value.
TEST(JSONParser, FailsOnEmptyString) {
JSONParser Parser("");
EXPECT_EQ(NULL, Parser.parseRoot());
}
// A zero-length input without any backing buffer has no root value either.
TEST(JSONParser, DoesNotReadAfterInput) {
JSONParser Parser(llvm::StringRef(NULL, 0));
EXPECT_EQ(NULL, Parser.parseRoot());
}
// A string is not accepted as the root of a JSON text.
TEST(JSONParser, FailsIfStartsWithString) {
JSONParser Character("\"x\"");
EXPECT_EQ(NULL, Character.parseRoot());
}
// The smallest valid array.
TEST(JSONParser, ParsesEmptyArray) {
ExpectParseSuccess("Empty array", "[]");
}
// Arrays that are never closed, or that contain an unsupported token, are
// rejected.
TEST(JSONParser, FailsIfNotClosingArray) {
ExpectParseError("Not closing array", "[");
ExpectParseError("Not closing array", " [ ");
ExpectParseError("Not closing array", " [x");
}
// Whitespace before, inside and after an empty array is accepted.
TEST(JSONParser, ParsesEmptyArrayWithWhitespace) {
ExpectParseSuccess("Array with spaces", " [ ] ");
ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n");
}
// An empty object as the only array element.
TEST(JSONParser, ParsesEmptyObject) {
ExpectParseSuccess("Empty object", "[{}]");
}
// An object with a single key, value pair.
TEST(JSONParser, ParsesObject) {
ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]");
}
// Several comma separated pairs in one object.
TEST(JSONParser, ParsesMultipleKeyValuePairsInObject) {
ExpectParseSuccess("Multiple key, value pairs",
"[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]");
}
// An object has to be closed with '}', not with the array's ']'.
TEST(JSONParser, FailsIfNotClosingObject) {
ExpectParseError("Missing close on empty", "[{]");
ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]");
}
// Key and value have to be separated by ':'.
TEST(JSONParser, FailsIfMissingColon) {
ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]");
ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]");
}
// Strings need both their opening and their closing quote.
TEST(JSONParser, FailsOnMissingQuote) {
ExpectParseError("Missing open quote", "[{a\":\"b\"}]");
ExpectParseError("Missing closing quote", "[{\"a\":\"b}]");
}
// Quotes escaped with a backslash do not terminate a string.
TEST(JSONParser, ParsesEscapedQuotes) {
ExpectParseSuccess("Parses escaped string in key and value",
"[{\"a\":\"\\\"b\\\" \\\" \\\"\"}]");
}
// The empty string is a valid value.
TEST(JSONParser, ParsesEmptyString) {
ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]");
}
// Neither the key nor the value of a pair may be left out.
TEST(JSONParser, FailsOnMissingString) {
ExpectParseError("Missing value", "[{\"a\":}]");
ExpectParseError("Missing key", "[{:\"b\"}]");
}
// Several comma separated objects in one array.
TEST(JSONParser, ParsesMultipleObjects) {
ExpectParseSuccess(
"Multiple objects in array",
"["
" { \"a\" : \"b\" },"
" { \"a\" : \"b\" },"
" { \"a\" : \"b\" }"
"]");
}
// Array elements have to be separated by ','.
TEST(JSONParser, FailsOnMissingComma) {
ExpectParseError(
"Missing comma",
"["
" { \"a\" : \"b\" }"
" { \"a\" : \"b\" }"
"]");
}
// A ',' has to be followed by another element.
TEST(JSONParser, FailsOnSuperfluousComma) {
ExpectParseError("Superfluous comma in array", "[ { \"a\" : \"b\" }, ]");
ExpectParseError("Superfluous comma in object", "{ \"a\" : \"b\", }");
}
// Spaces, tabs, line feeds and carriage returns are accepted between all
// tokens.
TEST(JSONParser, ParsesSpacesInBetweenTokens) {
ExpectParseSuccess(
"Various whitespace between tokens",
" \t \n\n \r [ \t \n\n \r"
" \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
" \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r"
" \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
" \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r");
}
// Arrays can be nested.
TEST(JSONParser, ParsesArrayOfArrays) {
ExpectParseSuccess("Array of arrays", "[[]]");
}
// Inputs that end in the middle of a string, array or object must be reported
// as parse errors. The helpers pass the input without a trailing zero, so that
// reads behind the input can be detected by tools such as valgrind.
TEST(JSONParser, HandlesEndOfFileGracefully) {
ExpectParseError("In string starting with EOF", "[\"");
ExpectParseError("In string hitting EOF", "[\" ");
ExpectParseError("In string escaping EOF", "[\" \\");
ExpectParseError("In array starting with EOF", "[");
ExpectParseError("In array element starting with EOF", "[[], ");
ExpectParseError("In array hitting EOF", "[[] ");
ExpectParseError("In array hitting EOF", "[[]");
ExpectParseError("In object hitting EOF", "{\"\"");
}
// Checks that the given string can be parsed into an identical string inside
// of an array.
//
// Each intermediate result is checked with a fatal assertion before it is
// used, so that an unexpected parse failure is reported as a test failure
// instead of crashing the test binary in dyn_cast or on a NULL dereference.
static void ExpectCanParseString(StringRef String) {
  std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
  JSONParser Parser(StringInArray);
  JSONValue *Root = Parser.parseRoot();
  ASSERT_TRUE(Root != 0) << Parser.getErrorMessage();
  const JSONArray *ParsedArray = dyn_cast<JSONArray>(Root);
  ASSERT_TRUE(ParsedArray != 0) << "Root is not an array";
  // begin() parses the first element; it equals end() if that failed.
  JSONArray::const_iterator FirstElement = ParsedArray->begin();
  ASSERT_TRUE(FirstElement != ParsedArray->end()) << Parser.getErrorMessage();
  const JSONValue *Element = *FirstElement;
  ASSERT_TRUE(Element != 0) << Parser.getErrorMessage();
  const JSONString *Parsed = dyn_cast<JSONString>(Element);
  ASSERT_TRUE(Parsed != 0) << "First element is not a string";
  StringRef ParsedString = Parsed->getRawText();
  EXPECT_EQ(String, ParsedString.str()) << Parser.getErrorMessage();
}
// Wraps the given text in ["..."] and expects that validating the result
// fails.
static void ExpectCannotParseString(StringRef String) {
  const std::string Description =
      (Twine("When parsing string \"") + String + "\"").str();
  std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
  ExpectParseError(Description, StringInArray);
}
// Exercises the handling of quotes and backslashes at the end of a string.
// Each argument is the text placed between the quotes of a string in an array.
// A text fails to parse if it ends in a backslash that would escape the
// closing quote, or if an unescaped quote inside it ends the string early.
TEST(JSONParser, ParsesStrings) {
ExpectCanParseString("");
ExpectCannotParseString("\\");
ExpectCannotParseString("\"");
ExpectCanParseString(" ");
ExpectCanParseString("\\ ");
ExpectCanParseString("\\\"");
ExpectCannotParseString("\"\\");
ExpectCannotParseString(" \\");
ExpectCanParseString("\\\\");
ExpectCannotParseString("\\\\\\");
ExpectCanParseString("\\\\\\\\");
ExpectCanParseString("\\\" ");
ExpectCannotParseString("\\\\\" ");
ExpectCanParseString("\\\\\\\" ");
ExpectCanParseString(" \\\\ \\\" \\\\\\\" ");
}
// The container iterators can be handed to standard algorithms: counting the
// elements of a six element array with std::distance yields 6.
TEST(JSONParser, WorksWithIteratorAlgorithms) {
JSONParser Parser("[\"1\", \"2\", \"3\", \"4\", \"5\", \"6\"]");
const JSONArray *Array = dyn_cast<JSONArray>(Parser.parseRoot());
EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
}
} // end namespace llvm

View File

@ -9,7 +9,7 @@
LEVEL = ..
PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
count fpcmp llvm-lit not unittest
count fpcmp llvm-lit not unittest json-bench
EXTRA_DIST := cgiplotNLT.pl check-each-file codegen-diff countloc.sh \
DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \

View File

@ -0,0 +1,5 @@
add_llvm_utility(json-bench
JSONBench.cpp
)
target_link_libraries(json-bench LLVMSupport)

View File

@ -0,0 +1,77 @@
//===- JSONBench - Benchmark the JSONParser implementation ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This program executes the JSONParser on differently sized JSON texts and
// outputs the run time.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/JSONParser.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
// Command line flag "-verify" (off by default). When set, main() runs a single
// small benchmark instead of the full set.
static llvm::cl::opt<bool>
Verify("verify", llvm::cl::desc(
"Run a quick verification useful for regression testing"),
llvm::cl::init(false));
// Runs two timed passes over JSONText and registers both timers with Group:
// "<Name>: Loop" covers a plain loop over all characters of the text, and
// "<Name>: Parsing" covers validating the text with the JSONParser.
// Prints a message and exits with status 1 if the text does not validate.
void benchmark(llvm::TimerGroup &Group, llvm::StringRef Name,
               llvm::StringRef JSONText) {
  // First pass: sum up all characters of the text.
  llvm::Timer BaseLine((Name + ": Loop").str(), Group);
  BaseLine.startTimer();
  char C = 0;
  llvm::StringRef::iterator Cursor = JSONText.begin();
  const llvm::StringRef::iterator Last = JSONText.end();
  while (Cursor != Last) {
    C += *Cursor;
    ++Cursor;
  }
  BaseLine.stopTimer();
  // Store the sum in a volatile so the loop has an observable result.
  volatile char DontOptimizeOut = C;
  (void)DontOptimizeOut;

  // Second pass: validate the text with the parser.
  llvm::Timer Parsing((Name + ": Parsing").str(), Group);
  Parsing.startTimer();
  llvm::JSONParser Parser(JSONText);
  const bool Valid = Parser.validate();
  if (!Valid) {
    llvm::errs() << "Parsing error in JSON parser benchmark.\n";
    exit(1);
  }
  Parsing.stopTimer();
}
// Creates a JSON text consisting of an array of N objects. Each object has the
// keys "key1", "key2" and "key3", and every value is a string of ValueSize '*'
// characters.
std::string createJSONText(int N, int ValueSize) {
  std::string JSONText;
  llvm::raw_string_ostream Stream(JSONText);
  // All values are identical, so build the value text once rather than three
  // times per object.
  const std::string Value(ValueSize, '*');
  Stream << "[\n";
  for (int I = 0; I < N; ++I) {
    Stream << " {\n"
           << "  \"key1\": \"" << Value << "\",\n"
           << "  \"key2\": \"" << Value << "\",\n"
           << "  \"key3\": \"" << Value << "\"\n"
           << " }";
    // No separator after the last object.
    if (I + 1 < N) Stream << ",";
    Stream << "\n";
  }
  Stream << "]\n";
  Stream.flush();
  return JSONText;
}
// Parses the command line and runs the benchmarks. With -verify only one small
// benchmark named "Fast" is run; otherwise three benchmarks with small, medium
// and large values are run.
int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  llvm::TimerGroup Group("JSON parser benchmark");

  if (Verify) {
    benchmark(Group, "Fast", createJSONText(1000, 500));
    return 0;
  }

  benchmark(Group, "Small Values", createJSONText(1000000, 5));
  benchmark(Group, "Medium Values", createJSONText(1000000, 500));
  benchmark(Group, "Large Values", createJSONText(10000, 50000));
  return 0;
}

View File

@ -0,0 +1,21 @@
##===- utils/json-bench/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../..
TOOLNAME = json-bench
USEDLIBS = LLVMSupport.a
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
# Don't install this utility
NO_INSTALL = 1
include $(LEVEL)/Makefile.common