// Mirror of https://github.com/openqasm/openqasm.git
// (upstream file as listed: ANTLR, 264 lines, 8.8 KiB)
lexer grammar qasm3Lexer;

/* Naming conventions in this lexer grammar
 *
 * - Keywords and exact symbols that have only one possible value are written in
 *   all caps.  There is no more information in the parsed text than in the name
 *   of the lexeme.  For example, `INCLUDE` is only ever the string `'include'`.
 *
 * - Lexemes with information in the string form are in PascalCase.  This
 *   indicates there is more information in the token than just the name.  For
 *   example, `Identifier` has a payload containing the name of the identifier.
 */

/* Language keywords. */

// NOTE: every keyword here would also match `Identifier`.  ANTLR resolves
// equal-length matches in favour of the rule that appears first in the grammar,
// so these rules must stay above `Identifier`.  Longer keywords that share a
// prefix (`def` / `defcal` / `defcalgrammar`) are separated by longest-match.

// These three push a dedicated mode so that the token that follows them (a
// version number or a quoted string) is lexed without clashing with the
// default-mode literals; the pushed mode pops itself once that token is read.
OPENQASM: 'OPENQASM' -> pushMode(VERSION_IDENTIFIER);
INCLUDE: 'include' -> pushMode(ARBITRARY_STRING);
DEFCALGRAMMAR: 'defcalgrammar' -> pushMode(ARBITRARY_STRING);
DEF: 'def';
// `cal` and `defcal` switch mode (rather than push): the lexer walks through
// the prelude mode, then `CAL_BLOCK`, which finally sets `DEFAULT_MODE` again.
CAL: 'cal' -> mode(CAL_PRELUDE);
DEFCAL: 'defcal' -> mode(DEFCAL_PRELUDE);
GATE: 'gate';
EXTERN: 'extern';
BOX: 'box';
LET: 'let';

BREAK: 'break';
CONTINUE: 'continue';
IF: 'if';
ELSE: 'else';
END: 'end';
RETURN: 'return';
FOR: 'for';
WHILE: 'while';
IN: 'in';
SWITCH: 'switch';
CASE: 'case';
DEFAULT: 'default';

// Pragmas (with or without a leading `#`) and annotations (`@name` or
// `@dotted.name`) hand the rest of the physical line to `EAT_TO_LINE_END`.
PRAGMA: '#'? 'pragma' -> pushMode(EAT_TO_LINE_END);
AnnotationKeyword: '@' Identifier ('.' Identifier)* -> pushMode(EAT_TO_LINE_END);

/* Types. */

// Declaration modifiers.
INPUT: 'input';
OUTPUT: 'output';
CONST: 'const';
READONLY: 'readonly';
MUTABLE: 'mutable';

// Quantum types.
QREG: 'qreg';
QUBIT: 'qubit';

// Classical types.
CREG: 'creg';
BOOL: 'bool';
BIT: 'bit';
INT: 'int';
UINT: 'uint';
FLOAT: 'float';
ANGLE: 'angle';
COMPLEX: 'complex';
ARRAY: 'array';
VOID: 'void';

// Timing types.
DURATION: 'duration';
STRETCH: 'stretch';

/* Builtin identifiers and operations */

GPHASE: 'gphase';
INV: 'inv';
POW: 'pow';
CTRL: 'ctrl';
NEGCTRL: 'negctrl';

// The leading `#` is part of the lexeme.
DIM: '#dim';

DURATIONOF: 'durationof';

DELAY: 'delay';
RESET: 'reset';
MEASURE: 'measure';
BARRIER: 'barrier';

// PascalCase because the token text carries the value (`true` or `false`).
BooleanLiteral: 'true' | 'false';

/* Symbols */

// The lexer always prefers the longest match, so a doubled or compound
// operator (`++`, `**`, `||`, `&&`, `->`, `+=`, ...) wins over the
// single-character token it starts with regardless of rule order.

LBRACKET: '[';
RBRACKET: ']';
LBRACE: '{';
RBRACE: '}';
LPAREN: '(';
RPAREN: ')';

COLON: ':';
SEMICOLON: ';';

DOT: '.';
COMMA: ',';

EQUALS: '=';
ARROW: '->';
PLUS: '+';
DOUBLE_PLUS: '++';
MINUS: '-';
ASTERISK: '*';
DOUBLE_ASTERISK: '**';
SLASH: '/';
PERCENT: '%';
PIPE: '|';
DOUBLE_PIPE: '||';
AMPERSAND: '&';
DOUBLE_AMPERSAND: '&&';
CARET: '^';
AT: '@';
TILDE: '~';
EXCLAMATION_POINT: '!';

// Operator families: one token type per family, with the concrete operator
// available from the token text.
EqualityOperator: '==' | '!=';
CompoundAssignmentOperator: '+=' | '-=' | '*=' | '/=' | '&=' | '|=' | '~=' | '^=' | '<<=' | '>>=' | '%=' | '**=';
ComparisonOperator: '>' | '<' | '>=' | '<=';
BitshiftOperator: '>>' | '<<';

// An imaginary literal is a decimal integer or float, optional spaces/tabs
// (no newlines), and then the `im` suffix, all lexed as a single token.
IMAG: 'im';
ImaginaryLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* IMAG;

// Integer literals.  Each digit may be followed by at most one `_`, and the
// literal must end in a digit, so leading, trailing and doubled underscores
// are all rejected.  Binary and hex accept an upper- or lower-case prefix
// letter; the octal prefix is lower-case `0o` only.
BinaryIntegerLiteral: ('0b' | '0B') ([01] '_'?)* [01];
OctalIntegerLiteral: '0o' ([0-7] '_'?)* [0-7];
DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
HexIntegerLiteral: ('0x' | '0X') ([0-9a-fA-F] '_'?)* [0-9a-fA-F];

// Building blocks for identifiers.  These are fragments, so they never
// produce tokens on their own.
fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]; // valid unicode chars
fragment Letter: [A-Za-z];
fragment FirstIdCharacter: '_' | ValidUnicode | Letter;
fragment GeneralIdCharacter: FirstIdCharacter | [0-9];

// An identifier starts with `_` or a letter-like character and continues with
// the same set plus ASCII digits.
Identifier: FirstIdCharacter GeneralIdCharacter*;
// A hardware qubit is `$` followed by one or more ASCII digits, e.g. `$0`.
HardwareQubit: '$' [0-9]+;

// Exponent part of a float: `e` or `E`, an optional sign, then decimal digits.
fragment FloatLiteralExponent: [eE] (PLUS | MINUS)? DecimalIntegerLiteral;
// Every alternative below consumes at least one digit; the rule must never be
// able to match the empty string.
FloatLiteral:
    // 1_123e-3, 123e+4 or 123E5 (needs the exponent or it's just an integer)
    DecimalIntegerLiteral FloatLiteralExponent
    // .1234_5678 or .1e3 (no digits before the dot)
    | DOT DecimalIntegerLiteral FloatLiteralExponent?
    // 123.456, 123. or 145.32e+1_00
    | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?;

fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's';
// represents explicit time value in SI or backend units
// (number, optional spaces/tabs, then the unit, all lexed as a single token)
TimingLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* TimeUnit;

// A double-quoted, non-empty run of `0`/`1` with the same single-underscore
// separator rule as the integer literals.
BitstringLiteral: '"' ([01] '_'?)* [01] '"';

// Ignore whitespace between tokens, and define C++-style comments.
// All four rules use `skip`, so none of them reaches the parser.
Whitespace: [ \t]+ -> skip ;
Newline: [\r\n]+ -> skip ;
// `//` up to (but not including) the end of the line.
LineComment : '//' ~[\r\n]* -> skip;
// `/* ... */`, non-greedy so it stops at the first `*/`; these do not nest.
BlockComment : '/*' .*? '*/' -> skip;

// The version identifier token would be ambiguous between itself and
// integer/floating-point literals, so we use a special mode to ensure it's
// lexed correctly.  This mode is pushed by `OPENQASM` and pops itself as soon
// as the version has been read.
mode VERSION_IDENTIFIER;
    // NOTE: the rule name is spelled `IDENTIFER` (sic).  It is left unchanged
    // here so that the generated token vocabulary stays the same.
    VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;
    // `major` or `major.minor`, plain ASCII digits only (no `_` separators).
    VersionSpecifier: [0-9]+ ('.' [0-9]+)? -> popMode;

// An include statement's path or defcalgrammar target is potentially ambiguous
// with `BitstringLiteral`.  This mode is pushed by `INCLUDE` and
// `DEFCALGRAMMAR` and pops itself once the string has been read.
mode ARBITRARY_STRING;
    ARBITRARY_STRING_WHITESPACE: [ \t\r\n]+ -> skip;
    // allow ``"str"`` and ``'str'``;
    // The contents must be non-empty and may not contain the delimiting quote
    // character, a tab, a carriage return or a line feed.
    StringLiteral: ('"' ~["\r\t\n]+? '"' | '\'' ~['\r\t\n]+? '\'') -> popMode;

// A different lexer mode to swap to when we need handle tokens on a line basis
// rather than the default arbitrary-whitespace-based tokenisation.  This is
// used by the annotation and pragma rules.
mode EAT_TO_LINE_END;
    // Drop the spaces/tabs between the keyword and the line's content.
    EAT_INITIAL_SPACE: [ \t]+ -> skip;
    // A single line-ending character ends the mode; it is skipped, not emitted.
    EAT_LINE_END: [\r\n] -> popMode, skip;

    // The line content must be a non-empty token to satisfy ANTLR (otherwise it
    // would be able to produce an infinite number of tokens).  We could include
    // the line ending to guarantee that this is always non-empty, but that just
    // puts an annoying burden on consumers to remove it again.
    //
    // The first character is required to be something other than a space, tab
    // or line ending; everything after it up to the line ending is taken as-is.
    RemainingLineContent: ~[ \t\r\n] ~[\r\n]*;

// We need to do a little context-aware lexing when we hit a `cal` or `defcal`
// token.  In both cases, there's a small interlude before the pulse grammar
// block starts, and we need to be able to lex our way through that.  We don't
// want to tie this grammar to one host language by injecting host code to
// manage the state of the lexer, so instead we need to do a little duplication
// of the tokens, because ANTLR doesn't allow us to inherit rules directly.
mode CAL_PRELUDE;
    // Between `cal` and its opening brace only whitespace and comments are
    // recognised; both are skipped.
    CAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
    CAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
    // The opening brace is emitted as an ordinary `LBRACE` token and switches
    // the lexer to `CAL_BLOCK` for the opaque block body.
    CAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);

// Lexes the part of a `defcal` statement between the `defcal` keyword and the
// opening brace of its body.  Every rule below re-uses a default-mode rule and
// re-labels the result with `type(...)`, so the parser only ever sees the
// default-mode token types.
mode DEFCAL_PRELUDE;
    DEFCAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
    DEFCAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
    // The opening brace is emitted as an ordinary `LBRACE` token and switches
    // the lexer to `CAL_BLOCK` for the opaque block body.
    DEFCAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);

    // Duplications of valid constant expression tokens that may appear in the
    // argument list.  This is an unfortunately large number of duplications.

    // Types.
    DEFCAL_PRELUDE_QREG: QREG -> type(QREG);
    DEFCAL_PRELUDE_QUBIT: QUBIT -> type(QUBIT);
    DEFCAL_PRELUDE_CREG: CREG -> type(CREG);
    DEFCAL_PRELUDE_BOOL: BOOL -> type(BOOL);
    DEFCAL_PRELUDE_BIT: BIT -> type(BIT);
    DEFCAL_PRELUDE_INT: INT -> type(INT);
    DEFCAL_PRELUDE_UINT: UINT -> type(UINT);
    DEFCAL_PRELUDE_ANGLE: ANGLE -> type(ANGLE);
    DEFCAL_PRELUDE_FLOAT: FLOAT -> type(FLOAT);
    DEFCAL_PRELUDE_COMPLEX: COMPLEX -> type(COMPLEX);
    DEFCAL_PRELUDE_ARRAY: ARRAY -> type(ARRAY);
    DEFCAL_PRELUDE_DURATION: DURATION -> type(DURATION);

    // Punctuation.
    DEFCAL_PRELUDE_LBRACKET: LBRACKET -> type(LBRACKET);
    DEFCAL_PRELUDE_RBRACKET: RBRACKET -> type(RBRACKET);
    DEFCAL_PRELUDE_LPAREN: LPAREN -> type(LPAREN);
    DEFCAL_PRELUDE_RPAREN: RPAREN -> type(RPAREN);
    DEFCAL_PRELUDE_ARROW: ARROW -> type(ARROW);
    DEFCAL_PRELUDE_COMMA: COMMA -> type(COMMA);
    DEFCAL_PRELUDE_PLUS: PLUS -> type(PLUS);
    DEFCAL_PRELUDE_MINUS: MINUS -> type(MINUS);
    DEFCAL_PRELUDE_ASTERISK: ASTERISK -> type(ASTERISK);
    DEFCAL_PRELUDE_SLASH: SLASH -> type(SLASH);
    DEFCAL_PRELUDE_BitshiftOperator: BitshiftOperator -> type(BitshiftOperator);

    // Literals and names.
    DEFCAL_PRELUDE_BitstringLiteral: BitstringLiteral -> type(BitstringLiteral);
    DEFCAL_PRELUDE_BinaryIntegerLiteral: BinaryIntegerLiteral -> type(BinaryIntegerLiteral);
    DEFCAL_PRELUDE_OctalIntegerLiteral: OctalIntegerLiteral -> type(OctalIntegerLiteral);
    DEFCAL_PRELUDE_DecimalIntegerLiteral: DecimalIntegerLiteral -> type(DecimalIntegerLiteral);
    DEFCAL_PRELUDE_HexIntegerLiteral: HexIntegerLiteral -> type(HexIntegerLiteral);
    DEFCAL_PRELUDE_FloatLiteral: FloatLiteral -> type(FloatLiteral);

    // The keyword rules must stay above the `Identifier` duplicate: on an
    // equal-length match the rule listed first wins.
    DEFCAL_PRELUDE_MEASURE: MEASURE -> type(MEASURE);
    DEFCAL_PRELUDE_DELAY: DELAY -> type(DELAY);
    DEFCAL_PRELUDE_RESET: RESET -> type(RESET);
    DEFCAL_PRELUDE_Identifier: Identifier -> type(Identifier);
    DEFCAL_PRELUDE_HardwareQubit: HardwareQubit -> type(HardwareQubit);

// The meat-and-potatoes of matching a calibration block with balanced inner
// braces.  We enter `CAL_BLOCK` with the opening brace already emitted as its
// own `LBRACE` token (that's how the lexer knew to swap modes to us).  We want
// to tokenise everything up to the balancing brace as a single
// `CalibrationBlock` token.  Since we have _no_ knowledge of what the inner
// language is doing, things like unbalanced braces in comments will cause a
// failure, but there's not much we can do about that without greater spec
// restrictions.
mode CAL_BLOCK;
    // A `{ ... }` group inside the block, recursing for deeper nesting.
    fragment NestedCalibrationBlock: LBRACE (NestedCalibrationBlock | ~[{}])* RBRACE;
    // One or more brace-free characters or balanced nested groups.  The `+`
    // means an empty block body produces no `CalibrationBlock` token at all.
    CalibrationBlock: (NestedCalibrationBlock | ~[{}])+;
    // The balancing brace is emitted as an ordinary `RBRACE` token and returns
    // the lexer to the default mode.
    CAL_BLOCK_RBRACE: RBRACE -> type(RBRACE), mode(DEFAULT_MODE);