diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index afbbee4d0d9..e60dbbe8ec9 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -7,6 +7,9 @@ use self::ptr::Ptr; mod classes; use self::classes::*; +mod numbers; +use self::numbers::scan_number; + pub fn next_token(text: &str) -> Token { assert!(!text.is_empty()); let mut ptr = Ptr::new(text); @@ -50,69 +53,6 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { IDENT } -fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { - if c == '0' { - match ptr.next().unwrap_or('\0') { - 'b' | 'o' => { - ptr.bump(); - scan_digits(ptr, false); - } - 'x' => { - ptr.bump(); - scan_digits(ptr, true); - } - '0'...'9' | '_' | '.' | 'e' | 'E' => { - scan_digits(ptr, true); - } - _ => return INT_NUMBER, - } - } else { - scan_digits(ptr, false); - } - - // might be a float, but don't be greedy if this is actually an - // integer literal followed by field/method access or a range pattern - // (`0..2` and `12.foo()`) - if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { - // might have stuff after the ., and if it does, it needs to start - // with a number - ptr.bump(); - scan_digits(ptr, false); - scan_float_exponent(ptr); - return FLOAT_NUMBER; - } - // it might be a float if it has an exponent - if ptr.next_is('e') || ptr.next_is('E') { - scan_float_exponent(ptr); - return FLOAT_NUMBER; - } - INT_NUMBER -} - -fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { - while let Some(c) = ptr.next() { - match c { - '_' | '0'...'9' => { - ptr.bump(); - } - 'a'...'f' | 'A' ... 'F' if allow_hex => { - ptr.bump(); - } - _ => return - } - } -} - -fn scan_float_exponent(ptr: &mut Ptr) { - if ptr.next_is('e') || ptr.next_is('E') { - ptr.bump(); - if ptr.next_is('-') || ptr.next_is('+') { - ptr.bump(); - } - scan_digits(ptr, false); - } -} - fn string_literal_start(c: char, c1: Option, c2: Option) -> bool { match (c, c1, c2) { ('r', Some('"'), _) | diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs new file mode 100644 index 00000000000..4c7edfe1ccb --- /dev/null +++ b/src/lexer/numbers.rs @@ -0,0 +1,68 @@ +use lexer::ptr::Ptr; +use lexer::classes::*; + +use {SyntaxKind}; +use syntax_kinds::*; + +pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { + if c == '0' { + match ptr.next().unwrap_or('\0') { + 'b' | 'o' => { + ptr.bump(); + scan_digits(ptr, false); + } + 'x' => { + ptr.bump(); + scan_digits(ptr, true); + } + '0'...'9' | '_' | '.' | 'e' | 'E' => { + scan_digits(ptr, true); + } + _ => return INT_NUMBER, + } + } else { + scan_digits(ptr, false); + } + + // might be a float, but don't be greedy if this is actually an + // integer literal followed by field/method access or a range pattern + // (`0..2` and `12.foo()`) + if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { + // might have stuff after the ., and if it does, it needs to start + // with a number + ptr.bump(); + scan_digits(ptr, false); + scan_float_exponent(ptr); + return FLOAT_NUMBER; + } + // it might be a float if it has an exponent + if ptr.next_is('e') || ptr.next_is('E') { + scan_float_exponent(ptr); + return FLOAT_NUMBER; + } + INT_NUMBER +} + +fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { + while let Some(c) = ptr.next() { + match c { + '_' | '0'...'9' => { + ptr.bump(); + } + 'a'...'f' | 'A' ... 'F' if allow_hex => { + ptr.bump(); + } + _ => return + } + } +} + +fn scan_float_exponent(ptr: &mut Ptr) { + if ptr.next_is('e') || ptr.next_is('E') { + ptr.bump(); + if ptr.next_is('-') || ptr.next_is('+') { + ptr.bump(); + } + scan_digits(ptr, false); + } +} diff --git a/tests/data/lexer/0004_number.rs b/tests/data/lexer/0004_number.rs index af53ff2cd13..0c0d3762703 100644 --- a/tests/data/lexer/0004_number.rs +++ b/tests/data/lexer/0004_number.rs @@ -5,3 +5,4 @@ 0e+1 0.e+1 0.0E-2 +0___0.10000____0000e+111__ \ No newline at end of file diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt index 7dedd2cacb3..94fe0302d6b 100644 --- a/tests/data/lexer/0004_number.txt +++ b/tests/data/lexer/0004_number.txt @@ -60,3 +60,4 @@ INT_NUMBER 1 "1" WHITESPACE 1 "\n" FLOAT_NUMBER 6 "0.0E-2" WHITESPACE 1 "\n" +FLOAT_NUMBER 26 "0___0.10000____0000e+111__"