"""Minimal lexer for 'sh'-style command lines (clang/utils/test/ShUtil.py)."""

import unittest

import Util


class ShLexer:
    """Split an 'sh'-style command string into tokens.

    Arguments are produced as plain strings.  Operators are produced as
    1-tuples such as ``('|',)`` or ``('>>',)``, which keeps them
    distinguishable from an argument that happens to have the same text.
    """

    def __init__(self, data):
        """Prepare to lex the string `data` from its first character."""
        self.data = data
        self.pos = 0
        self.end = len(data)

    def eat(self):
        """Consume and return the next character.

        Callers must ensure the lexer is not at the end of the input.
        """
        c = self.data[self.pos]
        self.pos += 1
        return c

    def look(self):
        """Return the next character without consuming it.

        Callers must ensure the lexer is not at the end of the input.
        """
        return self.data[self.pos]

    def maybe_eat(self, c):
        """
        maybe_eat(c) - Consume the character c if it is the next character,
        returning True if a character was consumed.

        Returns False (instead of indexing past the data) when the lexer is
        already at the end of the input.
        """
        if self.pos != self.end and self.data[self.pos] == c:
            self.pos += 1
            return True
        return False

    def lex_arg(self, c):
        """Lex one argument whose first, already consumed, character is `c`.

        The argument runs until whitespace or an operator character and may
        contain single- or double-quoted sections, which are unquoted and
        concatenated with the surrounding text.
        """
        if c in "'\"":
            arg = self.lex_arg_quoted(c)
        else:
            arg = c
        while self.pos != self.end:
            c = self.look()
            # ';' is emitted as an operator by lex_one_token, so it has to
            # terminate an argument just like the other operator characters.
            if c.isspace() or c in "|><&;":
                break
            elif c in "'\"":
                # A quoted section may start in the middle of an argument.
                self.eat()
                arg += self.lex_arg_quoted(c)
            else:
                arg += self.eat()
        return arg

    def lex_arg_quoted(self, delim):
        """Lex the remainder of a quoted section opened by `delim`.

        The opening quote must already be consumed.  Returns the text up to
        (not including) the closing quote.  Issues a warning through
        Util.warning and returns what was collected so far when the input
        ends before the closing quote is seen.
        """
        text = ''
        while self.pos != self.end:
            c = self.eat()
            if c == delim:
                return text
            elif c == '\\' and delim == '"':
                # Shell escaping is just '\"' to avoid termination, no actual
                # escaping.
                if self.pos == self.end:
                    Util.warning("escape at end of quoted argument in: %r" %
                                 self.data)
                    return text
                c = self.eat()
                if c != delim:
                    # Not an escaped quote: the backslash stays literal.
                    text += '\\'
                text += c
            else:
                text += c
        Util.warning("missing quote character in %r" % self.data)
        return text

    def lex_one_token(self):
        """
        lex_one_token - Lex a single 'sh' token.

        Returns a 1-tuple holding the operator text for an operator, or a
        string for an argument.
        """
        c = self.eat()
        if c == ';':
            # The trailing comma matters: (c) would be the bare string.
            return (c,)
        if c == '|':
            if self.maybe_eat('|'):
                return ('||',)
            return (c,)
        if c == '&':
            if self.maybe_eat('&'):
                return ('&&',)
            if self.maybe_eat('>'):
                return ('&>',)
            return (c,)
        if c == '>':
            if self.maybe_eat('&'):
                return ('>&',)
            if self.maybe_eat('>'):
                return ('>>',)
            return (c,)
        if c == '<':
            if self.maybe_eat('&'):
                return ('<&',)
            if self.maybe_eat('<'):
                return ('<<',)
            return (c,)
        return self.lex_arg(c)

    def lex(self):
        """Generate the tokens of the input, skipping whitespace between
        them."""
        while self.pos != self.end:
            if self.look().isspace():
                self.eat()
            else:
                yield self.lex_one_token()

###


class TestShLexer(unittest.TestCase):
    """Unit tests for ShLexer."""

    def lex(self, data):
        """Return the complete token list for `data`."""
        return list(ShLexer(data).lex())

    def testops(self):
        self.assertEqual(self.lex('a2>c'),
                         ['a2', ('>',), 'c'])
        self.assertEqual(self.lex('a 2>c'),
                         ['a', '2', ('>',), 'c'])
        # ';' is an operator token and ends the preceding argument.
        self.assertEqual(self.lex('a;b'),
                         ['a', (';',), 'b'])
        # An operator as the very last character must not run off the end.
        self.assertEqual(self.lex('a|'),
                         ['a', ('|',)])
        self.assertEqual(self.lex('a<b'),
                         ['a', ('<',), 'b'])
        self.assertEqual(self.lex('a<<b'),
                         ['a', ('<<',), 'b'])

    def testquoting(self):
        self.assertEqual(self.lex(""" 'a' """),
                         ['a'])
        self.assertEqual(self.lex(""" "hello\\"world" """),
                         ['hello"world'])
        self.assertEqual(self.lex(""" "hello\\'world" """),
                         ["hello\\'world"])
        self.assertEqual(self.lex(""" he"llo wo"rld """),
                         ["hello world"])
        self.assertEqual(self.lex(""" a'b c'd """),
                         ["ab cd"])

if __name__ == '__main__':
    unittest.main()