Commit 353c7ce

Add a lexer and a couple tests

1 parent 6540c28

File tree: 2 files changed, +96 -0 lines

  tests/test_lexer.py
  typelanguage/lexer.py

tests/test_lexer.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
import unittest
import logging

from ply.lex import LexToken

from typelanguage.lexer import TypeLexer
from typelanguage.types import *

class TestLexer(unittest.TestCase):

    def token(self, value, ty=None):
        t = LexToken()
        t.type = ty if ty is not None else value
        t.value = value
        t.lineno = -1
        t.lexpos = -1
        return t

    def assert_lex_equiv(self, s, stream2):
        # NOTE: lexer fails to reset after call?
        l = TypeLexer(debug=True)
        stream1 = list(l.tokenize(s))  # Save the stream for debug output when a test fails
        stream2 = list(stream2)
        assert len(stream1) == len(stream2)
        for token1, token2 in zip(stream1, stream2):
            print token1, token2
            assert token1.type == token2.type
            assert token1.value == token2.value

    @classmethod
    def setup_class(cls):
        logging.basicConfig()

    def test_simple_inputs(self):
        self.assert_lex_equiv('int', [self.token('int', 'ID')])
        self.assert_lex_equiv('[int]', [self.token('['), self.token('int', 'ID'), self.token(']')])
        self.assert_lex_equiv('int -> int', [self.token('int', 'ID'), self.token('->', 'ARROW'), self.token('int', 'ID')])
        self.assert_lex_equiv('*a', [self.token('*'), self.token('a', 'ID')])
        self.assert_lex_equiv('**a', [self.token('**', 'KWARGS'), self.token('a', 'ID')])
        self.assert_lex_equiv('*x, **a', [self.token('*'), self.token('x', 'ID'), self.token(','), self.token('**', 'KWARGS'), self.token('a', 'ID')])
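
For reference, a quick interactive check of the stream the last assertion expects. This is a sketch that assumes the package is importable from the repo root; note that PLY reports a literal character with the character itself as both type and value, which is why token() defaults the type to the value:

    from typelanguage.lexer import TypeLexer

    lexer = TypeLexer()
    for tok in lexer.tokenize('*x, **a'):
        print '%-8s%r' % (tok.type, tok.value)

    # Prints, matching test_simple_inputs:
    #   *       '*'
    #   ID      'x'
    #   ,       ','
    #   KWARGS  '**'
    #   ID      'a'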

typelanguage/lexer.py

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
import sys
import logging

import ply.lex

logger = logging.getLogger(__name__)

class TypeLexer(object):
    '''
    A lexical analyzer for the Python typelanguage.
    '''

    def __init__(self, debug=False):
        self.debug = debug

    def tokenize(self, string):
        '''
        Maps a string to an iterator over tokens. In other words: [char] -> [token]
        '''

        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
        new_lexer.latest_newline = 0
        new_lexer.input(string)

        while True:
            t = new_lexer.token()
            if t is None: break
            yield t

    # ============== PLY Lexer specification ==================
    # This probably should be private, but things like "literals"
    # might be a legitimate part of the public interface. Anyhow,
    # it is pythonic to give some rope to hang oneself with :-)

    literals = ['|', '(', ')', '{', '}', '[', ']', ':', '*', ',', ';']

    tokens = ['ID', 'TYVAR', 'ARROW', 'KWARGS']

    t_ARROW = r'->'
    t_KWARGS = r'\*\*'
    t_ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    t_ignore = ' \t'

    def t_newline(self, t):
        r'\n'
        t.lexer.lineno += 1
        t.lexer.latest_newline = t.lexpos

    def t_error(self, t):
        # latest_newline is tracked on the lexer object (set in tokenize and t_newline)
        raise Exception('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))

if __name__ == '__main__':
    logging.basicConfig()
    lexer = TypeLexer(debug=True)
    for token in lexer.tokenize(sys.stdin.read()):
        print '%-20s%s' % (token.value, token.type)
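
Since columns are computed against latest_newline, an unexpected character on a later line is reported with a line-relative column. A small sketch of that error path (the input string here is made up for illustration; '$' matches no token rule and is not in literals):

    from typelanguage.lexer import TypeLexer

    lexer = TypeLexer()
    try:
        list(lexer.tokenize('int ->\n  $oops'))
    except Exception as exc:
        print exc  # Error on line 2, col 3: Unexpected character: $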
