lexical_analysis.py
import re
# Token types and their regular expressions, tried in order
token_specs = [
    ('NUMBER', r'\d+'),
    ('IDENTIFIER', r'[a-zA-Z_]\w*'),
    ('OPERATOR', r'[+\-*/]'),
    ('PARENTHESIS', r'[()]'),
    ('ASSIGNMENT', r'='),
    ('WHITESPACE', r'\s+'),  # Skipped by the lexer, never emitted as a token
]
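
# Each pattern only ever needs to match at the start of the remaining
# input; for example, re.match(r'\d+', '42 + x').group(0) == '42'.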
# Lexical analyzer: repeatedly matches a token at the start of the
# remaining source, skipping whitespace between tokens
def lexer(source_code):
    tokens = []
    source_code = source_code.strip()  # Remove leading/trailing whitespace
    while source_code:
        matched = False
        for token_type, pattern in token_specs:
            match = re.match(pattern, source_code)  # Anchored at the start
            if match:
                value = match.group(0)
                tokens.append((token_type, value))
                # Consume the matched text and any whitespace that follows
                source_code = source_code[len(value):].lstrip()
                matched = True
                break
        if not matched:
            raise SyntaxError(f"Invalid token: {source_code}")
    return tokens
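
# For comparison, a minimal sketch of the same tokenizer built on a single
# combined regex with named groups, similar to the tokenizer example in the
# `re` module docs. The names TOKEN_REGEX and tokenize_alt are illustrative
# additions, not part of the original file.
TOKEN_REGEX = re.compile('|'.join(
    f'(?P<{name}>{pattern})' for name, pattern in token_specs
))

def tokenize_alt(source_code):
    tokens = []
    pos = 0
    while pos < len(source_code):
        # match(s, pos) anchors the combined pattern at position pos
        match = TOKEN_REGEX.match(source_code, pos)
        if not match:
            raise SyntaxError(f"Invalid token: {source_code[pos:]}")
        if match.lastgroup != 'WHITESPACE':  # Discard whitespace matches
            tokens.append((match.lastgroup, match.group()))
        pos = match.end()
    return tokens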
# Test the lexer on a sample program
source_code = '''
hello = world + I * 2
print(hello WORLD)
'''
tokens = lexer(source_code)
# Print the token stream
for token_type, value in tokens:
    print(f'Token: {token_type:<11} | Value: {value}')
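
# Expected output for the sample above:
#   Token: IDENTIFIER  | Value: hello
#   Token: ASSIGNMENT  | Value: =
#   Token: IDENTIFIER  | Value: world
#   Token: OPERATOR    | Value: +
#   Token: IDENTIFIER  | Value: I
#   Token: OPERATOR    | Value: *
#   Token: NUMBER      | Value: 2
#   Token: IDENTIFIER  | Value: print
#   Token: PARENTHESIS | Value: (
#   Token: IDENTIFIER  | Value: hello
#   Token: IDENTIFIER  | Value: WORLD
#   Token: PARENTHESIS | Value: )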