Skip to content

Commit 2fa4ab3

Browse files
author
Tim Hutt
committed
Switch to using sets for RISC-V assembly & improve organisation
Sets are the preferred method. I also reorganised the states a bit to make things work slightly more nicely (e.g. it highlights registers in preprocessor definitions).
1 parent 083b83c commit 2fa4ab3

1 file changed

Lines changed: 66 additions & 20 deletions

File tree

lib/rouge/lexers/riscvasm.rb

Lines changed: 66 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,16 @@ class RiscvAsm < RegexLexer
1414
filenames '*.s', '*.S'
1515

1616
# C preprocessor directives. These are only processed for .S files - not .s - however
17-
# the parsing is the same in both cases.
17+
# the parsing is mostly the same in both cases.
1818
def self.preproc_directive
19-
@preproc_directive ||= %w(
19+
@preproc_directive = Set.new %w(
2020
define elif else endif error if ifdef ifndef include line pragma undef warning
2121
)
2222
end
2323

2424
# Standard register names, including ABI names.
2525
def self.register
26-
@register ||= %w(
26+
@register = Set.new %w(
2727
x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31
2828
f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
2929
v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15 v16 v17 v18 v19 v20 v21 v22 v23 v24 v25 v26 v27 v28 v29 v30 v31
@@ -34,14 +34,14 @@ def self.register
3434

3535
# These keywords are used for some vector instructions (vsetvli etc.).
3636
def self.other_keyword
37-
@other_keyword ||= %w(
37+
@other_keyword = Set.new %w(
3838
e8 e16 e32 e64 mf8 mf4 mf2 m1 m2 m4 m8 ta tu ma mu v0.t
3939
)
4040
end
4141

4242
# For %pcrel_hi(...) relocations etc.
4343
def self.relocation_function
44-
@relocation_function ||= %w(
44+
@relocation_function = Set.new %w(
4545
hi lo
4646
pcrel_hi pcrel_lo
4747
tprel_hi tprel_lo
@@ -59,6 +59,11 @@ def self.relocation_function
5959
rule %r(/\*.*?\*/)m, Comment::Multiline
6060
end
6161

62+
# Consumes the rest of a `#` line as a single-line comment. This state is only needed because `#` can also begin a preprocessor directive.
63+
state :in_single_line_comment do
64+
rule %r/.*/, Comment::Single, :pop!
65+
end
66+
6267
state :literals do
6368
# 1f, 2b forward/backward label references.
6469
rule %r/[0-9]+[fb]\b/, Name::Label
@@ -82,6 +87,36 @@ def self.relocation_function
8287
rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
8388
end
8489

90+
state :relocations do
91+
rule %r/%(\w+)\b/ do |m|
92+
if self.class.relocation_function.include?(m[1])
93+
token Name::Builtin
94+
else
95+
token Text
96+
end
97+
end
98+
end
99+
100+
# Registers, keywords, variables and operators.
101+
state :words_and_operators do
102+
# Register names, keywords
103+
rule %r/([\w.]+)\b/ do |m|
104+
if self.class.register.include?(m[1])
105+
token Name::Constant
106+
elsif self.class.other_keyword.include?(m[1])
107+
token Name::Constant
108+
else
109+
token Name::Variable
110+
end
111+
end
112+
113+
# Variables.
114+
rule %r/\\?[\$\w]+/, Name::Variable
115+
116+
# Operators
117+
rule %r/[-~*\/%<>|&\^!+(),]/, Operator
118+
end
119+
85120
state :root do
86121
# Preprocessor directive. Awkwardly these are the same as single line comments.
87122
# It seems like GCC will silently ignore unknown directives so that comments
@@ -91,7 +126,17 @@ def self.relocation_function
91126
#
92127
# Then it will silently ignore it!
93128
#
94-
rule %r/^[ \t]*#[ \t]*(:?#{RiscvAsm.preproc_directive.join('|')})\b/, Comment::Preproc, :preprocessor_directive
129+
# [ \t] (rather than \s) is used after the `#` to avoid matching `#\nfoo`.
130+
rule %r/^\s*#[ \t]*(\w+)\b/ do |m|
131+
if self.class.preproc_directive.include?(m[1])
132+
token Comment::Preproc
133+
push :preprocessor_directive
134+
else
135+
token Comment::Single
136+
# Match the rest of the line as a comment too.
137+
push :in_single_line_comment
138+
end
139+
end
95140

96141
mixin :comments_and_whitespace
97142

@@ -110,44 +155,45 @@ def self.relocation_function
110155

111156
state :preprocessor_directive do
112157
mixin :comments_and_whitespace
113-
mixin :literals
114158

115159
# Escaped newline. This is one case where you can't parse
116160
# .S and .s the same - if you try to escape a newline in a
117161
# preprocessor directive in .S it will work but in .s it
118162
# will be ignored. Here we assume .S.
119163
rule %r/\\\n/, Text
120164

121-
rule %r/./, Text
122165
rule %r/\n/, Text, :pop!
166+
167+
mixin :literals
168+
mixin :relocations
169+
mixin :words_and_operators
170+
171+
rule %r/./, Text
123172
end
124173

125174
state :directive do
126175
mixin :comments_and_whitespace
176+
177+
rule %r/\n/, Text, :pop!
178+
127179
mixin :literals
180+
mixin :relocations
181+
mixin :words_and_operators
128182

129183
rule %r/./, Text
130-
rule %r/\n/, Text, :pop!
131184
end
132185

133186
state :args do
134187
mixin :comments_and_whitespace
135-
mixin :literals
136188

137189
# End of instruction.
138190
rule %r/[;\n]/, Text::Whitespace, :pop!
139191

140-
# Register names.
141-
rule %r/(?:#{RiscvAsm.register.join('|')})\b/, Name::Constant
142-
# Other keywords.
143-
rule %r/(?:#{RiscvAsm.other_keyword.join('|')})\b/, Name::Constant
144-
# Relocations
145-
rule %r/%(?:#{RiscvAsm.relocation_function.join('|')})\b/, Name::Builtin
192+
mixin :literals
193+
mixin :relocations
194+
mixin :words_and_operators
146195

147-
# Operators
148-
rule %r/[-~*\/%<>|&\^!+(),]/, Operator
149-
# Variables.
150-
rule %r/\\?[\$\w]+/, Name::Variable
196+
rule %r/./, Text
151197
end
152198
end
153199
end

0 commit comments

Comments
 (0)