@@ -14,16 +14,16 @@ class RiscvAsm < RegexLexer
1414 filenames '*.s' , '*.S'
1515
1616 # C preprocessor directives. These are only processed for .S files - not .s - however
17- # the parsing is the same in both cases.
17+ # the parsing is mostly the same in both cases. The Set is built once and memoized.
1818 def self . preproc_directive
19- @preproc_directive ||= %w(
19+ @preproc_directive ||= Set . new %w(
2020 define elif else endif error if ifdef ifndef include line pragma undef warning
2121 )
2222 end
2323
2424 # Standard register name, including ABI names.
2525 def self . register
26- @register ||= %w(
26+ @register ||= Set . new %w(
2727 x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31
2828 f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
2929 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15 v16 v17 v18 v19 v20 v21 v22 v23 v24 v25 v26 v27 v28 v29 v30 v31
@@ -34,14 +34,14 @@ def self.register
3434
3535 # These keywords are used for some vector instructions (vsetvli etc.).
3636 def self . other_keyword
37- @other_keyword ||= %w(
37+ @other_keyword ||= Set . new %w(
3838 e8 e16 e32 e64 mf8 mf4 mf2 m1 m2 m4 m8 ta tu ma mu v0.t
3939 )
4040 end
4141
4242 # For %pcrel_hi(...) relocations etc.
4343 def self . relocation_function
44- @relocation_function ||= %w(
44+ @relocation_function ||= Set . new %w(
4545 hi lo
4646 pcrel_hi pcrel_lo
4747 tprel_hi tprel_lo
@@ -59,6 +59,11 @@ def self.relocation_function
5959 rule %r(/\* .*?\* /)m , Comment ::Multiline
6060 end
6161
62+ # Only needed for preprocessor handling: :root pushes this state when the word after `#` is not a known directive.
63+ state :in_single_line_comment do
64+ rule %r/.*/ , Comment ::Single , :pop! # the rest of the line (`.` stops at the newline) is comment text, then pop
65+ end
66+
6267 state :literals do
6368 # 1f, 2b forward/backward label references.
6469 rule %r/[0-9]+[fb]\b / , Name ::Label
@@ -82,6 +87,36 @@ def self.relocation_function
8287 rule %r/'(\\ \\ |\\ '|[^'])*'/ , Str ::Single
8388 end
8489
90+ state :relocations do # `%name` relocation operators; mixed into :preprocessor_directive, :directive and :args
91+ rule %r/%(\w +)\b / do |m | # m[1] is the word following `%`
92+ if self . class . relocation_function . include? ( m [ 1 ] )
93+ token Name ::Builtin
94+ else
95+ token Text # not a known relocation function: the whole `%name` match is plain text
96+ end
97+ end
98+ end
99+
100+ # Registers, keywords, variables and operators. Shared by the states that parse operands.
101+ state :words_and_operators do
102+ # Register names and vector keywords become Name::Constant; any other word is a Name::Variable.
103+ rule %r/([\w .]+)\b / do |m |
104+ if self . class . register . include? ( m [ 1 ] )
105+ token Name ::Constant
106+ elsif self . class . other_keyword . include? ( m [ 1 ] )
107+ token Name ::Constant
108+ else
109+ token Name ::Variable
110+ end
111+ end
112+
113+ # Variables the rule above does not match - in practice those starting with `\` or `$`.
114+ rule %r/\\ ?[\$ \w ]+/ , Name ::Variable
115+
116+ # Operators and punctuation (one character per token).
117+ rule %r/[-~*\/ %<>|&\^ !+(),]/ , Operator
118+ end
119+
85120 state :root do
86121 # Preprocessor directive. Awkwardly these are the same as single line comments.
87122 # It seems like GCC will silently ignore unknown directives so that comments
@@ -91,7 +126,17 @@ def self.relocation_function
91126 #
92127 # Then it will silently ignore it!
93128 #
94- rule %r/^[ \t ]*#[ \t ]*(:?#{ RiscvAsm . preproc_directive . join ( '|' ) } )\b / , Comment ::Preproc , :preprocessor_directive
129+ # [ \t] is used here to avoid matching `#\nfoo`.
130+ rule %r/^\s *#[ \t ]*(\w +)\b / do |m |
131+ if self . class . preproc_directive . include? ( m [ 1 ] )
132+ token Comment ::Preproc
133+ push :preprocessor_directive
134+ else
135+ token Comment ::Single
136+ # Match the rest of the line as a comment too.
137+ push :in_single_line_comment
138+ end
139+ end
95140
96141 mixin :comments_and_whitespace
97142
@@ -110,44 +155,45 @@ def self.relocation_function
110155
111156 state :preprocessor_directive do
112157 mixin :comments_and_whitespace
113- mixin :literals
114158
115159 # Escaped newline. This is one case where you can't parse
116160 # .S and .s the same - if you try to escape a newline in a
117161 # preprocessor directive in .S it will work but in .s it
118162 # will be ignored. Here we assume .S.
119163 rule %r/\\ \n / , Text
120164
121- rule %r/./ , Text
122165 rule %r/\n / , Text , :pop!
166+
167+ mixin :literals # operand highlighting, same mixins as :directive and :args
168+ mixin :relocations
169+ mixin :words_and_operators
170+
171+ rule %r/./ , Text # fallback: any other single character is plain text
123172 end
124173
125174 state :directive do
126175 mixin :comments_and_whitespace
176+
177+ rule %r/\n / , Text , :pop! # a newline ends the directive and returns to the previous state
178+
127179 mixin :literals
180+ mixin :relocations # operand highlighting, same mixins as :preprocessor_directive and :args
181+ mixin :words_and_operators
128182
129183 rule %r/./ , Text
130- rule %r/\n / , Text , :pop!
131184 end
132185
133186 state :args do
134187 mixin :comments_and_whitespace
135- mixin :literals
136188
137189 # End of instruction.
138190 rule %r/[;\n ]/ , Text ::Whitespace , :pop!
139191
140- # Register names.
141- rule %r/(?:#{ RiscvAsm . register . join ( '|' ) } )\b / , Name ::Constant
142- # Other keywords.
143- rule %r/(?:#{ RiscvAsm . other_keyword . join ( '|' ) } )\b / , Name ::Constant
144- # Relocations
145- rule %r/%(?:#{ RiscvAsm . relocation_function . join ( '|' ) } )\b / , Name ::Builtin
192+ mixin :literals # operand highlighting, same mixins as :preprocessor_directive and :directive
193+ mixin :relocations
194+ mixin :words_and_operators
146195
147- # Operators
148- rule %r/[-~*\/ %<>|&\^ !+(),]/ , Operator
149- # Variables.
150- rule %r/\\ ?[\$ \w ]+/ , Name ::Variable
196+ rule %r/./ , Text # fallback: any other single character is plain text
151197 end
152198 end
153199 end
0 commit comments