-
-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy pathstrings.py
More file actions
239 lines (203 loc) · 7.92 KB
/
strings.py
File metadata and controls
239 lines (203 loc) · 7.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
"""
String-related evaluation functions.
"""
import re
from mathics_scanner.characters import replace_box_unicode_with_ascii
from mathics.builtin.box.layout import RowBox
from mathics.core.atoms import Integer, Integer0, Integer1, Integer3, String
from mathics.core.convert.expression import to_mathics_list
from mathics.core.convert.python import from_bool
from mathics.core.convert.regex import to_regex
from mathics.core.element import BaseElement
from mathics.core.evaluation import Evaluation
from mathics.core.expression import Expression
from mathics.core.expression_predefined import MATHICS3_INFINITY
from mathics.core.list import ListExpression
from mathics.core.symbols import Symbol, SymbolTrue
from mathics.eval.encoding import EncodingNameError
from mathics.format.box import format_element
def eval_ToString(
    expr: BaseElement, form: Symbol, encoding: String, evaluation: Evaluation
) -> String:
    """Format ``expr`` with ``form`` and return its text rendering as a String.

    The expression is boxed via ``format_element`` and the boxes are rendered
    with ``to_text`` using ``encoding``.  When that raises
    ``EncodingNameError``, a ``Get::noopen`` message is issued and the text is
    rendered again with the ``"Unicode"`` encoding.

    NOTE(review): ``encoding`` is annotated as ``String`` but is concatenated
    with plain ``str`` values below, so it looks like a Python ``str`` is
    expected at runtime -- confirm against the caller.
    """
    boxes = format_element(expr, evaluation, form)
    try:
        rendered = String(boxes.to_text(evaluation=evaluation, encoding=encoding))
    except EncodingNameError:
        # Mimic the WMA behavior. In the future, we can implement the mechanism
        # with encodings stored in .m files, and give a chance with it.
        missing_file = "encodings/" + encoding + ".m"
        evaluation.message("Get", "noopen", String(missing_file))
        rendered = String(boxes.to_text(evaluation=evaluation, encoding="Unicode"))
    return rendered
def eval_StringContainsQ(name, string, patt, evaluation, options, matched):
    """Test whether a string (or each string of a list) matches any pattern.

    ``patt`` is a single pattern or a ``List`` of patterns; each one is
    converted with ``to_regex``.  If a conversion fails, a
    ``StringExpression::invld`` message is issued and ``None`` is returned.

    ``string`` is a single element or a ``List`` of elements; if any of them
    has no string value, a ``<name>::strse`` message is issued and ``None`` is
    returned.

    For each subject string the result is ``from_bool(matched)`` when at least
    one pattern is found by ``re.search`` and ``from_bool(not matched)``
    otherwise.  A ``List`` input yields a Mathics list of such results.
    The ``System`IgnoreCase`` option adds ``re.IGNORECASE`` to the flags.
    """
    # Collect the candidate patterns and validate each of them.
    candidates = patt.elements if patt.has_form("List", None) else [patt]

    regexes = []
    for candidate in candidates:
        regex = to_regex(candidate, show_message=evaluation.message)
        if regex is None:
            evaluation.message("StringExpression", "invld", candidate, patt)
            return
        regexes.append(regex)

    flags = re.MULTILINE
    if options["System`IgnoreCase"] is SymbolTrue:
        flags |= re.IGNORECASE

    def _verdict(text):
        # ``matched`` selects which truth value a hit maps to.
        found = any(re.search(regex, text, flags=flags) for regex in regexes)
        return from_bool(matched if found else not matched)

    def _report_bad_string():
        evaluation.message(
            name, "strse", Integer1, Expression(Symbol(name), string, patt)
        )

    # Check string validity and perform the regex search.
    if string.has_form("List", None):
        texts = [element.get_string_value() for element in string.elements]
        if any(text is None for text in texts):
            _report_bad_string()
            return
        return to_mathics_list(*[_verdict(text) for text in texts])

    text = string.get_string_value()
    if text is None:
        _report_bad_string()
        return
    return _verdict(text)
def eval_StringFind(self, string, rule, n, evaluation, options, cases):
    """Validate the arguments and delegate the search to ``self._find``.

    * ``string``: one element or a ``ListExpression`` of elements; each must
      have a string value, otherwise ``<name>::strse`` is issued.
    * ``rule``: one item or a ``List`` of items.  A two-element ``Rule`` is
      converted to ``(regex, right-hand side)``.  Any other item is accepted
      only when ``cases`` is true, giving ``(regex, None)``; otherwise
      ``<name>::srep`` is issued.
    * ``n``: ``System`Private`Null`` or ``MATHICS3_INFINITY`` map to ``0``;
      otherwise it must be a non-negative integer, else ``<name>::innf`` is
      issued.

    Returns ``None`` after any message, otherwise the result of
    ``self._find`` (wrapped in a Mathics list when ``string`` was a list).
    """
    name = self.get_name()

    # Rebuild the original call; it is the argument of the error messages.
    if n.sameQ(Symbol("System`Private`Null")):
        n = None
        expr = Expression(Symbol(name), string, rule)
    else:
        expr = Expression(Symbol(name), string, rule, n)

    # convert string
    if isinstance(string, ListExpression):
        py_strings = [element.get_string_value() for element in string.elements]
        bad_string = any(value is None for value in py_strings)
    else:
        py_strings = string.get_string_value()
        bad_string = py_strings is None
    if bad_string:
        evaluation.message(name, "strse", Integer1, expr)
        return

    # convert rule
    def convert_rule(r):
        is_rule = r.has_form("Rule", None) and len(r.elements) == 2
        if not is_rule and not cases:
            evaluation.message(name, "srep", r)
            return None

        if is_rule:
            pattern, replacement = r.elements
        else:
            pattern, replacement = r, None

        regex = to_regex(pattern, show_message=evaluation.message)
        if regex is None:
            evaluation.message("StringExpression", "invld", pattern, pattern)
            return None
        return regex, replacement

    rule_items = rule.elements if rule.has_form("List", None) else [rule]
    # Convert every rule before bailing out so that each invalid one
    # gets its own message.
    py_rules = [convert_rule(item) for item in rule_items]
    if any(converted is None for converted in py_rules):
        return None

    # convert n
    # NOTE(review): both "no n" and Infinity become 0, so ``_find`` presumably
    # treats 0 as "no limit" -- confirm how an explicit n == 0 should behave.
    if n is None or n.sameQ(MATHICS3_INFINITY):
        py_n = 0
    else:
        py_n = n.get_int_value()
        if py_n is None or py_n < 0:
            evaluation.message(name, "innf", Integer3, expr)
            return

    # flags
    flags = re.MULTILINE
    if options["System`IgnoreCase"] is SymbolTrue:
        flags |= re.IGNORECASE

    if not isinstance(py_strings, list):
        return self._find(py_strings, py_rules, py_n, flags, evaluation)

    found = [
        self._find(py_stri, py_rules, py_n, flags, evaluation)
        for py_stri in py_strings
    ]
    return to_mathics_list(*found)
def safe_backquotes(string: str) -> str:
    r"""Handle escaped backquotes.

    Scanning left to right, every backslash-backquote pair (``\```) is
    replaced by the text ``\[RawBackquote]``, while a doubled backslash
    (``\\``) is consumed as a unit and left unchanged, so the backslash of
    an escaped backslash never escapes a following backquote.

    The substitution is done in a single pass.  No temporary placeholder is
    inserted, so text that already contains ``\[RawBackslash]`` literally is
    returned untouched.

    Parameters:
        string: the text to process.

    Returns:
        The text with escaped backquotes replaced.
    """
    # TODO: Fix in the scanner how escaped backslashes
    # are parsed.
    # "\\`" must be parsed as "\\`" in order this
    # works properly, but the parser converts `\\`
    # into `\`.

    def _translate(match):
        token = match.group(0)
        # Only the escaped backquote is rewritten; an escaped
        # backslash is matched just to skip over it.
        return r"\[RawBackquote]" if token == r"\`" else token

    # The alternation order matters: a double backslash must be tried first
    # so that its second backslash cannot pair with a following backquote.
    return re.sub(r"\\\\|\\`", _translate, string)
def eval_StringForm_MakeBoxes(strform: String, items, form, evaluation: Evaluation):
    """MakeBoxes[StringForm[s_String, items___], form_]

    The template text is split on backquotes.  Pieces alternate between
    literal text and placeholders: an empty placeholder takes the item after
    the one used last, a numeric placeholder takes the item with that
    1-based index.

    Returns a ``String`` when the template has no backquote, otherwise a
    ``RowBox`` with the literal pieces and the formatted items.  On an
    unclosed backquote (``StringForm::sfq``) or an invalid or out-of-range
    placeholder (``StringForm::sfr``) a message is issued and the raw
    template text ``strform.value`` is returned.

    Raises ``ValueError`` if ``strform`` is not a ``String``.
    """
    if not isinstance(strform, String):
        raise ValueError

    boxed_items = [format_element(item, evaluation, form) for item in items]
    item_count = len(boxed_items)

    template = safe_backquotes(replace_box_unicode_with_ascii(strform.value))
    head, *tail = template.split("`")
    head_box = String(head)
    if not tail:
        # No backquote at all: the template is plain text.
        return head_box

    pieces = [head_box]
    next_slot = 0  # 0-based index of the item an empty placeholder takes
    last_pos = len(tail) - 1
    for pos, chunk in enumerate(tail):
        # After the leading text, pieces alternate placeholder / literal,
        # so odd positions are literal text.
        if pos % 2:
            pieces.append(String(chunk))
            continue

        # A placeholder in the last position means its opening '`'
        # was never closed.
        if pos == last_pos:
            evaluation.message("StringForm", "sfq", strform)
            return strform.value

        if chunk == "":
            # Empty placeholder: pick the next element.
            if next_slot >= item_count:
                evaluation.message(
                    "StringForm",
                    "sfr",
                    Integer(item_count + 1),
                    Integer(item_count),
                    strform,
                )
                return strform.value
            pieces.append(boxed_items[next_slot])
            next_slot += 1
            continue

        # Otherwise the placeholder must be a positive integer.
        try:
            slot = int(chunk)
        except ValueError:
            evaluation.message(
                "StringForm", "sfr", Integer0, Integer(item_count), strform
            )
            return strform.value

        # The index must lie in 1..item_count.
        if not 1 <= slot <= item_count:
            evaluation.message(
                "StringForm", "sfr", Integer(slot), Integer(item_count), strform
            )
            return strform.value

        pieces.append(boxed_items[slot - 1])
        next_slot = slot

    return RowBox(ListExpression(*pieces))