-
-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy pathencoding.py
More file actions
72 lines (59 loc) · 1.91 KB
/
encoding.py
File metadata and controls
72 lines (59 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
Functions to format strings in a given encoding.
"""
from typing import Dict
from mathics.core.convert.op import operator_to_ascii, operator_to_unicode
# Map WMA character-encoding names to the names of the equivalent
# Python codecs. Each Windows code page has its own codec; in
# particular "WindowsGreek" is code page 1253, not 1252 (which is the
# Western European page used by "WindowsANSI").
ENCODING_WMA_TO_PYTHON: Dict[str, str] = {
    "WindowsEastEurope": "cp1250",
    "WindowsCyrillic": "cp1251",
    "WindowsANSI": "cp1252",
    "WindowsGreek": "cp1253",
    "WindowsTurkish": "cp1254",
}
# Map the Unicode character of every known operator to its ASCII
# spelling. Operators that have no entry in ``operator_to_ascii`` fall
# back to the named-character notation \[Name].
UNICODE_CHARACTER_TO_ASCII: Dict[str, str] = {
    ch: operator_to_ascii.get(name, rf"\[{name}]")
    for name, ch in operator_to_unicode.items()
}

# WMA encodes these characters differently from what the
# Mathics3-scanner tables provide, so override those entries here.
UNICODE_CHARACTER_TO_ASCII.update(
    {
        operator_to_unicode["Times"]: r" x ",
        # U+F74C is the private-use code point WMA assigns to
        # \[DifferentialD]. It is spelled as an escape because the raw
        # character is invisible in most editors and easily lost,
        # which would leave an empty-string key that can never match.
        "\uf74c": r"\[DifferentialD]",
    }
)
class EncodingNameError(Exception):
    """Raised when a character encoding name is not recognized."""
def get_encoding_table(encoding: str) -> Dict[str, str]:
    """
    Return the character translation table for ``encoding``.

    The table maps characters of the internal (Unicode) representation
    to the strings that replace them in the requested encoding. An
    empty table means that no character needs to be replaced.

    Only "Unicode", "UTF-8" and "ASCII" are handled for now.

    Raises ``EncodingNameError`` if ``encoding`` is none of those.
    """
    # In the final implementation, this should load the corresponding
    # json table or an encoding file as in WMA
    # SystemFiles/CharacterEncodings/*.m

    # Both of these can represent every internal character as-is.
    if encoding in ("Unicode", "UTF-8"):
        return {}
    if encoding == "ASCII":
        # This is the shared module-level table, not a copy.
        return UNICODE_CHARACTER_TO_ASCII
    # Name the offending encoding so the failure can be diagnosed.
    raise EncodingNameError(f"Unknown character encoding: {encoding!r}")
def encode_string_value(value: str, encoding: str) -> str:
    """
    Convert the Unicode string ``value`` to the required ``encoding``.

    Every character that has an entry in the translation table of
    ``encoding`` is replaced by that entry; all other characters are
    kept unchanged. When the table is empty, ``value`` is returned
    as-is.

    The table is obtained from ``get_encoding_table``, which raises
    ``EncodingNameError`` for an encoding name it does not know.
    """
    # In WMA, encodings are read from SystemFiles/CharacterEncodings/*.m
    # on the fly. We should load them from Mathics3-Scanner tables.
    encoding_table = get_encoding_table(encoding)
    if not encoding_table:
        return value
    # Join once instead of growing the result with repeated "+=".
    return "".join(encoding_table.get(ch, ch) for ch in value)