Skip to content

Commit 5780c29

Browse files
committed
Merge branch 'dev'
2 parents 8fdd804 + 76b44b8 commit 5780c29

15 files changed

Lines changed: 258 additions & 100 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ tools/udhr/
1010
*.egg-info
1111
*.egg
1212
venv
13-
.coverage
13+
.coverage
14+
.tox

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# A changelog for the lib/hyperglot language database and CLI tool (dd.mm.yyyy)
22

3+
## 0.6.3 (08.04.2024)
4+
- FIX: Set correct default values for `Language.status` and `Orthography.preferred_as_group` and provide validation and tests for these.
5+
- TWEAK: Deprecated the plain constants `SUPPORTLEVELS`, `VALIDITYLEVELS`, `STATUSES` and `ORTHOGRAPHY_STATUSES` and replaced them with the `SupportLevel`, `LanguageValidity`, `LanguageStatus` and `OrthographyStatus` enums throughout the code base. The deprecated values will be removed in the next minor version.
6+
- TESTS: Added a simple tox config for running tests on all supported minor Python versions
7+
38
## 0.6.2 (22.3.2024)
49
- FIX: Fixed type hinting issue causing failure on python 3.8.x
510
- DATA: Added Banjar (`bjn`) (thanks @mahalisyarifuddin)

README_releasing.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ This documentation for releasing new versions to PIP is relevant only for reposi
44

55
- merge dev to master
66
- run hyperglot-validate
7-
- run pytest tests
7+
- run pytest tests (with all tox environments)
88
- manually sanity-check cli font check works
99
- bump version number
1010
- push dev to github, test install from commit in new environment: pip install git+https://github.com/rosettatype/hyperglot.git@dev

lib/hyperglot/__init__.py

Lines changed: 90 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,48 +2,106 @@
22
Gather a few package wide constants
33
"""
44
from os import path
5-
__version__ = "0.6.2"
5+
from enum import Enum
6+
from typing import List
7+
8+
__version__ = "0.6.3"
69

710
DB = path.abspath(path.join(path.dirname(__file__), "data"))
811
DB_EXTRA = path.abspath(path.join(path.dirname(__file__), "extra_data"))
912

10-
SUPPORTLEVELS = {
11-
"base": "base",
12-
"aux": "auxiliary"
13-
}
13+
# ~~DONE Refactor these levels and statuses as Enums~~
14+
# TODO Eventually remove deprecated "CONSTANTS"
1415

15-
# TODO Refactor these levels and status as Enum's
1616

17-
# Note that order matters, since these may be used like a logging level
18-
VALIDITYLEVELS = [
19-
"todo",
20-
"draft",
21-
"preliminary",
22-
"verified",
23-
]
17+
class SupportLevel(Enum):
18+
"""
19+
Valid support levels for querying Hyperglot.
20+
"""
21+
22+
BASE = "base"
23+
AUX = "auxiliary"
24+
25+
26+
# Deprecated: SUPPORTLEVELS will be removed in the future, use SupportLevel!
27+
SUPPORTLEVELS = {"base": "base", "aux": "auxiliary"}
28+
29+
30+
class LanguageValidity(Enum):
31+
"""
32+
Allowed hyperglot.Language["validity"] values.
33+
34+
Order from least to most valid matters for comparison!
35+
"""
36+
37+
TODO = "todo"
38+
DRAFT = "draft"
39+
PRELIMINARY = "preliminary"
40+
VERIFIED = "verified"
41+
42+
@classmethod
43+
def values(self) -> List:
44+
return [v.value for v in self]
45+
46+
@classmethod
47+
def index(self, val: str) -> int:
48+
"""
49+
Get the index of a given value, useful for comparing the validities in
50+
order.
51+
"""
52+
return self.values().index(val)
53+
54+
55+
# Deprecated: VALIDITYLEVELS will be removed in the future, use LanguageValidity!
56+
VALIDITYLEVELS = LanguageValidity.values()
2457

2558

2659
# Note that "secondary" as status is also used, but on orthographies!
27-
STATUSES = [
28-
"historical",
29-
"constructed",
30-
"ancient",
31-
"living",
32-
"extinct",
33-
"deprecated",
34-
]
60+
class LanguageStatus(Enum):
61+
"""
62+
Allowed hyperglot.Language["status"] values, with LIVING being the default.
3563
64+
Deprecated values for 'status' previously used are: ancient, extinct and
65+
deprecated.
66+
"""
67+
68+
LIVING = "living"
69+
HISTORICAL = "historical"
70+
CONSTRUCTED = "constructed"
71+
72+
@classmethod
73+
def values(self) -> List:
74+
return [s.value for s in self]
75+
76+
77+
# Deprecated: STATUSES will be removed in the future, use LanguageStatus!
78+
STATUSES = LanguageStatus.values()
79+
80+
81+
class OrthographyStatus(Enum):
82+
"""
83+
Possible hyperglot.orthography.Orthography["status"] values.
84+
85+
Note: Order matters for preference of first found orthography.
86+
87+
Deprecated: "deprecated" orthography status removed in favour of "historical"
88+
"""
89+
90+
PRIMARY = "primary"
91+
LOCAL = "local"
92+
SECONDARY = "secondary"
93+
HISTORICAL = "historical"
94+
TRANSLITERATION = "transliteration"
95+
96+
@classmethod
97+
def values(self) -> List:
98+
return [s.value for s in self]
99+
100+
101+
# Deprecated: ORTHOGRAPHY_STATUSES will be removed in the future, use
102+
# OrthographyStatus!
103+
ORTHOGRAPHY_STATUSES = OrthographyStatus.values()
36104

37-
# Possible orthography statuses, in no meaningful order
38-
# "deprecated" orthography status removed in favour of "historical"
39-
# Note: Order matters for preference of first found orthography
40-
ORTHOGRAPHY_STATUSES = [
41-
"primary",
42-
"local",
43-
"secondary",
44-
"historical",
45-
"transliteration",
46-
]
47105

48106
# Those attributes of orthographies that contain non-mark characters
49107
CHARACTER_ATTRIBUTES = [
@@ -55,7 +113,7 @@
55113

56114
SORTING = {
57115
"alphabetic": lambda lang: lang.get_name(),
58-
"speakers": lambda lang: lang["speakers"]
116+
"speakers": lambda lang: lang["speakers"],
59117
}
60118

61119
SORTING_DIRECTIONS = ["asc", "desc"]

lib/hyperglot/checker.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from hyperglot.language import Language
99
from hyperglot.orthography import Orthography
1010
from hyperglot.parse import parse_chars
11-
from hyperglot import SUPPORTLEVELS, VALIDITYLEVELS
11+
from hyperglot import SupportLevel, LanguageValidity
1212

1313
log = logging.getLogger(__name__)
1414
log.setLevel(logging.WARNING)
@@ -43,25 +43,25 @@ class Checker:
4343
support checks.
4444
"""
4545

46-
def __init__(self, fontpath=None, characters=None):
46+
def __init__(self, fontpath: str = None, characters: List = None):
4747
self.fontpath = fontpath
4848
self.characters = characters
4949
self.font = None
5050
self.shaper = None
5151

5252
def get_supported_languages(
5353
self,
54-
supportlevel=list(SUPPORTLEVELS.keys())[0],
55-
validity=VALIDITYLEVELS[1],
56-
decomposed=False,
57-
marks=False,
58-
shaping=False,
59-
include_all_orthographies=False,
60-
include_historical=False,
61-
include_constructed=False,
62-
report_missing=-1,
63-
report_marks=-1,
64-
report_joining=-1,
54+
supportlevel: str = SupportLevel.BASE.value,
55+
validity: str = LanguageValidity.DRAFT.value,
56+
decomposed: bool = False,
57+
marks: bool = False,
58+
shaping: bool = False,
59+
include_all_orthographies: bool = False,
60+
include_historical: bool = False,
61+
include_constructed: bool = False,
62+
report_missing: int = -1,
63+
report_marks: int = -1,
64+
report_joining: int = -1,
6565
) -> dict:
6666
"""
6767
Get all languages supported based on the passed in characters.
@@ -102,7 +102,7 @@ def get_supported_languages(
102102
continue
103103

104104
# Skip languages below the currently selected validity level.
105-
if VALIDITYLEVELS.index(l["validity"]) < VALIDITYLEVELS.index(validity):
105+
if LanguageValidity.index(l["validity"]) < LanguageValidity.index(validity):
106106
log.info("Skipping language '%s' which has lower " "'validity'" % iso)
107107
continue
108108

@@ -151,8 +151,8 @@ def get_supported_languages(
151151
def supports_language(
152152
self,
153153
iso: str,
154-
supportlevel: str = "base",
155-
validity: str = VALIDITYLEVELS[1],
154+
supportlevel: str = SupportLevel.BASE.value,
155+
validity: str = LanguageValidity.DRAFT.value,
156156
decomposed: bool = False,
157157
marks: bool = False,
158158
shaping: bool = False,
@@ -206,12 +206,13 @@ def supports_language(
206206

207207
# Exit if validity is not met
208208
if "validity" not in language or (
209-
VALIDITYLEVELS.index(language["validity"]) < VALIDITYLEVELS.index(validity)
209+
LanguageValidity.index(language["validity"])
210+
< LanguageValidity.index(validity)
210211
):
211212
return False
212213

213-
if supportlevel not in SUPPORTLEVELS.keys():
214-
log.warning(
214+
if supportlevel not in [s.value for s in SupportLevel]:
215+
raise Exception(
215216
"Provided support level '%s' not valid, "
216217
"defaulting to 'base'" % supportlevel
217218
)
@@ -298,7 +299,7 @@ def supports_language(
298299

299300
# If an orthography has no "auxiliary" we consider it supported on
300301
# "auxiliary" level, too.
301-
if supportlevel == "aux" and ort.auxiliary:
302+
if supportlevel == SupportLevel.AUX.value and ort.auxiliary:
302303
if marks:
303304
req_marks_aux = ort.auxiliary_marks
304305
else:
@@ -317,9 +318,7 @@ def supports_language(
317318

318319
# Validation
319320
supported = False
320-
logging.info(
321-
f"{language} missing {len(aux_missing)} 'aux'"
322-
)
321+
logging.info(f"{language} missing {len(aux_missing)} 'aux'")
323322

324323
if shaping:
325324
joining_errors, mark_errors = self._check_shaping(

lib/hyperglot/cli.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
__version__,
1111
SORTING_DIRECTIONS,
1212
DB,
13-
SUPPORTLEVELS,
14-
VALIDITYLEVELS,
13+
SupportLevel,
14+
LanguageValidity,
1515
CHARACTER_ATTRIBUTES,
1616
MARK_BASE,
1717
SORTING,
@@ -237,7 +237,7 @@ def hyperglot_options(f):
237237
@click.option(
238238
"-s",
239239
"--support",
240-
type=click.Choice(SUPPORTLEVELS.keys(), case_sensitive=False),
240+
type=click.Choice([s.value for s in SupportLevel], case_sensitive=False),
241241
default="base",
242242
show_default=True,
243243
help="Option to test only for the language's base charset, or to"
@@ -264,8 +264,8 @@ def hyperglot_options(f):
264264
)
265265
@click.option(
266266
"--validity",
267-
type=click.Choice(VALIDITYLEVELS, case_sensitive=False),
268-
default=VALIDITYLEVELS[1],
267+
type=click.Choice([v.value for v in LanguageValidity], case_sensitive=False),
268+
default=LanguageValidity.DRAFT.value,
269269
show_default=True,
270270
help="The level of validity for languages matched against the "
271271
"font. Weaker levels always include more strict levels. The "
@@ -453,7 +453,7 @@ def cli(
453453
report_joining=report_joining,
454454
)
455455

456-
level = SUPPORTLEVELS[support]
456+
level = SupportLevel(support).value
457457

458458
# Sort each script's results by the chosen sorting logic
459459
sorted_entries = {}

0 commit comments

Comments
 (0)