Skip to content

Commit 6cabe24

Browse files
authored
Merge pull request #513 from moritz-gross/ruff-lint
a bunch of stuff
2 parents f974dca + d7a48f1 commit 6cabe24

17 files changed

Lines changed: 351 additions & 755 deletions

.github/workflows/python.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ jobs:
99
steps:
1010
- uses: actions/checkout@v3
1111
- uses: astral-sh/setup-uv@v6
12+
- name: Run ruff lint
13+
working-directory: PythonScripts
14+
run: uv run ruff check audit_translations/
15+
- name: Run ruff format check
16+
working-directory: PythonScripts
17+
run: uv run ruff format --check audit_translations/
1218
- name: Run tests
1319
working-directory: PythonScripts
1420
run: uv run pytest

PythonScripts/audit_translations/README.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,9 @@ uv run --project PythonScripts audit-translations --list
6868
* `--list`: Displays all available languages.
6969
* Region variants are shown as `lang-region` (e.g., `zz-aa`) based on subdirectories under `Rules/Languages/<lang>`.
7070
* `--file`: Audits a single specific file instead of the whole directory.
71-
* `--format`: Output format (`rich`, `jsonl`). `--output` is honored only for `jsonl`; rich output always prints to the console.
7271
* `--rules-dir`: Override the Rules/Languages directory path.
7372
* `--only`: Filter issue types (comma-separated): `missing`, `untranslated`, `extra`, `diffs`, `all`.
74-
* `--verbose`: Show detailed output including English/translated snippets for rule differences (only affects rich format; default shows summary only).
73+
* `--verbose`: Show detailed output including English/translated snippets for rule differences.
7574
* **Summary Stats:** Provides a statistical summary after every run.
7675

7776
**Examples:**
@@ -92,9 +91,6 @@ uv run audit-translations de
9291
# Audit only a specific file
9392
uv run audit-translations es --file SharedRules/default.yaml
9493
95-
# Produce JSONL output for automation or AI workflows
96-
uv run audit-translations es --format jsonl --output es-issues.jsonl
97-
9894
# Audit a regional variant (merges Rules/Languages/de and Rules/Languages/de/CH)
9995
uv run audit-translations de-CH
10096

PythonScripts/audit_translations/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212

1313
sys.stdout.reconfigure(encoding="utf-8")
14-
from .cli import main
14+
from .cli import main # noqa: E402
1515

1616
__all__ = [
1717
"main",

PythonScripts/audit_translations/auditor.py

Lines changed: 49 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,31 @@
55
and for performing full language audits.
66
"""
77

8-
import json
98
import sys
109
from pathlib import Path
11-
from typing import TextIO
1210

1311
from rich.panel import Panel
1412
from rich.table import Table
1513

16-
from .dataclasses import RuleInfo, ComparisonResult
17-
from .parsers import parse_yaml_file, diff_rules
18-
from .renderer import collect_issues, console, print_warnings
14+
from .dataclasses import ComparisonResult, RuleInfo
15+
from .parsers import diff_rules, parse_yaml_file
16+
from .renderer import console, print_warnings
1917

2018
# Re-export console so existing `from .auditor import console` callers keep working.
2119
__all__ = ["console"]
2220

21+
GREEN_FILE_COUNT_THRESHOLD = 7
22+
YELLOW_FILE_COUNT_THRESHOLD = 4
23+
24+
25+
def file_count_color(file_count: int) -> str:
26+
"""Map number of translated YAML files to a display color."""
27+
if file_count >= GREEN_FILE_COUNT_THRESHOLD:
28+
return "green"
29+
if file_count >= YELLOW_FILE_COUNT_THRESHOLD:
30+
return "yellow"
31+
return "red"
32+
2333

2434
def split_language_into_base_and_region(language: str) -> tuple[str, str | None]:
2535
"""Split a language code into base and optional region."""
@@ -148,8 +158,6 @@ def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> lis
148158
def audit_language(
149159
language: str,
150160
specific_file: str | None = None,
151-
output_format: str = "rich",
152-
output_path: str | None = None,
153161
rules_dir: str | None = None,
154162
issue_filter: set[str] | None = None,
155163
verbose: bool = False,
@@ -178,15 +186,10 @@ def audit_language(
178186
# Get list of files to audit
179187
files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir)
180188

181-
if output_format == "rich":
182-
# Print header
183-
console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan"))
184-
console.print(f"\n [dim]Comparing against English (en) reference files[/]")
185-
console.print(f" [dim]Files to check: {len(files)}[/]")
186-
187-
out_stream: TextIO = sys.stdout
188-
if output_path:
189-
out_stream = open(output_path, "w", encoding="utf-8", newline="")
189+
# Print header
190+
console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan"))
191+
console.print("\n [dim]Comparing against English (en) reference files[/]")
192+
console.print(f" [dim]Files to check: {len(files)}[/]")
190193

191194
total_issues = 0
192195
total_missing = 0
@@ -214,52 +217,39 @@ def audit_language(
214217
str(english_region_path) if english_region_path and english_region_path.exists() else None,
215218
)
216219

217-
if output_format == "rich":
218-
if result.has_issues:
219-
issues = print_warnings(result, file_name, verbose, language)
220-
if issues > 0:
221-
files_with_issues += 1
222-
total_issues += issues
223-
else:
224-
files_ok += 1
225-
else:
226-
issues_list = collect_issues(result, file_name, language)
227-
for issue in issues_list:
228-
out_stream.write(json.dumps(issue, ensure_ascii=False) + "\n")
229-
if issues_list:
220+
if result.has_issues:
221+
issues = print_warnings(result, file_name, verbose, language)
222+
if issues > 0:
230223
files_with_issues += 1
231-
total_issues += len(issues_list)
232-
else:
233-
files_ok += 1
224+
total_issues += issues
225+
else:
226+
files_ok += 1
234227

235228
total_missing += len(result.missing_rules)
236229
total_untranslated += sum(len(entries) for _, entries in result.untranslated_text)
237230
total_extra += len(result.extra_rules)
238231
total_differences += len(result.rule_differences)
239232

240-
if output_format == "rich":
241-
# Summary
242-
table = Table(title="SUMMARY", title_style="bold", box=None, show_header=False, padding=(0, 2))
243-
table.add_column(width=30)
244-
table.add_column()
245-
for label, value, color in [
246-
("Files checked", len(files), None),
247-
("Files with issues", files_with_issues, "yellow" if files_with_issues else "green"),
248-
("Files OK", files_ok, "green" if files_ok else None),
249-
("Missing rules", total_missing, "red" if total_missing else "green"),
250-
("Untranslated text", total_untranslated, "yellow" if total_untranslated else "green"),
251-
("Rule differences", total_differences, "magenta" if total_differences else "green"),
252-
("Extra rules", total_extra, "blue" if total_extra else None),
253-
]:
254-
table.add_row(label, f"[{color}]{value}[/]" if color else str(value))
255-
console.print(Panel(table, style="cyan"))
256-
257-
if output_path:
258-
out_stream.close()
233+
# Summary
234+
table = Table(title="SUMMARY", title_style="bold", box=None, show_header=False, padding=(0, 2))
235+
table.add_column(width=30)
236+
table.add_column()
237+
for label, value, color in [
238+
("Files checked", len(files), None),
239+
("Files with issues", files_with_issues, "yellow" if files_with_issues else "green"),
240+
("Files OK", files_ok, "green" if files_ok else None),
241+
("Missing rules", total_missing, "red" if total_missing else "green"),
242+
("Untranslated text", total_untranslated, "yellow" if total_untranslated else "green"),
243+
("Rule differences", total_differences, "magenta" if total_differences else "green"),
244+
("Extra rules", total_extra, "blue" if total_extra else None),
245+
]:
246+
table.add_row(label, f"[{color}]{value}[/]" if color else str(value))
247+
console.print(Panel(table, style="cyan"))
248+
259249
return total_issues
260250

261251

262-
def list_languages(rules_dir: str | None = None):
252+
def list_languages(rules_dir: str | None = None) -> None:
263253
"""List available languages for auditing"""
264254
console.print(Panel("Available Languages", style="bold cyan"))
265255

@@ -272,15 +262,16 @@ def list_languages(rules_dir: str | None = None):
272262
if not lang_dir.is_dir() or lang_dir.name == "en":
273263
continue
274264
base_count = len(get_yaml_files(lang_dir))
275-
color = "green" if base_count >= 7 else "yellow" if base_count >= 4 else "red"
265+
color = file_count_color(base_count)
276266
table.add_row(lang_dir.name, f"[{color}]{base_count}[/] files")
277267

278268
for region_dir in sorted(lang_dir.iterdir()):
279-
if region_dir.is_dir():
280-
code = f"{lang_dir.name}-{region_dir.name}"
281-
count = len(get_yaml_files(lang_dir, region_dir))
282-
region_color = "green" if count >= 7 else "yellow" if count >= 4 else "red"
283-
table.add_row(code, f"[{region_color}]{count}[/] files")
269+
if not region_dir.is_dir() or region_dir.name.lower() == "sharedrules":
270+
continue
271+
code = f"{lang_dir.name}-{region_dir.name}"
272+
count = len(get_yaml_files(lang_dir, region_dir))
273+
region_color = file_count_color(count)
274+
table.add_row(code, f"[{region_color}]{count}[/] files")
284275

285276
console.print(table)
286277
console.print("\n [dim]Reference: en (English) - base translation[/]\n")

PythonScripts/audit_translations/cli.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
import argparse
88
import sys
99

10-
from .auditor import audit_language, list_languages, console
10+
from .auditor import audit_language, console, list_languages
1111

1212

13-
def main():
13+
def main() -> None:
1414
"""Main entry point for the audit tool"""
1515

1616
parser = argparse.ArgumentParser(
@@ -28,21 +28,14 @@ def main():
2828
parser.add_argument("--file", dest="specific_file", help="Audit only a specific file (e.g., 'SharedRules/default.yaml')")
2929
parser.add_argument("--list", action="store_true", help="List available languages")
3030
parser.add_argument("--rules-dir", help="Override Rules/Languages directory path")
31-
parser.add_argument(
32-
"--format",
33-
choices=["rich", "jsonl"],
34-
default="rich",
35-
help="Output format (default: rich)",
36-
)
37-
parser.add_argument("--output", help="Write output to a file instead of stdout")
3831
parser.add_argument(
3932
"--only",
4033
help="Comma-separated issue types: missing, untranslated, extra, diffs, all",
4134
)
4235
parser.add_argument(
4336
"--verbose",
4437
action="store_true",
45-
help="Show detailed output including rule snippets (only affects rich format)",
38+
help="Show detailed output including rule snippets",
4639
)
4740

4841
args = parser.parse_args()
@@ -68,8 +61,6 @@ def main():
6861
audit_language(
6962
args.language,
7063
args.specific_file,
71-
args.format,
72-
args.output,
7364
args.rules_dir,
7465
issue_filter,
7566
args.verbose,

PythonScripts/audit_translations/dataclasses.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,28 @@
55
"""
66

77
from dataclasses import dataclass, field
8+
from enum import StrEnum
89
from typing import Any
910

1011

12+
class IssueType(StrEnum):
13+
"""Top-level issue categories used by the audit renderer."""
14+
15+
MISSING_RULE = "missing_rule"
16+
UNTRANSLATED_TEXT = "untranslated_text"
17+
RULE_DIFFERENCE = "rule_difference"
18+
EXTRA_RULE = "extra_rule"
19+
20+
21+
class DiffType(StrEnum):
22+
"""Rule-difference subcategories used for fine-grained diagnostics."""
23+
24+
MATCH = "match" # `match` XPath differs between English and translation.
25+
CONDITION = "condition" # `if` / `test` condition expressions differ.
26+
VARIABLES = "variables" # Variable names defined in `variables` differ.
27+
STRUCTURE = "structure" # Control-flow block shape/order differs (if/then/else/with/replace).
28+
29+
1130
@dataclass
1231
class RuleInfo:
1332
"""
@@ -29,8 +48,7 @@ class RuleInfo:
2948
Parsed YAML node for the rule; used for structural diffs.
3049
untranslated_entries : list[tuple[str, str, int | None]]
3150
List of (key, text, line) entries extracted from lowercase translation keys.
32-
This drives per-issue JSONL output so each untranslated string can report
33-
the specific YAML line number where it appears.
51+
This preserves exact text fragments and YAML line numbers for diagnostics.
3452
line_map : dict[str, list[int]]
3553
Mapping of element type to line numbers for rule components like match,
3654
conditions, variables, and structural tokens. This is used to point
@@ -64,11 +82,15 @@ class RuleDifference:
6482

6583
english_rule: RuleInfo
6684
translated_rule: RuleInfo
67-
diff_type: str # 'match', 'condition', 'structure', 'variables'
85+
diff_type: DiffType
6886
description: str
6987
english_snippet: str
7088
translated_snippet: str
7189

90+
def __post_init__(self) -> None:
91+
if isinstance(self.diff_type, str):
92+
self.diff_type = DiffType(self.diff_type)
93+
7294

7395
@dataclass
7496
class ComparisonResult:

PythonScripts/audit_translations/line_resolver.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,14 @@
44
Maps rule diff types and structure tokens to precise YAML source line numbers.
55
"""
66

7-
from .dataclasses import RuleInfo, RuleDifference
7+
from .dataclasses import DiffType, RuleDifference, RuleInfo
88
from .parsers import extract_structure_elements
99

1010

11-
def _get_line_map_lines(rule: RuleInfo, kind: str, token: str | None = None) -> list[int]:
11+
def _get_line_map_lines(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> list[int]:
1212
"""Return the line-number list for a given element kind from the rule's line map."""
13-
if kind == "match":
14-
return rule.line_map.get("match", [])
15-
if kind == "condition":
16-
return rule.line_map.get("condition", [])
17-
if kind == "variables":
18-
return rule.line_map.get("variables", [])
13+
if kind in ("match", "condition", "variables"):
14+
return rule.line_map.get(kind, [])
1915
if kind == "structure" and token:
2016
return rule.line_map.get(f"structure:{token.rstrip(':')}", [])
2117
return []
@@ -44,7 +40,7 @@ def first_structure_mismatch(
4440

4541
def resolve_issue_line_at_position(
4642
rule: RuleInfo,
47-
kind: str,
43+
kind: DiffType | str,
4844
token: str | None = None,
4945
position: int = 0,
5046
) -> int | None:
@@ -64,7 +60,7 @@ def resolve_issue_line_at_position(
6460
return lines[position] if position < len(lines) else None
6561

6662

67-
def resolve_issue_line(rule: RuleInfo, kind: str, token: str | None = None) -> int | None:
63+
def resolve_issue_line(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> int | None:
6864
"""
6965
Resolve the line number for an issue within a rule.
7066
@@ -150,8 +146,8 @@ def resolve_diff_lines(diff: RuleDifference) -> tuple[int | None, int | None] |
150146
Resolve issue line numbers for a rule difference.
151147
152148
Returns (line_en, line_tr), or None only for unresolvable structure diffs.
153-
This is the single entry point used by both collect_issues and print_warnings
154-
to avoid duplicating the structure vs non-structure branching logic.
149+
This is the single entry point used by the renderer to avoid duplicating
150+
the structure vs non-structure branching logic.
155151
"""
156152
if diff.diff_type == "structure":
157153
return resolve_structure_issue_lines(diff)

0 commit comments

Comments
 (0)