forked from daisy/MathCAT
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataclasses.py
More file actions
109 lines (87 loc) · 3.81 KB
/
dataclasses.py
File metadata and controls
109 lines (87 loc) · 3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
Data models for the audit tool.
Contains dataclasses for representing rules and comparison results.
"""
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
class IssueType(StrEnum):
"""Top-level issue categories used by the audit renderer."""
MISSING_RULE = "missing_rule"
UNTRANSLATED_TEXT = "untranslated_text"
RULE_DIFFERENCE = "rule_difference"
EXTRA_RULE = "extra_rule"
class DiffType(StrEnum):
"""Rule-difference subcategories used for fine-grained diagnostics."""
MATCH = "match" # `match` XPath differs between English and translation.
CONDITION = "condition" # `if` / `test` condition expressions differ.
VARIABLES = "variables" # Variable names defined in `variables` differ.
STRUCTURE = "structure" # Control-flow block shape/order differs (if/then/else/with/replace).
@dataclass
class RuleInfo:
"""
Information about a single rule parsed from a YAML file.
Attributes
----------
name : str | None
Rule name for standard rule files; None for unicode entries.
tag : str | None
Rule tag (normalized string); None for unicode entries.
key : str
Stable identifier used for matching; for unicode entries this is the character or range key.
line_number : int
1-based line number where the rule starts in the source file.
raw_content : str
Raw YAML block for this rule (used for reporting/snippets).
data : Any | None
Parsed YAML node for the rule; used for structural diffs.
untranslated_entries : list[tuple[str, str, int | None]]
List of (key, text, line) entries extracted from lowercase translation keys.
This preserves exact text fragments and YAML line numbers for diagnostics.
line_map : dict[str, list[int]]
Mapping of element type to line numbers for rule components like match,
conditions, variables, and structural tokens. This is used to point
structural diffs at a precise line rather than the top of the rule.
audit_ignore : bool
True if the raw content contains an audit-ignore marker.
"""
name: str | None # None for unicode entries
tag: str | None # None for unicode entries
key: str # For unicode entries, this is the character/range
line_number: int
raw_content: str
data: Any | None = None
untranslated_entries: list[tuple[str, str, int | None]] = field(default_factory=list)
line_map: dict[str, list[int]] = field(default_factory=dict)
audit_ignore: bool = False
@property
def has_untranslated_text(self) -> bool:
return bool(self.untranslated_entries)
@property
def untranslated_keys(self) -> list[str]:
return [entry[1] for entry in self.untranslated_entries]
@dataclass
class RuleDifference:
    """
    One fine-grained difference between an English rule and its translation.

    Attributes
    ----------
    english_rule : RuleInfo
        The rule from the English side of the comparison.
    translated_rule : RuleInfo
        The rule from the translated side of the comparison.
    diff_type : DiffType
        Subcategory of the difference. A plain string is also accepted and
        is converted to the matching ``DiffType`` member after construction.
    description : str
        Description of the difference.
    english_snippet : str
        Snippet for the English side (presumably the relevant excerpt of the
        rule; verify against the code that builds it).
    translated_snippet : str
        Snippet for the translated side (same caveat as above).
    """

    english_rule: RuleInfo
    translated_rule: RuleInfo
    diff_type: DiffType
    description: str
    english_snippet: str
    translated_snippet: str

    def __post_init__(self) -> None:
        """Normalize ``diff_type`` so string input ends up as a ``DiffType``."""
        if not isinstance(self.diff_type, str):
            # Non-string values are left exactly as they were passed in.
            return
        # DiffType members are themselves strings, so this is a no-op for
        # them; an unrecognized string makes the enum lookup raise.
        self.diff_type = DiffType(self.diff_type)
@dataclass
class ComparisonResult:
    """
    Outcome of comparing an English rule file with its translation.

    Attributes
    ----------
    missing_rules : list[RuleInfo]
        Rules that are in the English file but not in the translation.
    extra_rules : list[RuleInfo]
        Rules that are in the translation but not in the English file.
    untranslated_text : list[tuple[RuleInfo, list[tuple[str, str, int | None]]]]
        Rules with lowercase t/ot/ct keys, each paired with its list of
        ``(str, str, int | None)`` entries.
    file_path : str
        Path of the file this result refers to.
    english_rule_count : int
        Rule count for the English file.
    translated_rule_count : int
        Rule count for the translated file.
    rule_differences : list[RuleDifference]
        Fine-grained differences; defaults to a fresh empty list.
    """

    missing_rules: list[RuleInfo]
    extra_rules: list[RuleInfo]
    untranslated_text: list[tuple[RuleInfo, list[tuple[str, str, int | None]]]]
    file_path: str
    english_rule_count: int
    translated_rule_count: int
    rule_differences: list[RuleDifference] = field(default_factory=list)

    @property
    def has_issues(self) -> bool:
        """Whether any of the four issue collections is non-empty."""
        issue_collections = (
            self.missing_rules,
            self.untranslated_text,
            self.extra_rules,
            self.rule_differences,
        )
        return any(issue_collections)