Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 32 additions & 52 deletions PythonScripts/audit_translations/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,81 +100,61 @@ def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]:
yield key, match.value, parent


def parse_rules_file(content: str, data: Any) -> list[RuleInfo]:
"""Parse a standard rules file with name/tag entries"""
def _extract_item_fields(item: Any, is_unicode: bool) -> tuple[str, str | None, str | None, Any] | None:
if is_unicode:
if isinstance(item, dict) and len(item) == 1:
char_key = str(next(iter(item.keys())))
return char_key, None, None, item[char_key]
else:
if isinstance(item, dict) and "name" in item:
rule_name = str(item.get("name"))
tag = format_tag(item.get("tag"))
return f"{rule_name}|{tag or 'unknown'}", rule_name, tag, item
return None


def _build_rule_items(content: str, data: Any, is_unicode_file: bool) -> list[RuleInfo]:
    """Build a RuleInfo for every recognised top-level item of a parsed YAML list.

    Args:
        content: Raw text of the YAML file; its lines are handed to
            ``build_raw_blocks`` to recover each item's source block.
        data: Parsed YAML document. Anything other than a list yields no rules.
        is_unicode_file: Forwarded to ``_extract_item_fields`` to choose between
            unicode-entry and name/tag rule recognition.

    Returns:
        One RuleInfo per recognised item, in document order.
    """
    if not isinstance(data, list):
        return []

    # Position info is read from ``data.lc`` when the parsed list carries it;
    # otherwise every item falls back to line 0.
    has_positions = hasattr(data, "lc")

    start_lines: list[int] = []
    extracted: list[tuple[str, str | None, str | None, Any]] = []
    for idx, item in enumerate(data):
        fields = _extract_item_fields(item, is_unicode_file)
        if fields is None:
            # Not a rule / unicode entry (e.g. an include directive): skip it,
            # so start_lines and extracted stay index-aligned.
            continue
        start_lines.append(data.lc.item(idx)[0] if has_positions else 0)
        extracted.append(fields)

    raw_blocks = build_raw_blocks(content.splitlines(), start_lines)

    return [
        RuleInfo(
            name=name,
            tag=tag,
            key=key,
            # Recorded start lines are treated as 0-based; reported ones are 1-based.
            line_number=line_idx + 1,
            raw_content=raw_content,
            data=item_data,
            untranslated_entries=find_untranslated_text_entries(item_data),
            line_map=build_line_map(item_data),
            audit_ignore=has_audit_ignore(raw_content),
        )
        for (key, name, tag, item_data), raw_content, line_idx in zip(extracted, raw_blocks, start_lines)
    ]


def parse_rules_file(content: str, data: Any) -> list[RuleInfo]:
    """Parse a standard rules file with name/tag entries.

    Args:
        content: Raw text of the rules file.
        data: Parsed YAML document for that text.

    Returns:
        The RuleInfo list produced by ``_build_rule_items`` in rule mode.
    """
    return _build_rule_items(content, data, is_unicode_file=False)


def parse_unicode_file(content: str, data: Any) -> list[RuleInfo]:
    """Parse a unicode file with character/range keys.

    Args:
        content: Raw text of the unicode file.
        data: Parsed YAML document for that text.

    Returns:
        The RuleInfo list produced by ``_build_rule_items`` in unicode mode.
    """
    return _build_rule_items(content, data, is_unicode_file=True)


def has_audit_ignore(content: str) -> bool:
Expand Down
80 changes: 80 additions & 0 deletions PythonScripts/audit_translations/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,45 @@ def test_sorts_tag_lists(self):
rules = parse_rules_file(content, data)
assert rules[0].tag == "[mo, mtext]"

def test_returns_empty_for_non_list_data(self):
    """A mapping at the document root (not a list) produces no rules."""
    non_list_document = {"key": "value"}
    parsed = parse_rules_file("key: value", non_list_document)
    assert parsed == []

def test_skips_items_without_name(self):
    """Items like '- include: file' that lack a 'name' key are skipped."""
    # 'tag' and 'match' must be indented under their list item; at column 0
    # they would follow the sequence as stray mapping keys and the YAML
    # document would not parse.
    content = """- include: shared.yaml
- name: real-rule
  tag: mo
  match: "."
"""
    yaml = YAML()
    data = yaml.load(content)
    rules = parse_rules_file(content, data)
    assert len(rules) == 1
    assert rules[0].name == "real-rule"

def test_mixed_valid_and_skipped_items(self):
    """Valid rules interspersed with non-rule items keep correct line numbers."""
    # Nested keys are indented under their list item so the document is valid
    # YAML; indentation does not change the line count, so 'second' still
    # starts on line 7.
    content = """- name: first
  tag: mo
  match: "."

- include: other.yaml

- name: second
  tag: mi
  match: "x"
"""
    yaml = YAML()
    data = yaml.load(content)
    rules = parse_rules_file(content, data)
    assert len(rules) == 2
    assert rules[0].name == "first"
    assert rules[0].line_number == 1
    assert rules[1].name == "second"
    assert rules[1].line_number == 7

def test_parse_yaml_file_handles_tabs(self, tmp_path):
"""Ensure parse yaml file handles tabs."""
content = """- name: tabbed
Expand Down Expand Up @@ -268,6 +307,47 @@ def test_parses_multiple_entries(self):
assert len(rules) == 2


def test_returns_empty_for_non_list_data(self):
    """A mapping at the document root (not a list) produces no unicode entries."""
    non_list_document = {"key": "value"}
    parsed = parse_unicode_file("key: value", non_list_document)
    assert parsed == []

def test_skips_multi_key_dicts(self):
    """Dicts with more than one key are not valid unicode entries and are skipped."""
    # The second list item must hold BOTH "b" and "c" as keys of one mapping,
    # which requires "c" to be indented to the same column as "b"; each
    # replacement list is nested under its key.
    content = """- "a":
    - t: "a"
- "b":
    - t: "b"
  "c":
    - t: "c"
"""
    yaml = YAML()
    data = yaml.load(content)
    rules = parse_unicode_file(content, data)
    assert len(rules) == 1
    assert rules[0].key == "a"

def test_mixed_valid_and_skipped_items(self):
    """Valid entries interspersed with non-entry items keep correct line numbers."""
    # The middle item is a two-key mapping ('not' + 'extra'), so 'extra' is
    # indented to sit in the same mapping; replacement lists are nested under
    # their character keys. Line positions are unchanged: "b" starts on line 7.
    content = """- "a":
    - t: "alpha"

- not: a unicode entry
  extra: key

- "b":
    - t: "bravo"
"""
    yaml = YAML()
    data = yaml.load(content)
    rules = parse_unicode_file(content, data)
    assert len(rules) == 2
    assert rules[0].key == "a"
    assert rules[0].line_number == 1
    assert rules[1].key == "b"
    assert rules[1].line_number == 7


class TestExtractMatchPattern:
def test_extracts_inline_match(self):
"""Ensure extracts inline match."""
Expand Down