Skip to content

Commit eb9738d

Browse files
committed
automark add reasons to reason-less expectedFailures
1 parent dd09f41 commit eb9738d

2 files changed

Lines changed: 630 additions & 12 deletions

File tree

scripts/update_lib/cmd_auto_mark.py

Lines changed: 242 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,113 @@ def path_to_test_parts(path: str) -> list[str]:
250250
return parts[-2:]
251251

252252

253+
def _expand_stripped_to_children(
254+
contents: str,
255+
stripped_tests: set[tuple[str, str]],
256+
all_failing_tests: set[tuple[str, str]],
257+
) -> set[tuple[str, str]]:
258+
"""Find child-class failures that correspond to stripped parent-class markers.
259+
260+
When ``strip_reasonless_expected_failures`` removes a marker from a parent
261+
(mixin) class, test failures are reported against the concrete subclasses,
262+
not the parent itself. This function maps those child failures back so
263+
they get re-marked (and later consolidated to the parent by
264+
``_consolidate_to_parent``).
265+
266+
Returns the set of ``(class, method)`` pairs from *all_failing_tests* that
267+
should be re-marked.
268+
"""
269+
# Direct matches (stripped test itself is a concrete TestCase)
270+
result = stripped_tests & all_failing_tests
271+
272+
unmatched = stripped_tests - all_failing_tests
273+
if not unmatched:
274+
return result
275+
276+
tree = ast.parse(contents)
277+
class_bases, class_methods = _build_inheritance_info(tree)
278+
279+
for parent_cls, method_name in unmatched:
280+
# parent must actually define this method
281+
if method_name not in class_methods.get(parent_cls, set()):
282+
continue
283+
for cls in class_bases:
284+
if cls == parent_cls:
285+
continue
286+
if method_name in class_methods.get(cls, set()):
287+
continue
288+
if (
289+
_find_method_definition(cls, method_name, class_bases, class_methods)
290+
== parent_cls
291+
and (cls, method_name) in all_failing_tests
292+
):
293+
result.add((cls, method_name))
294+
295+
return result
296+
297+
298+
def _consolidate_to_parent(
299+
contents: str,
300+
failing_tests: set[tuple[str, str]],
301+
error_messages: dict[tuple[str, str], str] | None = None,
302+
) -> tuple[set[tuple[str, str]], dict[tuple[str, str], str] | None]:
303+
"""Move failures to the parent class when ALL inheritors fail.
304+
305+
If every concrete subclass that inherits a method from a parent class
306+
appears in *failing_tests*, replace those per-subclass entries with a
307+
single entry on the parent. This avoids creating redundant super-call
308+
overrides in every child.
309+
310+
Returns:
311+
(consolidated_failing_tests, consolidated_error_messages)
312+
"""
313+
tree = ast.parse(contents)
314+
class_bases, class_methods = _build_inheritance_info(tree)
315+
316+
# Group by (defining_parent, method) → set of failing children
317+
from collections import defaultdict
318+
319+
groups: dict[tuple[str, str], set[str]] = defaultdict(set)
320+
for class_name, method_name in failing_tests:
321+
defining = _find_method_definition(
322+
class_name, method_name, class_bases, class_methods
323+
)
324+
if defining and defining != class_name:
325+
groups[(defining, method_name)].add(class_name)
326+
327+
if not groups:
328+
return failing_tests, error_messages
329+
330+
result = set(failing_tests)
331+
new_error_messages = dict(error_messages) if error_messages else {}
332+
333+
for (parent, method_name), failing_children in groups.items():
334+
# Find ALL classes that inherit this method from parent
335+
all_inheritors: set[str] = set()
336+
for cls_name in class_bases:
337+
if cls_name == parent:
338+
continue
339+
# Skip if this class defines the method itself
340+
if method_name in class_methods.get(cls_name, set()):
341+
continue
342+
if _find_method_definition(cls_name, method_name, class_bases, class_methods) == parent:
343+
all_inheritors.add(cls_name)
344+
345+
if all_inheritors and failing_children >= all_inheritors:
346+
# All inheritors fail → mark on parent instead
347+
children_keys = {(child, method_name) for child in failing_children}
348+
result -= children_keys
349+
result.add((parent, method_name))
350+
# Pick any child's error message for the parent
351+
if new_error_messages:
352+
for child in failing_children:
353+
msg = new_error_messages.pop((child, method_name), "")
354+
if msg:
355+
new_error_messages[(parent, method_name)] = msg
356+
357+
return result, new_error_messages or error_messages
358+
359+
253360
def build_patches(
254361
test_parts_set: set[tuple[str, str]],
255362
error_messages: dict[tuple[str, str], str] | None = None,
@@ -406,11 +513,18 @@ def remove_expected_failures(
406513
and lines[dec_line - 1].strip().startswith("#")
407514
and COMMENT in lines[dec_line - 1]
408515
)
516+
has_comment_after = (
517+
dec_line + 1 < len(lines)
518+
and lines[dec_line + 1].strip().startswith("#")
519+
and COMMENT not in lines[dec_line + 1]
520+
)
409521

410522
if has_comment_on_line or has_comment_before:
411523
lines_to_remove.add(dec_line)
412524
if has_comment_before:
413525
lines_to_remove.add(dec_line - 1)
526+
if has_comment_after and has_comment_on_line:
527+
lines_to_remove.add(dec_line + 1)
414528

415529
for line_idx in sorted(lines_to_remove, reverse=True):
416530
del lines[line_idx]
@@ -481,12 +595,104 @@ def apply_test_changes(
481595
contents = remove_expected_failures(contents, unexpected_successes)
482596

483597
if failing_tests:
598+
failing_tests, error_messages = _consolidate_to_parent(
599+
contents, failing_tests, error_messages
600+
)
484601
patches = build_patches(failing_tests, error_messages)
485602
contents = apply_patches(contents, patches)
486603

487604
return contents
488605

489606

607+
def strip_reasonless_expected_failures(
608+
contents: str,
609+
) -> tuple[str, set[tuple[str, str]]]:
610+
"""Strip @expectedFailure decorators that have no failure reason.
611+
612+
Markers like ``@unittest.expectedFailure # TODO: RUSTPYTHON`` (without a
613+
reason after the semicolon) are removed so the tests fail normally during
614+
the next test run and error messages can be captured.
615+
616+
Returns:
617+
(modified_contents, stripped_tests) where stripped_tests is a set of
618+
(class_name, method_name) tuples whose markers were removed.
619+
"""
620+
tree = ast.parse(contents)
621+
lines = contents.splitlines()
622+
stripped_tests: set[tuple[str, str]] = set()
623+
lines_to_remove: set[int] = set()
624+
625+
for node in ast.walk(tree):
626+
if not isinstance(node, ast.ClassDef):
627+
continue
628+
for item in node.body:
629+
if not isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
630+
continue
631+
for dec in item.decorator_list:
632+
dec_line = dec.lineno - 1
633+
line_content = lines[dec_line]
634+
635+
if "expectedFailure" not in line_content:
636+
continue
637+
638+
has_comment_on_line = COMMENT in line_content
639+
has_comment_before = (
640+
dec_line > 0
641+
and lines[dec_line - 1].strip().startswith("#")
642+
and COMMENT in lines[dec_line - 1]
643+
)
644+
645+
if not has_comment_on_line and not has_comment_before:
646+
continue # not our marker
647+
648+
# Check if there's a reason (on either the decorator or before)
649+
for check_line in (
650+
line_content,
651+
lines[dec_line - 1] if has_comment_before else "",
652+
):
653+
match = re.search(rf"{COMMENT}(.*)", check_line)
654+
if match and match.group(1).strip(";:, "):
655+
break # has a reason, keep it
656+
else:
657+
# No reason found — strip this decorator
658+
stripped_tests.add((node.name, item.name))
659+
660+
if _is_super_call_only(item):
661+
# Remove entire super-call override (the method
662+
# exists only to apply the decorator; without it
663+
# the override is pointless and blocks parent
664+
# consolidation)
665+
first_line = item.decorator_list[0].lineno - 1
666+
if first_line > 0:
667+
prev = lines[first_line - 1].strip()
668+
if prev.startswith("#") and COMMENT in prev:
669+
first_line -= 1
670+
for i in range(first_line, item.end_lineno):
671+
lines_to_remove.add(i)
672+
else:
673+
lines_to_remove.add(dec_line)
674+
675+
if has_comment_before:
676+
lines_to_remove.add(dec_line - 1)
677+
678+
# Also remove a reason-comment on the line after (old format)
679+
if (
680+
has_comment_on_line
681+
and dec_line + 1 < len(lines)
682+
and lines[dec_line + 1].strip().startswith("#")
683+
and COMMENT not in lines[dec_line + 1]
684+
):
685+
lines_to_remove.add(dec_line + 1)
686+
687+
if not lines_to_remove:
688+
return contents, stripped_tests
689+
690+
for idx in sorted(lines_to_remove, reverse=True):
691+
del lines[idx]
692+
693+
return "\n".join(lines) + "\n" if lines else "", stripped_tests
694+
695+
490696
def extract_test_methods(contents: str) -> set[tuple[str, str]]:
491697
"""
492698
Extract all test method names from file contents.
@@ -529,6 +735,13 @@ def auto_mark_file(
529735
if not test_path.exists():
530736
raise FileNotFoundError(f"File not found: {test_path}")
531737

738+
# Strip reason-less markers so those tests fail normally and we capture
739+
# their error messages during the test run.
740+
contents = test_path.read_text(encoding="utf-8")
741+
contents, stripped_tests = strip_reasonless_expected_failures(contents)
742+
if stripped_tests:
743+
test_path.write_text(contents, encoding="utf-8")
744+
532745
test_name = get_test_module_name(test_path)
533746
if verbose:
534747
print(f"Running test: {test_name}")
@@ -559,6 +772,13 @@ def auto_mark_file(
559772
else:
560773
failing_tests = set()
561774

775+
# Re-mark stripped tests that still fail (to restore markers with reasons).
776+
# Uses inheritance expansion: if a parent marker was stripped, child
777+
# failures are included so _consolidate_to_parent can re-mark the parent.
778+
failing_tests |= _expand_stripped_to_children(
779+
contents, stripped_tests, all_failing_tests
780+
)
781+
562782
regressions = all_failing_tests - failing_tests
563783

564784
if verbose:
@@ -626,6 +846,19 @@ def auto_mark_directory(
626846
if not test_dir.is_dir():
627847
raise ValueError(f"Not a directory: {test_dir}")
628848

849+
# Get all .py files in directory
850+
test_files = sorted(test_dir.glob("**/*.py"))
851+
852+
# Strip reason-less markers from ALL files before running tests so those
853+
# tests fail normally and we capture their error messages.
854+
stripped_per_file: dict[pathlib.Path, set[tuple[str, str]]] = {}
855+
for test_file in test_files:
856+
contents = test_file.read_text(encoding="utf-8")
857+
contents, stripped = strip_reasonless_expected_failures(contents)
858+
if stripped:
859+
test_file.write_text(contents, encoding="utf-8")
860+
stripped_per_file[test_file] = stripped
861+
629862
test_name = get_test_module_name(test_dir)
630863
if verbose:
631864
print(f"Running test: {test_name}")
@@ -644,9 +877,6 @@ def auto_mark_directory(
644877
total_regressions = 0
645878
all_regressions: list[tuple[str, str, str, str]] = []
646879

647-
# Get all .py files in directory
648-
test_files = sorted(test_dir.glob("**/*.py"))
649-
650880
for test_file in test_files:
651881
# Get module prefix for this file (e.g., "test_inspect.test_inspect")
652882
module_prefix = get_test_module_name(test_file)
@@ -671,6 +901,15 @@ def auto_mark_directory(
671901
else:
672902
failing_tests = set()
673903

904+
# Re-mark stripped tests that still fail (restore markers with reasons).
905+
# Uses inheritance expansion for parent→child mapping.
906+
stripped = stripped_per_file.get(test_file, set())
907+
if stripped:
908+
file_contents = test_file.read_text(encoding="utf-8")
909+
failing_tests |= _expand_stripped_to_children(
910+
file_contents, stripped, all_failing_tests
911+
)
912+
674913
regressions = all_failing_tests - failing_tests
675914

676915
if failing_tests or unexpected_successes:

0 commit comments

Comments
 (0)