@@ -250,6 +250,113 @@ def path_to_test_parts(path: str) -> list[str]:
250250 return parts [- 2 :]
251251
252252
253+ def _expand_stripped_to_children (
254+ contents : str ,
255+ stripped_tests : set [tuple [str , str ]],
256+ all_failing_tests : set [tuple [str , str ]],
257+ ) -> set [tuple [str , str ]]:
258+ """Find child-class failures that correspond to stripped parent-class markers.
259+
260+ When ``strip_reasonless_expected_failures`` removes a marker from a parent
261+ (mixin) class, test failures are reported against the concrete subclasses,
262+ not the parent itself. This function maps those child failures back so
263+ they get re-marked (and later consolidated to the parent by
264+ ``_consolidate_to_parent``).
265+
266+ Returns the set of ``(class, method)`` pairs from *all_failing_tests* that
267+ should be re-marked.
268+ """
269+ # Direct matches (stripped test itself is a concrete TestCase)
270+ result = stripped_tests & all_failing_tests
271+
272+ unmatched = stripped_tests - all_failing_tests
273+ if not unmatched :
274+ return result
275+
276+ tree = ast .parse (contents )
277+ class_bases , class_methods = _build_inheritance_info (tree )
278+
279+ for parent_cls , method_name in unmatched :
280+ # parent must actually define this method
281+ if method_name not in class_methods .get (parent_cls , set ()):
282+ continue
283+ for cls in class_bases :
284+ if cls == parent_cls :
285+ continue
286+ if method_name in class_methods .get (cls , set ()):
287+ continue
288+ if (
289+ _find_method_definition (cls , method_name , class_bases , class_methods )
290+ == parent_cls
291+ and (cls , method_name ) in all_failing_tests
292+ ):
293+ result .add ((cls , method_name ))
294+
295+ return result
296+
297+
298+ def _consolidate_to_parent (
299+ contents : str ,
300+ failing_tests : set [tuple [str , str ]],
301+ error_messages : dict [tuple [str , str ], str ] | None = None ,
302+ ) -> tuple [set [tuple [str , str ]], dict [tuple [str , str ], str ] | None ]:
303+ """Move failures to the parent class when ALL inheritors fail.
304+
305+ If every concrete subclass that inherits a method from a parent class
306+ appears in *failing_tests*, replace those per-subclass entries with a
307+ single entry on the parent. This avoids creating redundant super-call
308+ overrides in every child.
309+
310+ Returns:
311+ (consolidated_failing_tests, consolidated_error_messages)
312+ """
313+ tree = ast .parse (contents )
314+ class_bases , class_methods = _build_inheritance_info (tree )
315+
316+ # Group by (defining_parent, method) → set of failing children
317+ from collections import defaultdict
318+
319+ groups : dict [tuple [str , str ], set [str ]] = defaultdict (set )
320+ for class_name , method_name in failing_tests :
321+ defining = _find_method_definition (
322+ class_name , method_name , class_bases , class_methods
323+ )
324+ if defining and defining != class_name :
325+ groups [(defining , method_name )].add (class_name )
326+
327+ if not groups :
328+ return failing_tests , error_messages
329+
330+ result = set (failing_tests )
331+ new_error_messages = dict (error_messages ) if error_messages else {}
332+
333+ for (parent , method_name ), failing_children in groups .items ():
334+ # Find ALL classes that inherit this method from parent
335+ all_inheritors : set [str ] = set ()
336+ for cls_name in class_bases :
337+ if cls_name == parent :
338+ continue
339+ # Skip if this class defines the method itself
340+ if method_name in class_methods .get (cls_name , set ()):
341+ continue
342+ if _find_method_definition (cls_name , method_name , class_bases , class_methods ) == parent :
343+ all_inheritors .add (cls_name )
344+
345+ if all_inheritors and failing_children >= all_inheritors :
346+ # All inheritors fail → mark on parent instead
347+ children_keys = {(child , method_name ) for child in failing_children }
348+ result -= children_keys
349+ result .add ((parent , method_name ))
350+ # Pick any child's error message for the parent
351+ if new_error_messages :
352+ for child in failing_children :
353+ msg = new_error_messages .pop ((child , method_name ), "" )
354+ if msg :
355+ new_error_messages [(parent , method_name )] = msg
356+
357+ return result , new_error_messages or error_messages
358+
359+
253360def build_patches (
254361 test_parts_set : set [tuple [str , str ]],
255362 error_messages : dict [tuple [str , str ], str ] | None = None ,
@@ -406,11 +513,18 @@ def remove_expected_failures(
406513 and lines [dec_line - 1 ].strip ().startswith ("#" )
407514 and COMMENT in lines [dec_line - 1 ]
408515 )
516+ has_comment_after = (
517+ dec_line + 1 < len (lines )
518+ and lines [dec_line + 1 ].strip ().startswith ("#" )
519+ and COMMENT not in lines [dec_line + 1 ]
520+ )
409521
410522 if has_comment_on_line or has_comment_before :
411523 lines_to_remove .add (dec_line )
412524 if has_comment_before :
413525 lines_to_remove .add (dec_line - 1 )
526+ if has_comment_after and has_comment_on_line :
527+ lines_to_remove .add (dec_line + 1 )
414528
415529 for line_idx in sorted (lines_to_remove , reverse = True ):
416530 del lines [line_idx ]
@@ -481,12 +595,104 @@ def apply_test_changes(
481595 contents = remove_expected_failures (contents , unexpected_successes )
482596
483597 if failing_tests :
598+ failing_tests , error_messages = _consolidate_to_parent (
599+ contents , failing_tests , error_messages
600+ )
484601 patches = build_patches (failing_tests , error_messages )
485602 contents = apply_patches (contents , patches )
486603
487604 return contents
488605
489606
607+ def strip_reasonless_expected_failures (
608+ contents : str ,
609+ ) -> tuple [str , set [tuple [str , str ]]]:
610+ """Strip @expectedFailure decorators that have no failure reason.
611+
612+ Markers like ``@unittest.expectedFailure # TODO: RUSTPYTHON`` (without a
613+ reason after the semicolon) are removed so the tests fail normally during
614+ the next test run and error messages can be captured.
615+
616+ Returns:
617+ (modified_contents, stripped_tests) where stripped_tests is a set of
618+ (class_name, method_name) tuples whose markers were removed.
619+ """
620+ tree = ast .parse (contents )
621+ lines = contents .splitlines ()
622+ stripped_tests : set [tuple [str , str ]] = set ()
623+ lines_to_remove : set [int ] = set ()
624+
625+ for node in ast .walk (tree ):
626+ if not isinstance (node , ast .ClassDef ):
627+ continue
628+ for item in node .body :
629+ if not isinstance (item , (ast .FunctionDef , ast .AsyncFunctionDef )):
630+ continue
631+ for dec in item .decorator_list :
632+ dec_line = dec .lineno - 1
633+ line_content = lines [dec_line ]
634+
635+ if "expectedFailure" not in line_content :
636+ continue
637+
638+ has_comment_on_line = COMMENT in line_content
639+ has_comment_before = (
640+ dec_line > 0
641+ and lines [dec_line - 1 ].strip ().startswith ("#" )
642+ and COMMENT in lines [dec_line - 1 ]
643+ )
644+
645+ if not has_comment_on_line and not has_comment_before :
646+ continue # not our marker
647+
648+ # Check if there's a reason (on either the decorator or before)
649+ for check_line in (
650+ line_content ,
651+ lines [dec_line - 1 ] if has_comment_before else "" ,
652+ ):
653+ match = re .search (rf"{ COMMENT } (.*)" , check_line )
654+ if match and match .group (1 ).strip (";:, " ):
655+ break # has a reason, keep it
656+ else :
657+ # No reason found — strip this decorator
658+ stripped_tests .add ((node .name , item .name ))
659+
660+ if _is_super_call_only (item ):
661+ # Remove entire super-call override (the method
662+ # exists only to apply the decorator; without it
663+ # the override is pointless and blocks parent
664+ # consolidation)
665+ first_line = item .decorator_list [0 ].lineno - 1
666+ if first_line > 0 :
667+ prev = lines [first_line - 1 ].strip ()
668+ if prev .startswith ("#" ) and COMMENT in prev :
669+ first_line -= 1
670+ for i in range (first_line , item .end_lineno ):
671+ lines_to_remove .add (i )
672+ else :
673+ lines_to_remove .add (dec_line )
674+
675+ if has_comment_before :
676+ lines_to_remove .add (dec_line - 1 )
677+
678+ # Also remove a reason-comment on the line after (old format)
679+ if (
680+ has_comment_on_line
681+ and dec_line + 1 < len (lines )
682+ and lines [dec_line + 1 ].strip ().startswith ("#" )
683+ and COMMENT not in lines [dec_line + 1 ]
684+ ):
685+ lines_to_remove .add (dec_line + 1 )
686+
687+ if not lines_to_remove :
688+ return contents , stripped_tests
689+
690+ for idx in sorted (lines_to_remove , reverse = True ):
691+ del lines [idx ]
692+
693+ return "\n " .join (lines ) + "\n " if lines else "" , stripped_tests
694+
695+
490696def extract_test_methods (contents : str ) -> set [tuple [str , str ]]:
491697 """
492698 Extract all test method names from file contents.
@@ -529,6 +735,13 @@ def auto_mark_file(
529735 if not test_path .exists ():
530736 raise FileNotFoundError (f"File not found: { test_path } " )
531737
738+ # Strip reason-less markers so those tests fail normally and we capture
739+ # their error messages during the test run.
740+ contents = test_path .read_text (encoding = "utf-8" )
741+ contents , stripped_tests = strip_reasonless_expected_failures (contents )
742+ if stripped_tests :
743+ test_path .write_text (contents , encoding = "utf-8" )
744+
532745 test_name = get_test_module_name (test_path )
533746 if verbose :
534747 print (f"Running test: { test_name } " )
@@ -559,6 +772,13 @@ def auto_mark_file(
559772 else :
560773 failing_tests = set ()
561774
775+ # Re-mark stripped tests that still fail (to restore markers with reasons).
776+ # Uses inheritance expansion: if a parent marker was stripped, child
777+ # failures are included so _consolidate_to_parent can re-mark the parent.
778+ failing_tests |= _expand_stripped_to_children (
779+ contents , stripped_tests , all_failing_tests
780+ )
781+
562782 regressions = all_failing_tests - failing_tests
563783
564784 if verbose :
@@ -626,6 +846,19 @@ def auto_mark_directory(
626846 if not test_dir .is_dir ():
627847 raise ValueError (f"Not a directory: { test_dir } " )
628848
849+ # Get all .py files in directory
850+ test_files = sorted (test_dir .glob ("**/*.py" ))
851+
852+ # Strip reason-less markers from ALL files before running tests so those
853+ # tests fail normally and we capture their error messages.
854+ stripped_per_file : dict [pathlib .Path , set [tuple [str , str ]]] = {}
855+ for test_file in test_files :
856+ contents = test_file .read_text (encoding = "utf-8" )
857+ contents , stripped = strip_reasonless_expected_failures (contents )
858+ if stripped :
859+ test_file .write_text (contents , encoding = "utf-8" )
860+ stripped_per_file [test_file ] = stripped
861+
629862 test_name = get_test_module_name (test_dir )
630863 if verbose :
631864 print (f"Running test: { test_name } " )
@@ -644,9 +877,6 @@ def auto_mark_directory(
644877 total_regressions = 0
645878 all_regressions : list [tuple [str , str , str , str ]] = []
646879
647- # Get all .py files in directory
648- test_files = sorted (test_dir .glob ("**/*.py" ))
649-
650880 for test_file in test_files :
651881 # Get module prefix for this file (e.g., "test_inspect.test_inspect")
652882 module_prefix = get_test_module_name (test_file )
@@ -671,6 +901,15 @@ def auto_mark_directory(
671901 else :
672902 failing_tests = set ()
673903
904+ # Re-mark stripped tests that still fail (restore markers with reasons).
905+ # Uses inheritance expansion for parent→child mapping.
906+ stripped = stripped_per_file .get (test_file , set ())
907+ if stripped :
908+ file_contents = test_file .read_text (encoding = "utf-8" )
909+ failing_tests |= _expand_stripped_to_children (
910+ file_contents , stripped , all_failing_tests
911+ )
912+
674913 regressions = all_failing_tests - failing_tests
675914
676915 if failing_tests or unexpected_successes :
0 commit comments