Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions migrations/versions/d1f3a9c2e8b7_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Add baseline_status to regression_test for never-worked tracking

Revision ID: d1f3a9c2e8b7
Revises: c8f3a2b1d4e5
Create Date: 2026-03-07 00:00:00.000000

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = 'd1f3a9c2e8b7'
down_revision = 'c8f3a2b1d4e5'
branch_labels = None
depends_on = None

# Enum values mirror BaselineStatus in mod_regression/models.py
baseline_status_enum = sa.Enum('unknown', 'never_worked', 'established', name='baselinestatus')


def upgrade():
    """Add baseline_status column to regression_test table and backfill it."""
    # On PostgreSQL, op.add_column does NOT implicitly create the ENUM type
    # (only op.create_table does), so create it explicitly first.
    # checkfirst=True makes this a no-op if the type already exists, and on
    # backends without a standalone enum type (MySQL, SQLite) it emits nothing.
    baseline_status_enum.create(op.get_bind(), checkfirst=True)

    # Add column with default so existing rows get 'unknown' immediately
    op.add_column(
        'regression_test',
        sa.Column(
            'baseline_status',
            baseline_status_enum,
            nullable=False,
            server_default='unknown'
        )
    )

    # Historical backfill:
    # - if the regression test has ever passed on either tracked platform, it is established
    # - otherwise keep the trusted state as unknown until a main-repo commit run refreshes it
    op.execute(
        """
        UPDATE regression_test
        SET baseline_status = 'established'
        WHERE last_passed_on_linux IS NOT NULL OR last_passed_on_windows IS NOT NULL
        """
    )


def downgrade():
    """Remove baseline_status column from regression_test table."""
    op.drop_column('regression_test', 'baseline_status')
    # Drop the standalone ENUM type where one exists (PostgreSQL); the column
    # drop alone leaves the type object behind, which would make re-running
    # upgrade() fail with "type already exists". checkfirst=True keeps this a
    # safe no-op on backends without a separate enum type (MySQL, SQLite).
    baseline_status_enum.drop(op.get_bind(), checkfirst=True)
64 changes: 55 additions & 9 deletions mod_ci/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
Status)
from mod_customized.models import CustomizedTest
from mod_home.models import CCExtractorVersion, GeneralData
from mod_regression.models import (Category, RegressionTest,
from mod_regression.models import (BaselineStatus, Category, RegressionTest,
RegressionTestOutput)
from mod_sample.models import Issue
from mod_test.controllers import get_test_results
Expand Down Expand Up @@ -2421,6 +2421,8 @@ def progress_type_request(log, test, test_id, request) -> bool:
message = 'Tests aborted due to an error; please check'

elif status == TestStatus.completed:
if test.test_type == TestType.commit and is_main_repo(test.fork.github):
refresh_baseline_statuses_for_test(test)
# Determine if success or failure
# It fails if any of these happen:
# - A crash (unexpected exit code)
Expand Down Expand Up @@ -2707,6 +2709,39 @@ def finish_type_request(log, test_id, test, request):
log.error(f"Could not save the results for test {test_id}")


def refresh_baseline_statuses_for_test(test: Test) -> None:
    """
    Persist baseline status for each regression test touched by a completed test run.

    Relies on the same full-result logic as the UI and PR comment paths, so
    output-file mismatches and missing expected outputs count as failures in
    addition to exit-code mismatches.

    :param test: The completed test run whose regression results should refresh baseline state.
    :type test: Test
    """
    from run import log

    # Only commit runs against the main repository are authoritative for baselines.
    if test.test_type != TestType.commit or not is_main_repo(test.fork.github):
        return

    seen_ids = set()
    dirty = False
    for category_results in get_test_results(test):
        for entry in category_results['tests']:
            regression_test = entry['test']
            # Skip entries without a recorded result, and de-duplicate tests
            # that appear under multiple categories.
            if entry['result'] is None or regression_test.id in seen_ids:
                continue

            seen_ids.add(regression_test.id)
            status_changed = regression_test.update_baseline_status(passed=not entry['error'])
            if status_changed:
                g.db.add(regression_test)
                dirty = True

    if dirty and not safe_db_commit(g.db, f"refreshing baseline status for test {test.id}"):
        log.error(f"Failed to refresh baseline status for completed test {test.id}")


def set_avg_time(platform, process_type: str, time_taken: int) -> None:
"""
Set average platform preparation time.
Expand Down Expand Up @@ -2756,6 +2791,7 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
extra_failed_tests = []
common_failed_tests = []
fixed_tests = []
never_worked_tests = []
category_stats = []

test_results = get_test_results(test)
Expand All @@ -2764,20 +2800,30 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
category_name = category_results['category'].name

category_test_pass_count = 0
for test in category_results['tests']:
if not test['error']:
for category_test in category_results['tests']:
platform_last_passed = getattr(category_test['test'], platform_column)
if not category_test['error']:
category_test_pass_count += 1
if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
fixed_tests.append(test['test'])
if last_test_master and platform_last_passed != last_test_master.id:
fixed_tests.append(category_test['test'])
else:
if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
common_failed_tests.append(test['test'])
if platform_last_passed is None and category_test['test'].baseline_status != BaselineStatus.unknown:
never_worked_tests.append(category_test['test'])
elif last_test_master and platform_last_passed != last_test_master.id:
common_failed_tests.append(category_test['test'])
else:
extra_failed_tests.append(test['test'])
extra_failed_tests.append(category_test['test'])

category_stats.append(CategoryTestInfo(category_name, len(category_results['tests']), category_test_pass_count))

return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master)
return PrCommentInfo(
category_stats,
extra_failed_tests,
fixed_tests,
common_failed_tests,
never_worked_tests,
last_test_master,
)


def comment_pr(test: Test) -> str:
Expand Down
1 change: 1 addition & 0 deletions mod_ci/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,5 @@ class PrCommentInfo:
extra_failed_tests: List[RegressionTest]
fixed_tests: List[RegressionTest]
common_failed_tests: List[RegressionTest]
never_worked_tests: List[RegressionTest]
last_test_master: Test
65 changes: 64 additions & 1 deletion mod_regression/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ def __repr__(self) -> str:
return f"<Category {self.name}>"


class BaselineStatus(DeclEnum):
    """Enum to track whether a regression test has ever passed.

    This distinguishes true regressions (tests that used to pass but now fail)
    from tests that have never produced correct output on any CCExtractor version.

    Transitions:
    unknown -> established (first test run passes)
    unknown -> never_worked (first test run fails)
    never_worked -> established (a passing run occurs; test now works)
    established stays established (a failure is a regression, not "never worked")
    """

    # Each member is a (database value, human-readable label) pair, per the
    # DeclEnum convention used elsewhere in this models module.
    # No recorded run has established a result for this test yet.
    unknown = "unknown", "Unknown"
    # Every recorded run failed; the test has never been observed passing.
    never_worked = "never_worked", "Never Worked"
    # At least one recorded run passed; subsequent failures are true regressions.
    established = "established", "Established"


class InputType(DeclEnum):
"""Enumerator types for input."""

Expand Down Expand Up @@ -97,6 +115,7 @@ class RegressionTest(Base):
last_passed_on_windows = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
last_passed_on_linux = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
description = Column(String(length=1024))
baseline_status = Column(BaselineStatus.db_type(), nullable=False, default=BaselineStatus.unknown)

def __init__(self, sample_id, command, input_type, output_type, category_id, expected_rc,
active=True, description="") -> None:
Expand All @@ -117,7 +136,8 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
:type expected_rc: int
:param active: The value of the 'active' field of RegressionTest model
:type active: bool

:param description: The value of the 'description' field of RegressionTest model
:type description: str
"""
self.sample_id = sample_id
self.command = command
Expand All @@ -127,6 +147,7 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
self.expected_rc = expected_rc
self.active = active
self.description = description
self.baseline_status = BaselineStatus.unknown

def __repr__(self) -> str:
"""
Expand All @@ -137,6 +158,48 @@ def __repr__(self) -> str:
"""
return f"<RegressionTest {self.id}>"

def update_baseline_status(self, passed: bool) -> bool:
"""
Update baseline_status based on the outcome of a test run.

Called after each completed test run for this regression test.
Returns True if the status changed, False if it stayed the same.

Transition table::

unknown + pass -> established
unknown + fail -> never_worked
never_worked + pass -> established
never_worked + fail -> never_worked (no change)
established + pass -> established (no change)
established + fail -> established (it's a regression, not "never worked")

:param passed: True if exit_code matched expected_rc for this test run.
:type passed: bool
:return: True if the baseline_status changed, False otherwise.
:rtype: bool
"""
previous = self.baseline_status
if passed:
self.baseline_status = BaselineStatus.established
elif self.baseline_status == BaselineStatus.unknown:
self.baseline_status = BaselineStatus.never_worked
return self.baseline_status != previous

@property
def is_regression(self) -> bool:
"""
Return True if a failing result on this test is a true regression.

A result is a regression only when the test is established (has passed before)
but is currently failing. Tests with 'never_worked' or 'unknown' status are
not regressions; they are pre-existing issues.

:return: True if this test can produce a regression result.
:rtype: bool
"""
return self.baseline_status == BaselineStatus.established


class RegressionTestOutput(Base):
"""Model to store output of regression test."""
Expand Down
17 changes: 14 additions & 3 deletions templates/ci/pr_comment.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,22 @@ NOTE: The following tests have been failing on the master branch as well as the
{% endfor %}
</ul>
{% endif %}
{% if comment_info.never_worked_tests | length %}
NOTE: The following tests have never passed on the platform yet:
<ul>
{% for test in comment_info.never_worked_tests %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a>, Last passed:
<span>Never</span>
</li>
{% endfor %}
</ul>
{% endif %}
{% if comment_info.fixed_tests | length %}
Congratulations: Merging this PR would fix the following tests:
<ul>
{% for test in comment_info.fixed_tests %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a>, Last passed: {% if test.last_passed_on %}<a href="{{ url_for('test.by_id', test_id=test.last_passed_on, _external=True) }}">Test {{ test.last_passed_on }}</a>{% else %}<span>Never</span>{% endif %}</li>
{% set last_passed_id = test.last_passed_on_windows if platform.lower() == 'windows' else test.last_passed_on_linux %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a>, Last passed: {% if last_passed_id %}<a href="{{ url_for('test.by_id', test_id=last_passed_id, _external=True) }}">Test {{ last_passed_id }}</a>{% else %}<span>Never</span>{% endif %}</li>
{% endfor %}
</ul>
{% endif %}
Expand All @@ -54,8 +65,8 @@ Congratulations: Merging this PR would fix the following tests:

{% if comment_info.extra_failed_tests | length %}
It seems that not all tests were passed completely. This is an indication that the output of some files is not as expected (but might be according to you).
{% elif comment_info.common_failed_tests | length %}
This PR does not introduce any new test failures. However, some tests are failing on both master and this PR (see above).
{% elif comment_info.common_failed_tests | length or comment_info.never_worked_tests | length %}
This PR does not introduce any new test failures. However, some tests are already failing on master or have never worked on the platform yet (see above).
{% else %}
All tests passed completely.
{% endif %}
Expand Down
Loading