Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions migrations/versions/d1f3a9c2e8b7_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Add baseline_status to regression_test for never-worked tracking

Revision ID: d1f3a9c2e8b7
Revises: c8f3a2b1d4e5
Create Date: 2026-03-07 00:00:00.000000

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = 'd1f3a9c2e8b7'
down_revision = 'c8f3a2b1d4e5'
branch_labels = None
depends_on = None

# Enum values mirror BaselineStatus in mod_regression/models.py
baseline_status_enum = sa.Enum('unknown', 'never_worked', 'established', name='baselinestatus')


def upgrade():
    """Add baseline_status column to regression_test table and backfill it."""
    # On PostgreSQL, op.add_column does NOT implicitly create the ENUM type
    # (only op.create_table does), so create it explicitly first.
    # checkfirst=True makes this a no-op if the type already exists, and on
    # backends without a standalone enum type (MySQL, SQLite) it emits nothing.
    baseline_status_enum.create(op.get_bind(), checkfirst=True)

    # Add column with default so existing rows get 'unknown' immediately
    op.add_column(
        'regression_test',
        sa.Column(
            'baseline_status',
            baseline_status_enum,
            nullable=False,
            server_default='unknown'
        )
    )

    # Historical backfill:
    # - if the regression test has ever passed on either tracked platform, it is established
    # - otherwise keep the trusted state as unknown until a main-repo commit run refreshes it
    op.execute(
        """
        UPDATE regression_test
        SET baseline_status = 'established'
        WHERE last_passed_on_linux IS NOT NULL OR last_passed_on_windows IS NOT NULL
        """
    )


def downgrade():
    """Remove baseline_status column from regression_test table."""
    op.drop_column('regression_test', 'baseline_status')
    # Drop the standalone ENUM type where one exists (PostgreSQL); the column
    # drop alone leaves the type object behind, which would make re-running
    # upgrade() fail with "type already exists". checkfirst=True keeps this a
    # safe no-op on backends without a separate enum type (MySQL, SQLite).
    baseline_status_enum.drop(op.get_bind(), checkfirst=True)
64 changes: 55 additions & 9 deletions mod_ci/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
Status)
from mod_customized.models import CustomizedTest
from mod_home.models import CCExtractorVersion, GeneralData
from mod_regression.models import (Category, RegressionTest,
from mod_regression.models import (BaselineStatus, Category, RegressionTest,
RegressionTestOutput)
from mod_sample.models import Issue
from mod_test.controllers import get_test_results
Expand Down Expand Up @@ -2421,6 +2421,8 @@ def progress_type_request(log, test, test_id, request) -> bool:
message = 'Tests aborted due to an error; please check'

elif status == TestStatus.completed:
if test.test_type == TestType.commit and is_main_repo(test.fork.github):
refresh_baseline_statuses_for_test(test)
# Determine if success or failure
# It fails if any of these happen:
# - A crash (unexpected exit code)
Expand Down Expand Up @@ -2707,6 +2709,39 @@ def finish_type_request(log, test_id, test, request):
log.error(f"Could not save the results for test {test_id}")


def refresh_baseline_statuses_for_test(test: Test) -> None:
    """
    Persist baseline status for each regression test touched by a completed test run.

    Relies on the same full-result logic as the UI and PR comment paths, so
    output-file mismatches and missing expected outputs count as failures in
    addition to exit-code mismatches.

    :param test: The completed test run whose regression results should refresh baseline state.
    :type test: Test
    """
    from run import log

    # Only commit runs against the main repository are authoritative for baselines.
    if test.test_type != TestType.commit or not is_main_repo(test.fork.github):
        return

    seen_ids = set()
    dirty = False
    for category_results in get_test_results(test):
        for entry in category_results['tests']:
            regression_test = entry['test']
            # Skip entries without a recorded result, and de-duplicate tests
            # that appear under multiple categories.
            if entry['result'] is None or regression_test.id in seen_ids:
                continue

            seen_ids.add(regression_test.id)
            status_changed = regression_test.update_baseline_status(passed=not entry['error'])
            if status_changed:
                g.db.add(regression_test)
                dirty = True

    if dirty and not safe_db_commit(g.db, f"refreshing baseline status for test {test.id}"):
        log.error(f"Failed to refresh baseline status for completed test {test.id}")


def set_avg_time(platform, process_type: str, time_taken: int) -> None:
"""
Set average platform preparation time.
Expand Down Expand Up @@ -2756,6 +2791,7 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
extra_failed_tests = []
common_failed_tests = []
fixed_tests = []
never_worked_tests = []
category_stats = []

test_results = get_test_results(test)
Expand All @@ -2764,20 +2800,30 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
category_name = category_results['category'].name

category_test_pass_count = 0
for test in category_results['tests']:
if not test['error']:
for category_test in category_results['tests']:
platform_last_passed = getattr(category_test['test'], platform_column)
if not category_test['error']:
category_test_pass_count += 1
if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
fixed_tests.append(test['test'])
if last_test_master and platform_last_passed != last_test_master.id:
fixed_tests.append(category_test['test'])
else:
if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
common_failed_tests.append(test['test'])
if platform_last_passed is None and category_test['test'].baseline_status != BaselineStatus.unknown:
never_worked_tests.append(category_test['test'])
elif last_test_master and platform_last_passed != last_test_master.id:
common_failed_tests.append(category_test['test'])
else:
extra_failed_tests.append(test['test'])
extra_failed_tests.append(category_test['test'])

category_stats.append(CategoryTestInfo(category_name, len(category_results['tests']), category_test_pass_count))

return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master)
return PrCommentInfo(
category_stats,
extra_failed_tests,
fixed_tests,
common_failed_tests,
never_worked_tests,
last_test_master,
)


def comment_pr(test: Test) -> str:
Expand Down
1 change: 1 addition & 0 deletions mod_ci/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,5 @@ class PrCommentInfo:
extra_failed_tests: List[RegressionTest]
fixed_tests: List[RegressionTest]
common_failed_tests: List[RegressionTest]
never_worked_tests: List[RegressionTest]
last_test_master: Test
65 changes: 64 additions & 1 deletion mod_regression/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ def __repr__(self) -> str:
return f"<Category {self.name}>"


class BaselineStatus(DeclEnum):
    """Enum to track whether a regression test has ever passed.

    This distinguishes true regressions (tests that used to pass but now fail)
    from tests that have never produced correct output on any CCExtractor version.

    Transitions:
    unknown -> established (first test run passes)
    unknown -> never_worked (first test run fails)
    never_worked -> established (a passing run occurs; test now works)
    established stays established (a failure is a regression, not "never worked")
    """

    # Each member is a (database value, human-readable label) pair, per the
    # DeclEnum convention used elsewhere in this models module.
    # No recorded run has established a result for this test yet.
    unknown = "unknown", "Unknown"
    # Every recorded run failed; the test has never been observed passing.
    never_worked = "never_worked", "Never Worked"
    # At least one recorded run passed; subsequent failures are true regressions.
    established = "established", "Established"


class InputType(DeclEnum):
"""Enumerator types for input."""

Expand Down Expand Up @@ -97,6 +115,7 @@ class RegressionTest(Base):
last_passed_on_windows = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
last_passed_on_linux = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
description = Column(String(length=1024))
baseline_status = Column(BaselineStatus.db_type(), nullable=False, default=BaselineStatus.unknown)

def __init__(self, sample_id, command, input_type, output_type, category_id, expected_rc,
active=True, description="") -> None:
Expand All @@ -117,7 +136,8 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
:type expected_rc: int
:param active: The value of the 'active' field of RegressionTest model
:type active: bool

:param description: The value of the 'description' field of RegressionTest model
:type description: str
"""
self.sample_id = sample_id
self.command = command
Expand All @@ -127,6 +147,7 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
self.expected_rc = expected_rc
self.active = active
self.description = description
self.baseline_status = BaselineStatus.unknown

def __repr__(self) -> str:
"""
Expand All @@ -137,6 +158,48 @@ def __repr__(self) -> str:
"""
return f"<RegressionTest {self.id}>"

def update_baseline_status(self, passed: bool) -> bool:
"""
Update baseline_status based on the outcome of a test run.

Called after each completed test run for this regression test.
Returns True if the status changed, False if it stayed the same.

Transition table::

unknown + pass -> established
unknown + fail -> never_worked
never_worked + pass -> established
never_worked + fail -> never_worked (no change)
established + pass -> established (no change)
established + fail -> established (it's a regression, not "never worked")

:param passed: True if exit_code matched expected_rc for this test run.
:type passed: bool
:return: True if the baseline_status changed, False otherwise.
:rtype: bool
"""
previous = self.baseline_status
if passed:
self.baseline_status = BaselineStatus.established
elif self.baseline_status == BaselineStatus.unknown:
self.baseline_status = BaselineStatus.never_worked
return self.baseline_status != previous

@property
def is_regression(self) -> bool:
"""
Return True if a failing result on this test is a true regression.

A result is a regression only when the test is established (has passed before)
but is currently failing. Tests with 'never_worked' or 'unknown' status are
not regressions; they are pre-existing issues.

:return: True if this test can produce a regression result.
:rtype: bool
"""
return self.baseline_status == BaselineStatus.established


class RegressionTestOutput(Base):
"""Model to store output of regression test."""
Expand Down
17 changes: 14 additions & 3 deletions templates/ci/pr_comment.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,22 @@ NOTE: The following tests have been failing on the master branch as well as the
{% endfor %}
</ul>
{% endif %}
{% if comment_info.never_worked_tests | length %}
NOTE: The following tests have never passed on the platform yet:
<ul>
{% for test in comment_info.never_worked_tests %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a>, Last passed:
<span>Never</span>
</li>
{% endfor %}
</ul>
{% endif %}
{% if comment_info.fixed_tests | length %}
Congratulations: Merging this PR would fix the following tests:
<ul>
{% for test in comment_info.fixed_tests %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a>, Last passed: {% if test.last_passed_on %}<a href="{{ url_for('test.by_id', test_id=test.last_passed_on, _external=True) }}">Test {{ test.last_passed_on }}</a>{% else %}<span>Never</span>{% endif %}</li>
{% set last_passed_id = test.last_passed_on_windows if platform.lower() == 'windows' else test.last_passed_on_linux %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a>, Last passed: {% if last_passed_id %}<a href="{{ url_for('test.by_id', test_id=last_passed_id, _external=True) }}">Test {{ last_passed_id }}</a>{% else %}<span>Never</span>{% endif %}</li>
{% endfor %}
</ul>
{% endif %}
Expand All @@ -54,8 +65,8 @@ Congratulations: Merging this PR would fix the following tests:

{% if comment_info.extra_failed_tests | length %}
It seems that not all tests were passed completely. This is an indication that the output of some files is not as expected (but might be according to you).
{% elif comment_info.common_failed_tests | length %}
This PR does not introduce any new test failures. However, some tests are failing on both master and this PR (see above).
{% elif comment_info.common_failed_tests | length or comment_info.never_worked_tests | length %}
This PR does not introduce any new test failures. However, some tests are already failing on master or have never worked on the platform yet (see above).
{% else %}
All tests passed completely.
{% endif %}
Expand Down
Loading