From c761eb2655417f36c30e857de3254955f8bd37bf Mon Sep 17 00:00:00 2001 From: Adnan Qureshi Date: Sat, 10 Jan 2026 16:00:04 +0530 Subject: [PATCH 1/4] Add low-quality experience filter operator --- trinity/buffer/operators/__init__.py | 2 ++ .../operators/filters/low_quality_filter.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 trinity/buffer/operators/filters/low_quality_filter.py diff --git a/trinity/buffer/operators/__init__.py b/trinity/buffer/operators/__init__.py index 258d4d76b5..98cf1e4c5b 100644 --- a/trinity/buffer/operators/__init__.py +++ b/trinity/buffer/operators/__init__.py @@ -9,6 +9,8 @@ "reward_shaping_mapper": "trinity.buffer.operators.mappers.reward_shaping_mapper.RewardShapingMapper", "pass_rate_calculator": "trinity.buffer.operators.mappers.pass_rate_calculator.PassRateCalculator", "data_juicer": "trinity.buffer.operators.data_juicer_operator.DataJuicerOperator", + "low_quality_experience_filter": "trinity.buffer.operators.filters.low_quality_filter.LowQualityExperienceFilter", + }, ) diff --git a/trinity/buffer/operators/filters/low_quality_filter.py b/trinity/buffer/operators/filters/low_quality_filter.py new file mode 100644 index 0000000000..b9e55806ba --- /dev/null +++ b/trinity/buffer/operators/filters/low_quality_filter.py @@ -0,0 +1,19 @@ +from typing import List, Tuple +import math + +from trinity.buffer.operators.experience_operator import ExperienceOperator +from trinity.common.experience import Experience + + +class LowQualityExperienceFilter(ExperienceOperator): + def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: + kept = [] + for e in exps: + r = e.reward + if r is None: + continue + if isinstance(r, float) and math.isnan(r): + continue + kept.append(e) + + return kept, {"filtered_count": len(exps) - len(kept)} From 77d04d6b7a264a382e5513dce64e1d5be75d90f6 Mon Sep 17 00:00:00 2001 From: Adnan Qureshi <160136063+AdnanQureshi3@users.noreply.github.com> Date: Sat, 10 Jan 2026 16:13:21 +0530 Subject: [PATCH 2/4] Update trinity/buffer/operators/filters/low_quality_filter.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- trinity/buffer/operators/filters/low_quality_filter.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/trinity/buffer/operators/filters/low_quality_filter.py b/trinity/buffer/operators/filters/low_quality_filter.py index b9e55806ba..cd712edb4c 100644 --- a/trinity/buffer/operators/filters/low_quality_filter.py +++ b/trinity/buffer/operators/filters/low_quality_filter.py @@ -7,13 +7,6 @@ class LowQualityExperienceFilter(ExperienceOperator): def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: - kept = [] - for e in exps: - r = e.reward - if r is None: - continue - if isinstance(r, float) and math.isnan(r): - continue - kept.append(e) + kept = [e for e in exps if e.reward is not None and e.reward == e.reward] return kept, {"filtered_count": len(exps) - len(kept)} From 7601b75c6eb56f68afff40e75e1255e46a72d96c Mon Sep 17 00:00:00 2001 From: Adnan Qureshi Date: Sat, 10 Jan 2026 22:22:20 +0530 Subject: [PATCH 3/4] Rename to InvalidRewardFilter and add comments --- trinity/buffer/operators/__init__.py | 2 +- .../operators/filters/low_quality_filter.py | 12 ------------ .../buffer/operators/filters/reward_filter.py | 16 ++++++++++++++++ 3 files changed, 17 insertions(+), 13 deletions(-) delete mode 100644 trinity/buffer/operators/filters/low_quality_filter.py diff --git a/trinity/buffer/operators/__init__.py b/trinity/buffer/operators/__init__.py index 98cf1e4c5b..47ed7b2dc7 100644 --- a/trinity/buffer/operators/__init__.py +++ b/trinity/buffer/operators/__init__.py @@ -9,7 +9,7 @@ "reward_shaping_mapper": "trinity.buffer.operators.mappers.reward_shaping_mapper.RewardShapingMapper", "pass_rate_calculator": "trinity.buffer.operators.mappers.pass_rate_calculator.PassRateCalculator", "data_juicer": "trinity.buffer.operators.data_juicer_operator.DataJuicerOperator", - "low_quality_experience_filter": "trinity.buffer.operators.filters.low_quality_filter.LowQualityExperienceFilter", + "invalid_reward_filter": "trinity.buffer.operators.filters.reward_filter.InvalidRewardFilter", }, ) diff --git a/trinity/buffer/operators/filters/low_quality_filter.py b/trinity/buffer/operators/filters/low_quality_filter.py deleted file mode 100644 index cd712edb4c..0000000000 --- a/trinity/buffer/operators/filters/low_quality_filter.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import List, Tuple -import math - -from trinity.buffer.operators.experience_operator import ExperienceOperator -from trinity.common.experience import Experience - - -class LowQualityExperienceFilter(ExperienceOperator): - def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: - kept = [e for e in exps if e.reward is not None and e.reward == e.reward] - - return kept, {"filtered_count": len(exps) - len(kept)} diff --git a/trinity/buffer/operators/filters/reward_filter.py b/trinity/buffer/operators/filters/reward_filter.py index dc5bd92e7e..66c40181cd 100644 --- a/trinity/buffer/operators/filters/reward_filter.py +++ b/trinity/buffer/operators/filters/reward_filter.py @@ -50,3 +50,19 @@ def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: final_count = len(result_exps) metrics["filtered_count"] = original_count - final_count return result_exps, metrics + + +class InvalidRewardFilter(ExperienceOperator): + """ + Filters out experiences with invalid reward values. + + Note: This operator assumes that rewards are already computed and stored in the + Experience object.Any experience with a missing (`None`) or invalid (`NaN`) + reward is removed to prevent low-quality data from entering the training + pipeline. + """ + def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: + kept = [e for e in exps if e.reward is not None and e.reward == e.reward] + + return kept, {"filtered_count": len(exps) - len(kept)} + From a4906da8252a0e38c9fb98c5e3e8820e6156a97a Mon Sep 17 00:00:00 2001 From: Adnan Qureshi Date: Sun, 11 Jan 2026 16:21:55 +0530 Subject: [PATCH 4/4] fix: apply pre-commit formatting fixes --- trinity/buffer/operators/__init__.py | 1 - trinity/buffer/operators/filters/reward_filter.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/trinity/buffer/operators/__init__.py b/trinity/buffer/operators/__init__.py index 47ed7b2dc7..e83b7d05ee 100644 --- a/trinity/buffer/operators/__init__.py +++ b/trinity/buffer/operators/__init__.py @@ -10,7 +10,6 @@ "pass_rate_calculator": "trinity.buffer.operators.mappers.pass_rate_calculator.PassRateCalculator", "data_juicer": "trinity.buffer.operators.data_juicer_operator.DataJuicerOperator", "invalid_reward_filter": "trinity.buffer.operators.filters.reward_filter.InvalidRewardFilter", - }, ) diff --git a/trinity/buffer/operators/filters/reward_filter.py b/trinity/buffer/operators/filters/reward_filter.py index 66c40181cd..07126db12f 100644 --- a/trinity/buffer/operators/filters/reward_filter.py +++ b/trinity/buffer/operators/filters/reward_filter.py @@ -50,7 +50,7 @@ def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: final_count = len(result_exps) metrics["filtered_count"] = original_count - final_count return result_exps, metrics - + class InvalidRewardFilter(ExperienceOperator): """ @@ -61,8 +61,8 @@ class InvalidRewardFilter(ExperienceOperator): reward is removed to prevent low-quality data from entering the training pipeline. """ + def process(self, exps: List[Experience]) -> Tuple[List[Experience], dict]: kept = [e for e in exps if e.reward is not None and e.reward == e.reward] return kept, {"filtered_count": len(exps) - len(kept)} -