From 6e8ca9709ac231072caa4a257fb29497e137af25 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Mon, 2 Mar 2026 17:43:05 +0000 Subject: [PATCH 01/11] Add transcript filtering config and fix output config deserialization --- sdk/batch/speechmatics/batch/_models.py | 22 ++++++-- tests/batch/test_models.py | 69 +++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 tests/batch/test_models.py diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 84970b11..994837f3 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -97,6 +97,8 @@ class TranscriptionConfig: enable_partials: Enable partial transcript results. max_delay: Maximum delay for transcript delivery. max_delay_mode: Mode for handling max delay. + transcript_filtering_config: If True, words identified as disfluencies + are removed from the transcript. """ language: str = "en" @@ -112,11 +114,14 @@ class TranscriptionConfig: enable_partials: Optional[bool] = None max_delay: Optional[float] = None max_delay_mode: Optional[str] = None - + transcript_filtering_config: Optional [bool] = None + def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" - return {k: v for k, v in asdict(self).items() if v is not None} - + result = asdict(self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None}) + if "transcript_filtering_config" in result: + result["transcript_filtering_config"] = {"remove_disfluencies": result["transcript_filtering_config"]} + return result @dataclass class OutputConfig: @@ -347,7 +352,10 @@ def from_dict(cls, data: dict[str, Any]) -> JobConfig: transcription_config = None if "transcription_config" in data: - tc_data = data["transcription_config"] + tc_data = data["transcription_config"].copy() + if "transcript_filtering_config" in tc_data: + tfc = tc_data["transcript_filtering_config"] + tc_data["transcript_filtering_config"] = tfc.get("remove_disfluencies") if isinstance(tfc, dict) else tfc transcription_config = TranscriptionConfig(**tc_data) alignment_config = None @@ -405,6 +413,11 @@ def from_dict(cls, data: dict[str, Any]) -> JobConfig: fd_data = data["fetch_data"] fetch_data = FetchData(**fd_data) + output_config = None + if "output_config" in data: + oc_data = data["output_config"] + output_config = OutputConfig(**oc_data) + return cls( type=job_type, fetch_data=fetch_data, @@ -419,6 +432,7 @@ def from_dict(cls, data: dict[str, Any]) -> JobConfig: topic_detection_config=topic_detection_config, auto_chapters_config=auto_chapters_config, audio_events_config=audio_events_config, + output_config=output_config, ) diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py new file mode 100644 index 00000000..73261efc --- /dev/null +++ b/tests/batch/test_models.py @@ -0,0 +1,69 @@ +from speechmatics.batch._models import JobConfig, TranscriptionConfig + + +class TestTranscriptFilteringConfigToDict: + def test_true_serializes_to_remove_disfluencies_dict(self): + config = TranscriptionConfig(transcript_filtering_config=True) + result = config.to_dict() + assert result["transcript_filtering_config"] == {"remove_disfluencies": True} + + def test_false_serializes_to_remove_disfluencies_dict(self): + config = TranscriptionConfig(transcript_filtering_config=False) + result = config.to_dict() + assert result["transcript_filtering_config"] == {"remove_disfluencies": False} + + def test_none_excluded_from_output(self): + config = TranscriptionConfig() + result = config.to_dict() + assert "transcript_filtering_config" not in result + + +class TestTranscriptFilteringConfigFromDict: + def test_dict_form_deserializes_to_bool(self): + data = { + "type": "transcription", + "transcription_config": { + "language": "en", + "transcript_filtering_config": {"remove_disfluencies": True}, + }, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + assert job_config.transcription_config.transcript_filtering_config is True + + def test_bool_form_passes_through(self): + data = { + "type": "transcription", + "transcription_config": { + "language": "en", + "transcript_filtering_config": True, + }, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + assert job_config.transcription_config.transcript_filtering_config is True + + def test_absent_field_is_none(self): + data = { + "type": "transcription", + "transcription_config": {"language": "en"}, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + assert job_config.transcription_config.transcript_filtering_config is None + + +class TestOutputConfigFromDict: + def test_output_config_deserialized(self): + data = { + "type": "transcription", + "output_config": {"generate_lattice": True}, + } + job_config = JobConfig.from_dict(data) + assert job_config.output_config is not None + assert job_config.output_config.generate_lattice is True + + def test_absent_output_config_is_none(self): + data = {"type": "transcription"} + job_config = JobConfig.from_dict(data) + assert job_config.output_config is None From 2f9653018e2fffeefb76fcceb2fad68d760e6fdb Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Mon, 2 Mar 2026 18:25:28 +0000 Subject: [PATCH 02/11] Fix formatting and code style in batch _models.py --- sdk/batch/speechmatics/batch/_models.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 994837f3..158bc355 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -114,8 +114,8 @@ class TranscriptionConfig: enable_partials: Optional[bool] = None max_delay: Optional[float] = None max_delay_mode: Optional[str] = None - transcript_filtering_config: Optional [bool] = None - + transcript_filtering_config: Optional[bool] = None + def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" result = asdict(self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None}) @@ -123,6 +123,7 @@ def to_dict(self) -> dict[str, Any]: result["transcript_filtering_config"] = {"remove_disfluencies": result["transcript_filtering_config"]} return result + @dataclass class OutputConfig: """Configuration for output formatting.""" @@ -355,7 +356,9 @@ def from_dict(cls, data: dict[str, Any]) -> JobConfig: tc_data = data["transcription_config"].copy() if "transcript_filtering_config" in tc_data: tfc = tc_data["transcript_filtering_config"] - tc_data["transcript_filtering_config"] = tfc.get("remove_disfluencies") if isinstance(tfc, dict) else tfc + tc_data["transcript_filtering_config"] = ( + tfc.get("remove_disfluencies") if isinstance(tfc, dict) else tfc + ) transcription_config = TranscriptionConfig(**tc_data) alignment_config = None From 41d0310e47c00fcf268f787e013f496af62b5e43 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 11:22:15 +0000 Subject: [PATCH 03/11] Refactor transcript filtering config to use own dataclass --- sdk/batch/speechmatics/batch/_models.py | 34 ++++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 158bc355..2b20ed66 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -97,10 +97,14 @@ class TranscriptionConfig: enable_partials: Enable partial transcript results. max_delay: Maximum delay for transcript delivery. max_delay_mode: Mode for handling max delay. - transcript_filtering_config: If True, words identified as disfluencies - are removed from the transcript. + transcript_filtering_config: Configuration for filtering transcription. + defaults to None. """ + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary, excluding None values.""" + return {k: v for k, v in asdict(self).items() if v is not None} + language: str = "en" operating_point: OperatingPoint = OperatingPoint.ENHANCED output_locale: Optional[str] = None @@ -114,16 +118,8 @@ class TranscriptionConfig: enable_partials: Optional[bool] = None max_delay: Optional[float] = None max_delay_mode: Optional[str] = None - transcript_filtering_config: Optional[bool] = None - - def to_dict(self) -> dict[str, Any]: - """Convert to dictionary, excluding None values.""" - result = asdict(self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None}) - if "transcript_filtering_config" in result: - result["transcript_filtering_config"] = {"remove_disfluencies": result["transcript_filtering_config"]} - return result - - + transcript_filtering_config: Optional[object] = None + @dataclass class OutputConfig: """Configuration for output formatting.""" @@ -272,6 +268,17 @@ def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" return {k: v for k, v in asdict(self).items() if v is not None} +@dataclass +class TranscriptFilteringConfig: + """Configuration for transcript filtering.""" + + remove_disfluencies: Optional[bool] = None + replacements: Optional[list[dict[str, str]]] = None + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary, excluding None values.""" + return {k: v for k, v in asdict(self).items() if v is not None} + @dataclass class JobConfig: @@ -312,6 +319,7 @@ class JobConfig: auto_chapters_config: Optional[AutoChaptersConfig] = None audio_events_config: Optional[AudioEventsConfig] = None output_config: Optional[OutputConfig] = None + transcript_filtering_config: Optional[TranscriptFilteringConfig] = None def to_dict(self) -> dict[str, Any]: """Convert job config to dictionary for API submission.""" @@ -343,6 +351,8 @@ def to_dict(self) -> dict[str, Any]: config["audio_events_config"] = self.audio_events_config.to_dict() if self.output_config: config["output_config"] = self.output_config.to_dict() + if self.transcript_filtering_config: + config["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() return config From f5f05453b839efef31ab2d15c01e6320dcdf3d23 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 13:32:49 +0000 Subject: [PATCH 04/11] Fix transcript filtering config serialization and deserialization --- sdk/batch/speechmatics/batch/_models.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 2b20ed66..e37a28e7 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -103,7 +103,10 @@ class TranscriptionConfig: def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" - return {k: v for k, v in asdict(self).items() if v is not None} + result = {k: v for k, v in asdict(self).items() if v is not None} + if isinstance(self.transcript_filtering_config, TranscriptFilteringConfig): + result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() + return result language: str = "en" operating_point: OperatingPoint = OperatingPoint.ENHANCED @@ -366,9 +369,10 @@ def from_dict(cls, data: dict[str, Any]) -> JobConfig: tc_data = data["transcription_config"].copy() if "transcript_filtering_config" in tc_data: tfc = tc_data["transcript_filtering_config"] - tc_data["transcript_filtering_config"] = ( - tfc.get("remove_disfluencies") if isinstance(tfc, dict) else tfc - ) + if isinstance(tfc, dict): + tc_data["transcript_filtering_config"] = TranscriptFilteringConfig(**tfc) + elif isinstance(tfc, bool): + tc_data["transcript_filtering_config"] = TranscriptFilteringConfig(remove_disfluencies=tfc) transcription_config = TranscriptionConfig(**tc_data) alignment_config = None From c526cc9b8bc89c419c9a2858f0dbf24afd5cf65c Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 13:43:47 +0000 Subject: [PATCH 05/11] Update TranscriptFilteringConfig dataclass --- sdk/batch/speechmatics/batch/_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index e37a28e7..898f2990 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -106,6 +106,8 @@ def to_dict(self) -> dict[str, Any]: result = {k: v for k, v in asdict(self).items() if v is not None} if isinstance(self.transcript_filtering_config, TranscriptFilteringConfig): result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() + elif isinstance(self.transcript_filtering_config, bool): + result["transcript_filtering_config"] = {"remove_disfluencies": self.transcript_filtering_config} return result language: str = "en" @@ -275,7 +277,7 @@ def to_dict(self) -> dict[str, Any]: class TranscriptFilteringConfig: """Configuration for transcript filtering.""" - remove_disfluencies: Optional[bool] = None + remove_disfluencies: Optional[object] = None replacements: Optional[list[dict[str, str]]] = None def to_dict(self) -> dict[str, Any]: From 2a9066e640de800251314021ee2121fede50959e Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 13:43:56 +0000 Subject: [PATCH 06/11] Add tests for transcript filtering config replacements field --- tests/batch/test_models.py | 77 +++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index 73261efc..b59b2f0f 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -1,4 +1,4 @@ -from speechmatics.batch._models import JobConfig, TranscriptionConfig +from speechmatics.batch._models import JobConfig, TranscriptFilteringConfig, TranscriptionConfig class TestTranscriptFilteringConfigToDict: @@ -17,9 +17,37 @@ def test_none_excluded_from_output(self): result = config.to_dict() assert "transcript_filtering_config" not in result + def test_replacements_serialized(self): + replacements = [{"from": "um", "to": ""}, {"from": "uh", "to": ""}] + config = TranscriptionConfig( + transcript_filtering_config=TranscriptFilteringConfig(replacements=replacements) + ) + result = config.to_dict() + assert result["transcript_filtering_config"] == {"replacements": replacements} + + def test_replacements_absent_when_none(self): + config = TranscriptionConfig( + transcript_filtering_config=TranscriptFilteringConfig(remove_disfluencies=True) + ) + result = config.to_dict() + assert "replacements" not in result["transcript_filtering_config"] + + def test_replacements_and_remove_disfluencies_together(self): + replacements = [{"from": "gonna", "to": "going to"}] + config = TranscriptionConfig( + transcript_filtering_config=TranscriptFilteringConfig( + remove_disfluencies=True, replacements=replacements + ) + ) + result = config.to_dict() + assert result["transcript_filtering_config"] == { + "remove_disfluencies": True, + "replacements": replacements, + } + class TestTranscriptFilteringConfigFromDict: - def test_dict_form_deserializes_to_bool(self): + def test_dict_form_deserializes_to_config_object(self): data = { "type": "transcription", "transcription_config": { @@ -29,9 +57,11 @@ def test_dict_form_deserializes_to_bool(self): } job_config = JobConfig.from_dict(data) assert job_config.transcription_config is not None - assert job_config.transcription_config.transcript_filtering_config is True + tfc = job_config.transcription_config.transcript_filtering_config + assert isinstance(tfc, TranscriptFilteringConfig) + assert tfc.remove_disfluencies is True - def test_bool_form_passes_through(self): + def test_bool_form_normalizes_to_config_object(self): data = { "type": "transcription", "transcription_config": { @@ -41,7 +71,9 @@ def test_bool_form_passes_through(self): } job_config = JobConfig.from_dict(data) assert job_config.transcription_config is not None - assert job_config.transcription_config.transcript_filtering_config is True + tfc = job_config.transcription_config.transcript_filtering_config + assert isinstance(tfc, TranscriptFilteringConfig) + assert tfc.remove_disfluencies is True def test_absent_field_is_none(self): data = { @@ -52,6 +84,41 @@ def test_absent_field_is_none(self): assert job_config.transcription_config is not None assert job_config.transcription_config.transcript_filtering_config is None + def test_dict_with_replacements_deserializes(self): + replacements = [{"from": "um", "to": ""}, {"from": "uh", "to": ""}] + data = { + "type": "transcription", + "transcription_config": { + "language": "en", + "transcript_filtering_config": {"replacements": replacements}, + }, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + tfc = job_config.transcription_config.transcript_filtering_config + assert isinstance(tfc, TranscriptFilteringConfig) + assert tfc.replacements == replacements + assert tfc.remove_disfluencies is None + + def test_dict_with_replacements_and_remove_disfluencies_deserializes(self): + replacements = [{"from": "gonna", "to": "going to"}] + data = { + "type": "transcription", + "transcription_config": { + "language": "en", + "transcript_filtering_config": { + "remove_disfluencies": True, + "replacements": replacements, + }, + }, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + tfc = job_config.transcription_config.transcript_filtering_config + assert isinstance(tfc, TranscriptFilteringConfig) + assert tfc.remove_disfluencies is True + assert tfc.replacements == replacements + class TestOutputConfigFromDict: def test_output_config_deserialized(self): From 4db1e6852940c5039683801a2afd4b265768a794 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 14:02:44 +0000 Subject: [PATCH 07/11] Linting --- sdk/batch/speechmatics/batch/_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 898f2990..d34a4e43 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -124,7 +124,7 @@ def to_dict(self) -> dict[str, Any]: max_delay: Optional[float] = None max_delay_mode: Optional[str] = None transcript_filtering_config: Optional[object] = None - + @dataclass class OutputConfig: """Configuration for output formatting.""" @@ -279,11 +279,11 @@ class TranscriptFilteringConfig: remove_disfluencies: Optional[object] = None replacements: Optional[list[dict[str, str]]] = None - + def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" return {k: v for k, v in asdict(self).items() if v is not None} - + @dataclass class JobConfig: From 27d3cba625ddb16f605a12718865490dc486293f Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 14:38:26 +0000 Subject: [PATCH 08/11] Fix type annotations in TranscriptFilteringConfig and update datatype to object, --- sdk/batch/speechmatics/batch/_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index d34a4e43..93c10e5d 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -106,7 +106,7 @@ def to_dict(self) -> dict[str, Any]: result = {k: v for k, v in asdict(self).items() if v is not None} if isinstance(self.transcript_filtering_config, TranscriptFilteringConfig): result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() - elif isinstance(self.transcript_filtering_config, bool): + elif isinstance(self.transcript_filtering_config, object): result["transcript_filtering_config"] = {"remove_disfluencies": self.transcript_filtering_config} return result @@ -277,7 +277,7 @@ def to_dict(self) -> dict[str, Any]: class TranscriptFilteringConfig: """Configuration for transcript filtering.""" - remove_disfluencies: Optional[object] = None + remove_disfluencies: Optional[bool] = None replacements: Optional[list[dict[str, str]]] = None def to_dict(self) -> dict[str, Any]: From 80e0e3cde84de91cd3e02303223b0a4f8f098fce Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 3 Mar 2026 14:59:54 +0000 Subject: [PATCH 09/11] Remove redundant isinstance check in TranscriptionConfig serialization --- sdk/batch/speechmatics/batch/_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 93c10e5d..cfdb4e2b 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -106,7 +106,7 @@ def to_dict(self) -> dict[str, Any]: result = {k: v for k, v in asdict(self).items() if v is not None} if isinstance(self.transcript_filtering_config, TranscriptFilteringConfig): result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() - elif isinstance(self.transcript_filtering_config, object): + elif self.transcript_filtering_config is not None and isinstance(self.transcript_filtering_config, object): result["transcript_filtering_config"] = {"remove_disfluencies": self.transcript_filtering_config} return result From c2c84c17c78ef00bf2aa012b89ac2aab1493e86e Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Wed, 4 Mar 2026 13:29:49 +0000 Subject: [PATCH 10/11] Simplify transcript filtering config handling and move field to TranscriptionConfig --- sdk/batch/speechmatics/batch/_models.py | 34 ++++++++----------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index cfdb4e2b..8b235969 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -100,16 +100,6 @@ class TranscriptionConfig: transcript_filtering_config: Configuration for filtering transcription. defaults to None. """ - - def to_dict(self) -> dict[str, Any]: - """Convert to dictionary, excluding None values.""" - result = {k: v for k, v in asdict(self).items() if v is not None} - if isinstance(self.transcript_filtering_config, TranscriptFilteringConfig): - result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() - elif self.transcript_filtering_config is not None and isinstance(self.transcript_filtering_config, object): - result["transcript_filtering_config"] = {"remove_disfluencies": self.transcript_filtering_config} - return result - language: str = "en" operating_point: OperatingPoint = OperatingPoint.ENHANCED output_locale: Optional[str] = None @@ -123,7 +113,13 @@ def to_dict(self) -> dict[str, Any]: enable_partials: Optional[bool] = None max_delay: Optional[float] = None max_delay_mode: Optional[str] = None - transcript_filtering_config: Optional[object] = None + transcript_filtering_config: Optional[TranscriptFilteringConfig] = None + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None} + if self.transcript_filtering_config is not None: + result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() + return result @dataclass class OutputConfig: @@ -136,7 +132,6 @@ def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" return {k: v for k, v in asdict(self).items() if v is not None} - @dataclass class AlignmentConfig: """Configuration for alignment jobs.""" @@ -277,14 +272,13 @@ def to_dict(self) -> dict[str, Any]: class TranscriptFilteringConfig: """Configuration for transcript filtering.""" - remove_disfluencies: Optional[bool] = None + remove_disfluencies: bool = False replacements: Optional[list[dict[str, str]]] = None def to_dict(self) -> dict[str, Any]: """Convert to dictionary, excluding None values.""" return {k: v for k, v in asdict(self).items() if v is not None} - @dataclass class JobConfig: """ @@ -324,7 +318,6 @@ class JobConfig: auto_chapters_config: Optional[AutoChaptersConfig] = None audio_events_config: Optional[AudioEventsConfig] = None output_config: Optional[OutputConfig] = None - transcript_filtering_config: Optional[TranscriptFilteringConfig] = None def to_dict(self) -> dict[str, Any]: """Convert job config to dictionary for API submission.""" @@ -356,9 +349,6 @@ def to_dict(self) -> dict[str, Any]: config["audio_events_config"] = self.audio_events_config.to_dict() if self.output_config: config["output_config"] = self.output_config.to_dict() - if self.transcript_filtering_config: - config["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() - return config @classmethod @@ -369,12 +359,8 @@ def from_dict(cls, data: dict[str, Any]) -> JobConfig: transcription_config = None if "transcription_config" in data: tc_data = data["transcription_config"].copy() - if "transcript_filtering_config" in tc_data: - tfc = tc_data["transcript_filtering_config"] - if isinstance(tfc, dict): - tc_data["transcript_filtering_config"] = TranscriptFilteringConfig(**tfc) - elif isinstance(tfc, bool): - tc_data["transcript_filtering_config"] = TranscriptFilteringConfig(remove_disfluencies=tfc) + if "transcript_filtering_config" in tc_data and isinstance(tc_data["transcript_filtering_config"], dict): + tc_data["transcript_filtering_config"] = TranscriptFilteringConfig(**tc_data["transcript_filtering_config"]) transcription_config = TranscriptionConfig(**tc_data) alignment_config = None From 0c4236e43141573624960e5972b97392392366b4 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Wed, 4 Mar 2026 13:30:04 +0000 Subject: [PATCH 11/11] Update transcript filtering config tests to reflect dataclass changes and remove boolean support --- tests/batch/test_models.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index b59b2f0f..d262685e 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -2,13 +2,17 @@ class TestTranscriptFilteringConfigToDict: - def test_true_serializes_to_remove_disfluencies_dict(self): - config = TranscriptionConfig(transcript_filtering_config=True) + def test_remove_disfluencies_true_serializes_correctly(self): + config = TranscriptionConfig( + transcript_filtering_config=TranscriptFilteringConfig(remove_disfluencies=True) + ) result = config.to_dict() assert result["transcript_filtering_config"] == {"remove_disfluencies": True} - def test_false_serializes_to_remove_disfluencies_dict(self): - config = TranscriptionConfig(transcript_filtering_config=False) + def test_remove_disfluencies_false_included_in_output(self): + config = TranscriptionConfig( + transcript_filtering_config=TranscriptFilteringConfig(remove_disfluencies=False) + ) result = config.to_dict() assert result["transcript_filtering_config"] == {"remove_disfluencies": False} @@ -23,7 +27,10 @@ def test_replacements_serialized(self): transcript_filtering_config=TranscriptFilteringConfig(replacements=replacements) ) result = config.to_dict() - assert result["transcript_filtering_config"] == {"replacements": replacements} + assert result["transcript_filtering_config"] == { + "remove_disfluencies": False, + "replacements": replacements, + } def test_replacements_absent_when_none(self): config = TranscriptionConfig( @@ -61,20 +68,6 @@ def test_dict_form_deserializes_to_config_object(self): assert isinstance(tfc, TranscriptFilteringConfig) assert tfc.remove_disfluencies is True - def test_bool_form_normalizes_to_config_object(self): - data = { - "type": "transcription", - "transcription_config": { - "language": "en", - "transcript_filtering_config": True, - }, - } - job_config = JobConfig.from_dict(data) - assert job_config.transcription_config is not None - tfc = job_config.transcription_config.transcript_filtering_config - assert isinstance(tfc, TranscriptFilteringConfig) - assert tfc.remove_disfluencies is True - def test_absent_field_is_none(self): data = { "type": "transcription", @@ -98,7 +91,7 @@ def test_dict_with_replacements_deserializes(self): tfc = job_config.transcription_config.transcript_filtering_config assert isinstance(tfc, TranscriptFilteringConfig) assert tfc.replacements == replacements - assert tfc.remove_disfluencies is None + assert tfc.remove_disfluencies is False def test_dict_with_replacements_and_remove_disfluencies_deserializes(self): replacements = [{"from": "gonna", "to": "going to"}]