diff --git a/funasr/models/fsmn_vad_streaming/model.py b/funasr/models/fsmn_vad_streaming/model.py
index f62e36a2c..7dfb96a05 100644
--- a/funasr/models/fsmn_vad_streaming/model.py
+++ b/funasr/models/fsmn_vad_streaming/model.py
@@ -724,7 +724,7 @@ def inference(
             if len(segments_i) > 0:
                 segments.extend(*segments_i)
 
-        cache["prev_samples"] = audio_sample[:-m]
+        cache["prev_samples"] = audio_sample[-m:] if m > 0 else torch.empty(0)
         if _is_final:
             self.init_cache(cache)
 
diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py
index 16021ceb6..cbf666992 100644
--- a/funasr/models/paraformer_streaming/model.py
+++ b/funasr/models/paraformer_streaming/model.py
@@ -642,7 +642,7 @@ def inference(
         result_i = {"key": key[0], "text": text_postprocessed}
         result = [result_i]
 
-        cache["prev_samples"] = audio_sample[:-m]
+        cache["prev_samples"] = audio_sample[-m:] if m > 0 else torch.empty(0)
         if _is_final:
             self.init_cache(cache, **kwargs)
 
diff --git a/funasr/models/scama/model.py b/funasr/models/scama/model.py
index c15f435a4..22a48bccc 100644
--- a/funasr/models/scama/model.py
+++ b/funasr/models/scama/model.py
@@ -637,7 +637,7 @@ def init_cache(self, cache: dict = {}, **kwargs):
         cache["decoder"] = cache_decoder
         cache["frontend"] = {}
 
-        cache["prev_samples"] = torch.empty(0).to(device=device)
+        cache["prev_samples"] = torch.empty(0)
 
         return cache
 
@@ -726,7 +726,7 @@ def inference(
         result_i = {"key": key[0], "text": text_postprocessed}
         result = [result_i]
 
-        cache["prev_samples"] = audio_sample[:-m]
+        cache["prev_samples"] = audio_sample[-m:] if m > 0 else torch.empty(0)
         if _is_final:
             self.init_cache(cache, **kwargs)