From e71519d6981fe8263f3ebfd6fccf3b0adf15e4c0 Mon Sep 17 00:00:00 2001 From: pengkun Date: Wed, 20 Nov 2024 14:09:15 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9FunASR=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E6=A1=86=E6=9E=B6=EF=BC=8C=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E6=97=B62pass=E6=A8=A1=E5=BC=8F=E4=B8=8B?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=A1=86=E6=9E=B6=E5=B1=82=E9=9D=A2=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E5=8F=A5=E5=AD=90=E7=BA=A7=E5=88=AB=E7=9A=84=E6=97=B6?= =?UTF-8?q?=E9=97=B4=E6=88=B3=EF=BC=8C=E5=8D=95=E4=BD=8D=E6=AF=AB=E7=A7=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- runtime/onnxruntime/include/audio.h | 2 + runtime/onnxruntime/include/funasrruntime.h | 2 + runtime/onnxruntime/src/audio.cpp | 40 ++++++++++++++++++ runtime/onnxruntime/src/commonfunc.h | 2 + runtime/onnxruntime/src/funasrruntime.cpp | 20 +++++++++ .../websocket/bin/websocket-server-2pass.cpp | 42 +++++++++++++++++-- .../websocket/bin/websocket-server-2pass.h | 10 ++++- 7 files changed, 112 insertions(+), 6 deletions(-) diff --git a/runtime/onnxruntime/include/audio.h b/runtime/onnxruntime/include/audio.h index 3011050ed..b14d3b85d 100644 --- a/runtime/onnxruntime/include/audio.h +++ b/runtime/onnxruntime/include/audio.h @@ -100,6 +100,8 @@ class DLLAPI Audio { int offset = 0; int speech_start=-1, speech_end=0; int speech_offline_start=-1; + int64_t start = 0; + int64_t end = 0; int seg_sample = MODEL_SAMPLE_RATE/1000; bool LoadPcmwavOnline(const char* buf, int n_file_len, int32_t* sampling_rate); diff --git a/runtime/onnxruntime/include/funasrruntime.h b/runtime/onnxruntime/include/funasrruntime.h index 685c0241f..1a3cff607 100644 --- a/runtime/onnxruntime/include/funasrruntime.h +++ b/runtime/onnxruntime/include/funasrruntime.h @@ -70,6 +70,8 @@ _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index); _FUNASRAPI const char* FunASRGetStamp(FUNASR_RESULT result); _FUNASRAPI const char* FunASRGetStampSents(FUNASR_RESULT result); _FUNASRAPI const char* FunASRGetTpassResult(FUNASR_RESULT result,int n_index); +_FUNASRAPI const int64_t FunASRGetTpassStart(FUNASR_RESULT result); +_FUNASRAPI const int64_t FunASRGetTpassEnd(FUNASR_RESULT result); _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result); _FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result); _FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle); diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp index 22a9ecd29..b73369247 100644 --- a/runtime/onnxruntime/src/audio.cpp +++ b/runtime/onnxruntime/src/audio.cpp @@ -1289,6 +1289,33 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP } } }else{ + + int sample_rate = 16000; // sample_rate 是音频的采样率 这里固定为16000 Hz + float segment_duration = (static_cast(seg_sample) / sample_rate) * 1000; // 每个分段的持续时间(毫秒) + + // for (auto vad_segment : vad_segments) { + // int speech_start_i = -1, speech_end_i = -1; + // if (vad_segment[0] != -1) { + // speech_start_i = vad_segment[0]; + // } + // if (vad_segment[1] != -1) { + // speech_end_i = vad_segment[1]; + // } + + // // 计算并打印语音片段的开始和结束时间 + // if (speech_start_i != -1 && speech_end_i != -1) { + // float start_time = speech_start_i * segment_duration; // 开始时间(秒) + // float end_time = speech_end_i * segment_duration; // 结束时间(秒) + // std::cout << "Speech segment: Start time = " << start_time << "s, End time = " << end_time << "s" << std::endl; + // } else if (speech_start_i != -1) { + // float start_time = speech_start_i * segment_duration; // 仅有开始时间 + // std::cout << "Speech segment: Start time = " << start_time << "s, End time = Unknown" << std::endl; + // } else if (speech_end_i != -1) { + // float end_time = speech_end_i * segment_duration; // 仅有结束时间 + // std::cout << "Speech segment: Start time = Unknown, End time = " << end_time << "s" << std::endl; + // } + // } + for(auto vad_segment: vad_segments){ int speech_start_i=-1, speech_end_i=-1; if(vad_segment[0] != -1){ @@ -1325,6 +1352,13 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP frame = nullptr; } + //设置开始时间和结束时间 + float start_time = speech_start_i * segment_duration; // 开始时间(毫秒) + float end_time = speech_end_i * segment_duration; // 结束时间(毫秒) + // 转换为 int64_t 类型并赋值给类的成员变量 + this->start = static_cast(start_time); + this->end = static_cast(end_time); + //std::cout << "Speech segment: Start time = " << this->start << "ms, End time = " << this->end << "ms" << std::endl; speech_start = -1; speech_offline_start = -1; // [70, -1] @@ -1350,6 +1384,9 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP } } + float start_time = speech_start_i * segment_duration; // 仅有开始时间 + this->start = static_cast(start_time); + //std::cout << "Speech segment: Start time = " << this->start << "ms, End time = Unknown" << std::endl; }else if(speech_end_i != -1){ // [-1,100] if(speech_start == -1 || speech_offline_start == -1){ LOG(ERROR) <<"Vad start is null while vad end is available. Set vad start 0" ; @@ -1399,6 +1436,9 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP frame = nullptr; } } + float end_time = speech_end_i * segment_duration; // 仅有结束时间 + this->end = static_cast(end_time); + //std::cout << "Speech segment: Start time = Unknown, End time = " << this->end << "ms" << std::endl; speech_start = -1; speech_offline_start = -1; } diff --git a/runtime/onnxruntime/src/commonfunc.h b/runtime/onnxruntime/src/commonfunc.h index 6fd553fe0..81fa2422e 100644 --- a/runtime/onnxruntime/src/commonfunc.h +++ b/runtime/onnxruntime/src/commonfunc.h @@ -12,6 +12,8 @@ typedef struct std::string stamp_sents; std::string tpass_msg; float snippet_time; + int64_t start = 0; + int64_t end = 0; }FUNASR_RECOG_RESULT; typedef struct diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp index 628641268..1727900e4 100644 --- a/runtime/onnxruntime/src/funasrruntime.cpp +++ b/runtime/onnxruntime/src/funasrruntime.cpp @@ -523,6 +523,9 @@ p_result->snippet_time = audio->GetTimeLen(); audio->Split(vad_online_handle, chunk_len, input_finished, mode); + p_result->start = audio->start; + p_result->end = audio->end; + //std::cout << "p_result: Start time = " << p_result->start << "ms, End time = " << p_result->end << "ms" << std::endl; funasr::AudioFrame* frame = nullptr; while(audio->FetchChunck(frame) > 0){ @@ -695,6 +698,23 @@ return p_result->tpass_msg.c_str(); } + _FUNASRAPI const int64_t FunASRGetTpassStart(FUNASR_RESULT result) + { + funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result; + if(!p_result) + return 0; + + return p_result->start; + } + _FUNASRAPI const int64_t FunASRGetTpassEnd(FUNASR_RESULT result) + { + funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result; + if(!p_result) + return 0; + + return p_result->end; + } + _FUNASRAPI const char* CTTransformerGetResult(FUNASR_RESULT result,int n_index) { funasr::FUNASR_PUNC_RESULT * p_result = (funasr::FUNASR_PUNC_RESULT*)result; diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp index ff23e9d41..53f499d06 100644 --- a/runtime/websocket/bin/websocket-server-2pass.cpp +++ b/runtime/websocket/bin/websocket-server-2pass.cpp @@ -15,11 +15,19 @@ #include #include #include +#include +#include extern std::unordered_map hws_map_; extern int fst_inc_wts_; extern float global_beam_, lattice_beam_, am_scale_; +int64_t getCurrentTimeMillis() { + auto now = std::chrono::system_clock::now(); + auto millis = std::chrono::duration_cast(now.time_since_epoch()).count(); + return millis; +} + context_ptr WebSocketServer::on_tls_init(tls_mode mode, websocketpp::connection_hdl hdl, std::string& s_certfile, @@ -57,7 +65,13 @@ context_ptr WebSocketServer::on_tls_init(tls_mode mode, return ctx; } -nlohmann::json handle_result(FUNASR_RESULT result) { +nlohmann::json handle_result(FUNASR_RESULT result, websocketpp::connection_hdl& hdl, std::map,std::owner_less>& data_map) { + std::shared_ptr data_msg = nullptr; + auto it = data_map.find(hdl); + if (it != data_map.end()) { + data_msg = it->second; + } + websocketpp::lib::error_code ec; nlohmann::json jsonresult; jsonresult["text"] = ""; @@ -67,12 +81,26 @@ nlohmann::json handle_result(FUNASR_RESULT result) { LOG(INFO) << "online_res :" << tmp_online_msg; jsonresult["text"] = tmp_online_msg; jsonresult["mode"] = "2pass-online"; + + // 如果是第一句话的第一个实时结果或新的句子开始 + if (!data_msg->is_sentence_started) { + data_msg->start_time = FunASRGetTpassStart(result); // 记录句子的开始时间 + data_msg->is_sentence_started = true; + } } + + data_msg->end_time = FunASRGetTpassEnd(result); // 记录句子的结束时间 + std::string tmp_tpass_msg = FunASRGetTpassResult(result, 0); if (tmp_tpass_msg != "") { LOG(INFO) << "offline results : " << tmp_tpass_msg; jsonresult["text"] = tmp_tpass_msg; jsonresult["mode"] = "2pass-offline"; + + // 句子结束,记录结束时间 + jsonresult["start_time"] = data_msg->start_time; + jsonresult["end_time"] = data_msg->end_time; + data_msg->is_sentence_started = false; // 重置句子状态 } std::string tmp_stamp_msg = FunASRGetStamp(result); @@ -98,6 +126,7 @@ nlohmann::json handle_result(FUNASR_RESULT result) { } // feed buffer to asr engine for decoder void WebSocketServer::do_decoder( + std::map,std::owner_less>& data_map, std::vector& buffer, websocketpp::connection_hdl& hdl, nlohmann::json& msg, @@ -158,7 +187,7 @@ void WebSocketServer::do_decoder( } if (Result) { websocketpp::lib::error_code ec; - nlohmann::json jsonresult = handle_result(Result); + nlohmann::json jsonresult = handle_result(Result, hdl, data_map); jsonresult["wav_name"] = wav_name; jsonresult["is_final"] = false; if (jsonresult["text"] != "") { @@ -200,7 +229,7 @@ void WebSocketServer::do_decoder( } if (Result) { websocketpp::lib::error_code ec; - nlohmann::json jsonresult = handle_result(Result); + nlohmann::json jsonresult = handle_result(Result, hdl, data_map); jsonresult["wav_name"] = wav_name; jsonresult["is_final"] = true; if (is_ssl) { @@ -254,7 +283,8 @@ void WebSocketServer::on_open(websocketpp::connection_hdl hdl) { data_msg->msg["audio_fs"] = 16000; // default is 16k data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly data_msg->msg["is_eof"]=false; // if this connection is closed - data_msg->msg["svs_lang"]="auto"; + //data_msg->msg["svs_lang"]="auto"; + data_msg->msg["svs_lang"]="zh"; // SenseVoice使用时,只需要中文则设为zh data_msg->msg["svs_itn"]=true; FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, global_beam_, lattice_beam_, am_scale_); @@ -263,6 +293,8 @@ void WebSocketServer::on_open(websocketpp::connection_hdl hdl) { std::make_shared>>(2); data_msg->strand_ = std::make_shared(io_decoder_); + data_msg->is_sentence_started = false; + data_map.emplace(hdl, data_msg); }catch (std::exception const& e) { std::cerr << "Error: " << e.what() << std::endl; @@ -501,6 +533,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl, std::vector> hotwords_embedding_(*(msg_data->hotwords_embedding)); msg_data->strand_->post( std::bind(&WebSocketServer::do_decoder, this, + data_map, std::move(*(sample_data_p.get())), std::move(hdl), std::ref(msg_data->msg), std::ref(*(punc_cache_p.get())), std::move(hotwords_embedding_), @@ -550,6 +583,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl, std::vector> hotwords_embedding_(*(msg_data->hotwords_embedding)); msg_data->strand_->post( std::bind(&WebSocketServer::do_decoder, this, + data_map, std::move(subvector), std::move(hdl), std::ref(msg_data->msg), std::ref(*(punc_cache_p.get())), diff --git a/runtime/websocket/bin/websocket-server-2pass.h b/runtime/websocket/bin/websocket-server-2pass.h index e61a93b2d..d0d0ca85e 100644 --- a/runtime/websocket/bin/websocket-server-2pass.h +++ b/runtime/websocket/bin/websocket-server-2pass.h @@ -61,7 +61,11 @@ typedef struct { std::string online_res = ""; std::string tpass_res = ""; std::shared_ptr strand_; // for data execute in order - FUNASR_DEC_HANDLE decoder_handle=nullptr; + FUNASR_DEC_HANDLE decoder_handle=nullptr; + + bool is_sentence_started = false; + int64_t start_time = 0; + int64_t end_time = 0; } FUNASR_MESSAGE; // See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about @@ -114,7 +118,9 @@ class WebSocketServer { server_->clear_access_channels(websocketpp::log::alevel::all); } } - void do_decoder(std::vector& buffer, websocketpp::connection_hdl& hdl, + void do_decoder(std::map,std::owner_less>& data_map, + std::vector& buffer, + websocketpp::connection_hdl& hdl, nlohmann::json& msg, std::vector>& punc_cache, std::vector> &hotwords_embedding, From 2fbfd5f00bf9dc386899d2d2b3df3239bf4641de Mon Sep 17 00:00:00 2001 From: pengkun Date: Wed, 20 Nov 2024 14:41:32 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9FunASR=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E6=A1=86=E6=9E=B6=EF=BC=8C=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E6=97=B62pass=E6=A8=A1=E5=BC=8F=E4=B8=8B?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=A1=86=E6=9E=B6=E5=B1=82=E9=9D=A2=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E5=8F=A5=E5=AD=90=E7=BA=A7=E5=88=AB=E7=9A=84=E6=97=B6?= =?UTF-8?q?=E9=97=B4=E6=88=B3=EF=BC=8C=E5=8D=95=E4=BD=8D=E6=AF=AB=E7=A7=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- runtime/onnxruntime/src/audio.cpp | 26 ------------------- runtime/onnxruntime/src/funasrruntime.cpp | 1 - .../websocket/bin/websocket-server-2pass.cpp | 3 +-- 3 files changed, 1 insertion(+), 29 deletions(-) diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp index b73369247..4ceb56a3a 100644 --- a/runtime/onnxruntime/src/audio.cpp +++ b/runtime/onnxruntime/src/audio.cpp @@ -1293,29 +1293,6 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP int sample_rate = 16000; // sample_rate 是音频的采样率 这里固定为16000 Hz float segment_duration = (static_cast(seg_sample) / sample_rate) * 1000; // 每个分段的持续时间(毫秒) - // for (auto vad_segment : vad_segments) { - // int speech_start_i = -1, speech_end_i = -1; - // if (vad_segment[0] != -1) { - // speech_start_i = vad_segment[0]; - // } - // if (vad_segment[1] != -1) { - // speech_end_i = vad_segment[1]; - // } - - // // 计算并打印语音片段的开始和结束时间 - // if (speech_start_i != -1 && speech_end_i != -1) { - // float start_time = speech_start_i * segment_duration; // 开始时间(秒) - // float end_time = speech_end_i * segment_duration; // 结束时间(秒) - // std::cout << "Speech segment: Start time = " << start_time << "s, End time = " << end_time << "s" << std::endl; - // } else if (speech_start_i != -1) { - // float start_time = speech_start_i * segment_duration; // 仅有开始时间 - // std::cout << "Speech segment: Start time = " << start_time << "s, End time = Unknown" << std::endl; - // } else if (speech_end_i != -1) { - // float end_time = speech_end_i * segment_duration; // 仅有结束时间 - // std::cout << "Speech segment: Start time = Unknown, End time = " << end_time << "s" << std::endl; - // } - // } - for(auto vad_segment: vad_segments){ int speech_start_i=-1, speech_end_i=-1; if(vad_segment[0] != -1){ @@ -1358,7 +1335,6 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP // 转换为 int64_t 类型并赋值给类的成员变量 this->start = static_cast(start_time); this->end = static_cast(end_time); - //std::cout << "Speech segment: Start time = " << this->start << "ms, End time = " << this->end << "ms" << std::endl; speech_start = -1; speech_offline_start = -1; // [70, -1] @@ -1386,7 +1362,6 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP float start_time = speech_start_i * segment_duration; // 仅有开始时间 this->start = static_cast(start_time); - //std::cout << "Speech segment: Start time = " << this->start << "ms, End time = Unknown" << std::endl; }else if(speech_end_i != -1){ // [-1,100] if(speech_start == -1 || speech_offline_start == -1){ LOG(ERROR) <<"Vad start is null while vad end is available. Set vad start 0" ; @@ -1438,7 +1413,6 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP } float end_time = speech_end_i * segment_duration; // 仅有结束时间 this->end = static_cast(end_time); - //std::cout << "Speech segment: Start time = Unknown, End time = " << this->end << "ms" << std::endl; speech_start = -1; speech_offline_start = -1; } diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp index 1727900e4..1eb8230bd 100644 --- a/runtime/onnxruntime/src/funasrruntime.cpp +++ b/runtime/onnxruntime/src/funasrruntime.cpp @@ -525,7 +525,6 @@ audio->Split(vad_online_handle, chunk_len, input_finished, mode); p_result->start = audio->start; p_result->end = audio->end; - //std::cout << "p_result: Start time = " << p_result->start << "ms, End time = " << p_result->end << "ms" << std::endl; funasr::AudioFrame* frame = nullptr; while(audio->FetchChunck(frame) > 0){ diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp index 53f499d06..5a04922a9 100644 --- a/runtime/websocket/bin/websocket-server-2pass.cpp +++ b/runtime/websocket/bin/websocket-server-2pass.cpp @@ -283,8 +283,7 @@ void WebSocketServer::on_open(websocketpp::connection_hdl hdl) { data_msg->msg["audio_fs"] = 16000; // default is 16k data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly data_msg->msg["is_eof"]=false; // if this connection is closed - //data_msg->msg["svs_lang"]="auto"; - data_msg->msg["svs_lang"]="zh"; // SenseVoice使用时,只需要中文则设为zh + data_msg->msg["svs_lang"]="auto"; data_msg->msg["svs_itn"]=true; FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, global_beam_, lattice_beam_, am_scale_); From 550ff418b6d086190acf5de88cd95d45ff3f0a16 Mon Sep 17 00:00:00 2001 From: pengkun Date: Fri, 14 Mar 2025 09:25:46 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E8=AF=86=E5=88=AB?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E6=A0=87=E8=AF=86=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- runtime/websocket/bin/websocket-server-2pass.cpp | 9 ++++++++- runtime/websocket/bin/websocket-server-2pass.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp index 5a04922a9..23f417841 100644 --- a/runtime/websocket/bin/websocket-server-2pass.cpp +++ b/runtime/websocket/bin/websocket-server-2pass.cpp @@ -81,10 +81,13 @@ nlohmann::json handle_result(FUNASR_RESULT result, websocketpp::connection_hdl& LOG(INFO) << "online_res :" << tmp_online_msg; jsonresult["text"] = tmp_online_msg; jsonresult["mode"] = "2pass-online"; + jsonresult["slice_type"] = 1; + jsonresult["index"] = data_msg->index; // 如果是第一句话的第一个实时结果或新的句子开始 if (!data_msg->is_sentence_started) { data_msg->start_time = FunASRGetTpassStart(result); // 记录句子的开始时间 + jsonresult["slice_type"] = 0; //0:一段话开始识别; 1:一段话识别中; 2:一段话识别结束 data_msg->is_sentence_started = true; } } @@ -100,6 +103,10 @@ nlohmann::json handle_result(FUNASR_RESULT result, websocketpp::connection_hdl& // 句子结束,记录结束时间 jsonresult["start_time"] = data_msg->start_time; jsonresult["end_time"] = data_msg->end_time; + jsonresult["slice_type"] = 2; + jsonresult["index"] = data_msg->index; + + data_msg->index++; //句子序号 data_msg->is_sentence_started = false; // 重置句子状态 } @@ -283,7 +290,7 @@ void WebSocketServer::on_open(websocketpp::connection_hdl hdl) { data_msg->msg["audio_fs"] = 16000; // default is 16k data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly data_msg->msg["is_eof"]=false; // if this connection is closed - data_msg->msg["svs_lang"]="auto"; + data_msg->msg["svs_lang"]="zh"; data_msg->msg["svs_itn"]=true; FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, global_beam_, lattice_beam_, am_scale_); diff --git a/runtime/websocket/bin/websocket-server-2pass.h b/runtime/websocket/bin/websocket-server-2pass.h index d0d0ca85e..6d6be52df 100644 --- a/runtime/websocket/bin/websocket-server-2pass.h +++ b/runtime/websocket/bin/websocket-server-2pass.h @@ -66,6 +66,7 @@ typedef struct { bool is_sentence_started = false; int64_t start_time = 0; int64_t end_time = 0; + int64_t index = 0; } FUNASR_MESSAGE; // See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about From 6ae8e84574616a6cd36d31ef4c6c5b2f1ed3b059 Mon Sep 17 00:00:00 2001 From: pengkun Date: Fri, 14 Mar 2025 14:16:53 +0800 Subject: [PATCH 4/5] =?UTF-8?q?2pass=E6=A8=A1=E5=BC=8F=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E5=AD=97=E6=AE=B5=E6=96=B0=E5=A2=9E=E5=AF=B9=E8=AF=9D=E5=BC=80?= =?UTF-8?q?=E5=A7=8B=E6=97=B6=E9=97=B4=E6=88=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- runtime/websocket/bin/websocket-server-2pass.cpp | 5 +++++ runtime/websocket/bin/websocket-server-2pass.h | 1 + 2 files changed, 6 insertions(+) diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp index 23f417841..4834f3423 100644 --- a/runtime/websocket/bin/websocket-server-2pass.cpp +++ b/runtime/websocket/bin/websocket-server-2pass.cpp @@ -93,6 +93,7 @@ nlohmann::json handle_result(FUNASR_RESULT result, websocketpp::connection_hdl& } data_msg->end_time = FunASRGetTpassEnd(result); // 记录句子的结束时间 + jsonresult["timestamp"] = data_msg->timestamp; std::string tmp_tpass_msg = FunASRGetTpassResult(result, 0); if (tmp_tpass_msg != "") { @@ -198,6 +199,7 @@ void WebSocketServer::do_decoder( jsonresult["wav_name"] = wav_name; jsonresult["is_final"] = false; if (jsonresult["text"] != "") { + LOG(INFO) << "jsonresult: " << jsonresult.dump(4); if (is_ssl) { wss_server_->send(hdl, jsonresult.dump(), websocketpp::frame::opcode::text, ec); @@ -239,6 +241,7 @@ void WebSocketServer::do_decoder( nlohmann::json jsonresult = handle_result(Result, hdl, data_map); jsonresult["wav_name"] = wav_name; jsonresult["is_final"] = true; + LOG(INFO) << "jsonresult: " << jsonresult.dump(4); if (is_ssl) { wss_server_->send(hdl, jsonresult.dump(), websocketpp::frame::opcode::text, ec); @@ -301,6 +304,8 @@ void WebSocketServer::on_open(websocketpp::connection_hdl hdl) { data_msg->is_sentence_started = false; + data_msg->timestamp = getCurrentTimeMillis(); + data_map.emplace(hdl, data_msg); }catch (std::exception const& e) { std::cerr << "Error: " << e.what() << std::endl; diff --git a/runtime/websocket/bin/websocket-server-2pass.h b/runtime/websocket/bin/websocket-server-2pass.h index 6d6be52df..3ba63089c 100644 --- a/runtime/websocket/bin/websocket-server-2pass.h +++ b/runtime/websocket/bin/websocket-server-2pass.h @@ -67,6 +67,7 @@ typedef struct { int64_t start_time = 0; int64_t end_time = 0; int64_t index = 0; + int64_t timestamp = 0; } FUNASR_MESSAGE; // See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about From b643ebd3eaef1b6f17831bc316e2c1346ad9eb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=20=E5=9D=A4?= Date: Tue, 6 Jan 2026 13:44:24 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E7=94=9F=E4=BA=A7=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../.gradle/8.0/checksums/checksums.lock | Bin 0 -> 17 bytes .../.gradle/8.0/checksums/md5-checksums.bin | Bin 0 -> 20197 bytes .../.gradle/8.0/checksums/sha1-checksums.bin | Bin 0 -> 20495 bytes .../dependencies-accessors.lock | Bin 0 -> 17 bytes .../8.0/dependencies-accessors/gc.properties | 0 .../.gradle/8.0/fileChanges/last-build.bin | Bin 0 -> 1 bytes .../.gradle/8.0/fileHashes/fileHashes.lock | Bin 0 -> 17 bytes .../AndroidClient/.gradle/8.0/gc.properties | 0 .../buildOutputCleanup/buildOutputCleanup.lock | Bin 0 -> 17 bytes .../.gradle/buildOutputCleanup/cache.properties | 2 ++ .../AndroidClient/.gradle/vcs-1/gc.properties | 0 .../websocket/bin/websocket-server-2pass.cpp | 8 ++++---- 12 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 runtime/android/AndroidClient/.gradle/8.0/checksums/checksums.lock create mode 100644 runtime/android/AndroidClient/.gradle/8.0/checksums/md5-checksums.bin create mode 100644 runtime/android/AndroidClient/.gradle/8.0/checksums/sha1-checksums.bin create mode 100644 runtime/android/AndroidClient/.gradle/8.0/dependencies-accessors/dependencies-accessors.lock create mode 100644 runtime/android/AndroidClient/.gradle/8.0/dependencies-accessors/gc.properties create mode 100644 runtime/android/AndroidClient/.gradle/8.0/fileChanges/last-build.bin create mode 100644 runtime/android/AndroidClient/.gradle/8.0/fileHashes/fileHashes.lock create mode 100644 runtime/android/AndroidClient/.gradle/8.0/gc.properties create mode 100644 runtime/android/AndroidClient/.gradle/buildOutputCleanup/buildOutputCleanup.lock create mode 100644 runtime/android/AndroidClient/.gradle/buildOutputCleanup/cache.properties create mode 100644 runtime/android/AndroidClient/.gradle/vcs-1/gc.properties diff --git a/runtime/android/AndroidClient/.gradle/8.0/checksums/checksums.lock b/runtime/android/AndroidClient/.gradle/8.0/checksums/checksums.lock new file mode 100644 index 0000000000000000000000000000000000000000..2cd9aa53bb57aebaae1a1d32dba39a53ecd27f41 GIT binary patch literal 17 UcmZQBO#AYE*K6?>1_?@A+faZ?AQ?v%YtK{Py|mbIw2a?9~*d zG<_U5>QhtrbO=Xa01SWuFaQR?02lxRU;qq&0WbgtzyKHk17H9QfB`T72Ef4o#ef@q zA~NV?lon264yNs*D8niAf%>uVjS^+4bu-Y{cJ<`#0HcL=90X+`q;y3Yf@rSZhw;GT?)?rd5cy>;M|AgLzTO;V|N-k zqTF@|$={?8sO4YSdm7~qWh8&EJ}vm{W839;{3DW&$?BUA2rHi9|3CaOS9@(+kr$88 zS5QoH33tycF-qSxqw#j5AM=*w$-=}cd6WwrNzRR_*RIOXwM4nYVUo*))GBOMPw7TE zKau2$TP5N}7U%JG!uKHg+-j?R7Rqmv(0E%blI!HI9IGEJdWdp6Ns=4z`_FRc@$h`? zmXdt&A1_C?b9VfS=i^55m01>1<66g6aK3=#*1F9FQ)8v?p`0g8a>r9fo?W4B^!3d& zdjpcMk5|^{l+DEJZeL4sUz;a=^KGO`(0D-<$s_mp#3xHm#p}y6BRTm?QIb}OfgTKi z0WbgtzyKHk17H9QfB`T72EYIq00UqE41fVJ00zJS7ytuc01SWuFaQR?02lxRU;qq& z0WbgtzyKHk17H9QfB`V@-y1NZOKQ`f`OIhf7%zH~f=63vw(~7_p|Dztq9l^DJ@&WL zU($_ZNO$`GCN6Cb%Gy&DcwpFB>v2xhNdY3_5+(qO!@NT;MvST4I?*}nNN7WxW>D@t z$_o)k>k*;Pyd^G1q^WHSwi;-EEC`!jan!#h6A=mCh%kQ3CglD+mn!&jjm;_t_@akV|V!R?Jq1vgP)2g?q zTF{yoj)*J969gqc#3s7Gb?Ld7Gw+5=dR=s6OivRgdJw^7-gg(9)e@d!ymGrkMpC8s zIlKHNewbj+m~PUH0WsoP-uIeoMSPhM?w$E*1|$85D4a6^P@EVxQ5ScRtF-)*)}~qu z4gYmVPGW*tX1YyYMR#H~ik0tpf3UESc%|)D<$Ald7!wW?0L5kYYlsnpiQ|e4Cf09IP6|^ux{;GPqu1NZ zzy%ZAaE-6n#G(48rS~GP7&R7HZdAyk79gTX9}&`JY@%Sxh{w+gK0duI-Fk;cRZTFl z7T4IvCPXej7(2b&wn@WtOO0uOmOmnjyAYwO!6wSzh(y6_pZhAm?5JCP;<7a&O7IGP z;m#&5NT%GCzS(@Q$1Krb@2(-e^@q7@OYwdk&QUf|vwh18U;q1^n%`U=Sutoij)=;5 zG^@hTtl&?UHYuaAee%t69Ys$@LJbg6lYxl+jqDmqq1?c|g#sD%F43UWQxq&5z+RV z9!WP2pYFu2L{Xig+LMuhnHF=Nt5igI(T_c*MmxT1rE=Ir0Y6FZh?8Z^)X3je-xBcwA%%y6qU zAIQwCIusBg^_lA`Z9r2vtQ%LR6IR;P-nEFn9~hz&@0sB;pxCT*m8%BQ&gA%Av(gAN zdLN>N2;nfQG1-?*`15W$v`A=GUe}Tjv=}Q;L`1JFB4n8TGh#I+&2%JqiZpZ`-<+w2BZO2WxlFgn<5E;J%+SN5Ms5!-l}9M)HIW|N#$A+JJ@s-$ z4;iH^t0pOwN~zmZZcU_7MlY;X zQHFzhm_~hV244^12n>J$FaQR?02lxRU;qq&0WbgtzyKHk17H9QfB`T72EYIq_`etk zKnu~ti!roSr4O_dDayV-Iv9}|5-S`kSTqKo<{FXzA8ZXt9X&76cRS{ODum|_oSvr3 zbDDrTe>&kuHWWDM%{*d_Ij@QE0?Ce&JK26cnEON$Ug-JgvQVZq9doW9;U!@X5gNPW zbTIdeCj9J9ohfCd4*QvT4ibJY*f!ni_@=3tdxsESG0AyyzJ5;-Gyhw{D=Tblmbr9I z!c?uI#Uz!#qUgYisBtUu&2Ykv@$K1R63xDR3=oS^zhv8X-e_kO9JmHp(${M#}2dq}v&b;-OB=4>+0D8g-? z4mL7hv8^1B=Nl7lH*U0ZVD9A>%)PA$|M8Huu3xft4l})+%=r6+2kR8C7RFp?#GJR1@Hqv#rnZ-}$^9a@Cn{@8Qb@Mo zEFRB2OHL$(>u^mv+)^<2ol7|RO;LS*!{?v}17H9QfB`T72EYIq00UqE41fVJ00zJS z7ytuc01SWuFaQR?02lxRU;qq&0WbgtzyKHk17H9QfB`T72EYIq00UqE4E%coW6@B0 zBftpyM_=I8RU(vSO9HHA>iO|bD$Z+LS5lP9_Wa-^DH?zp%N}*??~&4Z-l7t?Mj zv>AFmu(d)SCr97Zrfa38_m-kq8vi!?Ek3 zhT-*S9{^ozC-Xgfe|jH*g89h!v3@DfoNUcf`rB#M?l9{R>BZnw0lm`$9iDunoa#y7 z-n83T^m0DCJ;rdhph>rO&uN@09N(Lwbla3uNo}h$^zW}si4P4NK6g&ss!Q!ETbYy` zr__vnfz(p{7B*R9M&EboN7TvSp`9mc4!4 z=fOf3`KV-^5*yJO^c=27okHq{M3?7!)VeI^sfWXv6#c-W*3T1l3aM<3 zCCSm2t}8+=E)No&p0`OBYT3Y~_`M-Yjo$g9kScN1kJEotqvCcry1-7RRdQ93iE7a6 z63=2%X3D94j+NaTiuj8wFOI%p)koSM^57gY-kVy4Q^wuOsjT4->T}#ZEgF7q_5Z-_ zD7I+cg7&`jrlOcy^qv=mE3|PsIjb(ndTvo_7kmHGWfu|*mKWkwksVHHlq#pJ#0ky$ zM%`=WHAXMf|IV2daBA!pCdKTxQKR>9DAa0yuE%oN^tcC*KCd<(r%pBF zS|;=hMj@4I=x}w~+o{^(o>yBZO^M!jF1ZukA9`KI%w1L&D5n|)ZgplBs<)&Qf`3hz z&^geAy?6~y{Yk&bP-BTur%)?GqF!AUTerBhwsio{U$3gsZN(v+I?wFI5P#z?@79XC zd1!v;UgM6x?hLATx}tWX8oiU~E39C4B_-WbuB98*mRVdfV!msWW77NC<=$h;w%OoR z6|)zG6|9^(d4;E8U|Q+1jdi=ZG_T^Z#}iGo9=g^e=99vjqnt7q7ya_aC&*rWJ!z=D zOKh>#DUJqCJ(i)-sIhpcQ?T(OP*5E&{p8m0lfmSD1J@3rPTcmuDJk;?=zCN-Ro<2n zf9FZn>-$T^UYl2)Q185}iaxva6-xclXw+C}or<-ZCM9nU9VzdldO@72bImjKsk||n zNj;-86s2a1I)y77!8yKgP4rl{`+(OGK{JQ*i%h&~aH_$eH$?UQ#&cNiI7O15#<`N0 z?~-t&E0uS)TecIYp3(0n)YRxsp_WKz(P8_%G>cK(&%9-QUCTwX(9by4@}3@!-Wy@4 zQ%G^|FL+Rv+1Mfs$Z84j&TC4^Ep*4JR;}I;rCF?;isj@DTrUr+-?45&#*AU?U6-D? s1mM&wW*?o}bLEt2>47Topv#^cRdXKiUp_T)Aw@BNE8mf4BZ5x-166(Ug#Z8m literal 0 HcmV?d00001 diff --git a/runtime/android/AndroidClient/.gradle/8.0/dependencies-accessors/dependencies-accessors.lock b/runtime/android/AndroidClient/.gradle/8.0/dependencies-accessors/dependencies-accessors.lock new file mode 100644 index 0000000000000000000000000000000000000000..0024dfd5a4d69baeebb52711ece772bd334699fd GIT binary patch literal 17 TcmZP;zqTlG@8?Ml3{U_7H%J6_ literal 0 HcmV?d00001 diff --git a/runtime/android/AndroidClient/.gradle/8.0/dependencies-accessors/gc.properties b/runtime/android/AndroidClient/.gradle/8.0/dependencies-accessors/gc.properties new file mode 100644 index 000000000..e69de29bb diff --git a/runtime/android/AndroidClient/.gradle/8.0/fileChanges/last-build.bin b/runtime/android/AndroidClient/.gradle/8.0/fileChanges/last-build.bin new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/runtime/android/AndroidClient/.gradle/8.0/fileHashes/fileHashes.lock b/runtime/android/AndroidClient/.gradle/8.0/fileHashes/fileHashes.lock new file mode 100644 index 0000000000000000000000000000000000000000..924fa3ec0252a7d55322c5c9000b697f2d497412 GIT binary patch literal 17 TcmZSP�(bxusmsg["wav_name"].get() << " | online_res :" << tmp_online_msg; jsonresult["text"] = tmp_online_msg; jsonresult["mode"] = "2pass-online"; jsonresult["slice_type"] = 1; @@ -97,7 +97,7 @@ nlohmann::json handle_result(FUNASR_RESULT result, websocketpp::connection_hdl& std::string tmp_tpass_msg = FunASRGetTpassResult(result, 0); if (tmp_tpass_msg != "") { - LOG(INFO) << "offline results : " << tmp_tpass_msg; + LOG(INFO) << "wav_name: " << data_msg->msg["wav_name"].get() << " | offline results : " << tmp_tpass_msg; jsonresult["text"] = tmp_tpass_msg; jsonresult["mode"] = "2pass-offline"; @@ -199,7 +199,7 @@ void WebSocketServer::do_decoder( jsonresult["wav_name"] = wav_name; jsonresult["is_final"] = false; if (jsonresult["text"] != "") { - LOG(INFO) << "jsonresult: " << jsonresult.dump(4); + //LOG(INFO) << "jsonresult: " << jsonresult.dump(4); if (is_ssl) { wss_server_->send(hdl, jsonresult.dump(), websocketpp::frame::opcode::text, ec); @@ -241,7 +241,7 @@ void WebSocketServer::do_decoder( nlohmann::json jsonresult = handle_result(Result, hdl, data_map); jsonresult["wav_name"] = wav_name; jsonresult["is_final"] = true; - LOG(INFO) << "jsonresult: " << jsonresult.dump(4); + //LOG(INFO) << "jsonresult: " << jsonresult.dump(4); if (is_ssl) { wss_server_->send(hdl, jsonresult.dump(), websocketpp::frame::opcode::text, ec);