Fix file-write error; use SpeexDSP VAD in the streaming handler

litongmacos · litongmacos · commit 1ad886dfecac · 2023-11-25T01:00:32.000-10:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -85,7 +85,8 @@ else ()
     target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE libuv::uv)
 endif ()
 
-target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE whisper ZLIB::ZLIB ${USOCKETS_LIBRARY} SampleRate::samplerate)
-
-
+target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE whisper ZLIB::ZLIB ${USOCKETS_LIBRARY}
+        SampleRate::samplerate ${SPEEXDSP_LIBRARY})
+# Add the SpeexDSP header search path (include directories, not a link step)
+target_include_directories(whisper_server_base_on_uwebsockets PRIVATE ${SPEEXDSP_INCLUDE_DIRS})
 
diff --git a/examples/audio_vad.cpp b/examples/audio_vad.cpp
@@ -32,7 +32,6 @@ int main() {
   std::vector<float> audio_buffer;
   int chunk_size = 160; // 适用于 16 kHz 采样率的 100 毫秒帧
   SpeexPreprocessState *st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);
-
   int vad = 1;
   speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);
 
diff --git a/examples/simplest.cpp b/examples/simplest.cpp
@@ -1,7 +1,7 @@
 #include "../common/common.h"
 
 #include "whisper.h"
-#include "common/utils.h"
+#include "../common/utils.h"
 
 #include <cmath>
 #include <cstdio>
diff --git a/handler/inference_handler.cpp b/handler/inference_handler.cpp
@@ -4,7 +4,7 @@
 #include "../params/whisper_params.h"
 #include "../nlohmann/json.hpp"
 #include "../common/utils.h"
-#include "common/common-m4a.h"
+#include "../common/common-m4a.h"
 
 using json = nlohmann::json;
 
diff --git a/whisper_server_base_on_uwebsockets.cpp b/whisper_server_base_on_uwebsockets.cpp
@@ -8,11 +8,9 @@
 #include <string>
 #include <whisper.h>
 #include <sstream>
-
+#include <speex/speex_preprocess.h>
 using namespace stream_components;
 
-bool processAudio(WhisperService service, std::vector<float> pcm32, const whisper_local_stream_params &params);
-
 int main(int argc, char **argv) {
   // Read parameters...
   whisper_local_stream_params params;
@@ -39,6 +37,8 @@ int main(int argc, char **argv) {
   stream_components::WhisperService whisperService(params.service, params.audio, cparams);
 
   const int port = 8090;
+  std::mutex whisper_mutex;
+
 
   // started handler
   auto started_handler = [](auto *token) {
@@ -65,6 +65,7 @@ int main(int argc, char **argv) {
     thread_local wav_writer wavWriter;
     thread_local std::string filename;
 
+
     nlohmann::json response;
     if (opCode == uWS::OpCode::TEXT) {
       // printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),std::string(message).c_str());
@@ -93,7 +94,7 @@ int main(int argc, char **argv) {
       // process binary message（PCM16 data）
       auto size = message.size();
       std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
-      // printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
+      printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
       // add received PCM16 to audio cache
       std::vector<int16_t> pcm16(size / 2);
       std::memcpy(pcm16.data(), data, size);
@@ -104,14 +105,17 @@ int main(int argc, char **argv) {
   };
 
   // WebSocket /paddlespeech/asr/streaming handler
-  auto ws_streaming_handler = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
+  auto ws_streaming_handler = [&whisperService, &params, &whisper_mutex](auto *ws, std::string_view message, uWS::OpCode opCode) {
     thread_local std::vector<float> audioBuffer; //thread-localized variable
     thread_local wav_writer wavWriter;
     thread_local std::string filename;
-    thread_local bool is_last_active = false;
+    thread_local bool last_is_speech = false;
+    thread_local int chunk_size = 160; // 160 samples = one 10 ms frame at a 16 kHz sample rate
+    thread_local SpeexPreprocessState *st;
+
     //std::unique_ptr<nlohmann::json> results(new nlohmann::json(nlohmann::json::array()));
     thread_local nlohmann::json final_results;
-    auto thread_id = std::this_thread::get_id();
+    // auto thread_id = std::this_thread::get_id();
     // std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
     nlohmann::json response;
     if (opCode == uWS::OpCode::TEXT) {
@@ -122,45 +126,50 @@ int main(int argc, char **argv) {
         auto jsonMsg = nlohmann::json::parse(message);
         std::string signal = jsonMsg["signal"];
         if (signal == "start") {
+          printf("%s start\n",get_current_time().c_str());
+
           if (jsonMsg["name"].is_string()) {
             filename = jsonMsg["name"];
           } else {
             filename = std::to_string(get_current_time_millis()) + ".wav";
           }
-          final_results = nlohmann::json(nlohmann::json::array());
           // 发送服务器准备好的消息
           response = {{"status", "ok"},
                       {"signal", "server_ready"}};
           ws->send(response.dump(), uWS::OpCode::TEXT);
           wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
+          st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);
+          int vad = 1;
+          speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);
+
         }
         if (signal == "end") {
-          printf("%s end\n");
-          wavWriter.close();
+          printf("%s end\n",get_current_time().c_str());
 //          nlohmann::json response = {{"name",filename},{"signal", signal}};
           response = {{"name",   filename},
                       {"signal", signal}};
-           printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
+           printf("%s:buffer size:%lu\n",get_current_time().c_str(),audioBuffer.size());
           bool isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
           if (isOk) {
             final_results = get_result(whisperService.ctx);
             response["result"] = final_results;
           }
           ws->send(response.dump(), uWS::OpCode::TEXT);
+          wavWriter.close();
+          speex_preprocess_state_destroy(st);
         }
         // other process logic...
       } catch (const std::exception &e) {
         std::cerr << "JSON parse error: " << e.what() << std::endl;
         auto size = message.size();
       }
     } else if (opCode == uWS::OpCode::BINARY) {
-      int size = message.size();
       // process binary message（PCM16 data）
+      auto size = message.size();
       std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
       printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
       // add received PCM16 to audio cache
       std::vector<int16_t> pcm16(size / 2);
-
       std::memcpy(pcm16.data(), data, size);
       //write to file
       wavWriter.write(pcm16.data(), size / 2);
@@ -172,28 +181,41 @@ int main(int argc, char **argv) {
       //insert to audio_buffer
       audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());
 
-       printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
+      // printf("%s:buffer size:% ld\n",get_current_time().c_str(),audioBuffer.size());
       // 如果开启了VAD
       bool isOk;
       // printf("%s: use_vad: %d\n", get_current_time().c_str(), params.audio.use_vad);
       if (params.audio.use_vad) {
+        whisper_mutex.lock();
+        for (size_t i = 0; i < pcm16.size(); i += chunk_size) {
+          spx_int16_t frame[chunk_size];
+          for (int j = 0; j < chunk_size; ++j) {
+            if (i + j < pcm16.size()) {
+              frame[j] = (spx_int16_t)(pcm16[i + j]);
+            } else {
+              frame[j] = 0; // 对于超出范围的部分填充 0
+            }
+          }
+          int is_speech = speex_preprocess_run(st, frame);
+
+          // printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_speech, last_is_speech);
+          if (!is_speech && last_is_speech) {
+            isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
+            audioBuffer.clear();
+            break;
+          }
+          last_is_speech = is_speech != 0;
 
-        bool is_active = ::vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold,
-                                      params.audio.freq_thold, false);
-        printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_active, is_last_active);
-        if (!is_active && is_last_active) {
-          is_last_active = false;
-          isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
-          audioBuffer.clear();
-        } else {
-          is_last_active = is_active;
         }
+        whisper_mutex.unlock();
       } else {
         // asr
+        whisper_mutex.lock();
         isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
         audioBuffer.clear();
+        whisper_mutex.unlock();
       }
-      printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
+      // printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
       if (isOk) {
         final_results = get_result(whisperService.ctx);
         response["result"] = final_results;
@@ -221,18 +243,6 @@ int main(int argc, char **argv) {
     .listen(port, started_handler).run();
 }
 
-bool processAudio(WhisperService whisperService, std::vector<float> pcm32, const whisper_local_stream_params &params) {
-  if (params.audio.use_vad) {
-    // printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
-    // TODO: 实现VAD处理，
-    //bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
-    return whisperService.process(pcm32.data(), pcm32.size());
-  } else {
-    // asr
-    return whisperService.process(pcm32.data(), pcm32.size());
-  }
-}
-