Skip to content

Commit 1ad886d

Browse files
author
litongmacos
committed
fix write file error
1 parent 089924c commit 1ad886d

File tree

5 files changed

+52
-42
lines changed

5 files changed

+52
-42
lines changed

CMakeLists.txt

+4-3
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ else ()
8585
target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE libuv::uv)
8686
endif ()
8787

88-
target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE whisper ZLIB::ZLIB ${USOCKETS_LIBRARY} SampleRate::samplerate)
89-
90-
88+
target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE whisper ZLIB::ZLIB ${USOCKETS_LIBRARY}
89+
SampleRate::samplerate ${SPEEXDSP_LIBRARY})
90+
# 链接头文件
91+
target_include_directories(whisper_server_base_on_uwebsockets PRIVATE ${SPEEXDSP_INCLUDE_DIRS})
9192

examples/audio_vad.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ int main() {
3232
std::vector<float> audio_buffer;
3333
int chunk_size = 160; // 适用于 16 kHz 采样率的 100 毫秒帧
3434
SpeexPreprocessState *st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);
35-
3635
int vad = 1;
3736
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);
3837

examples/simplest.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include "../common/common.h"
22

33
#include "whisper.h"
4-
#include "common/utils.h"
4+
#include "../common/utils.h"
55

66
#include <cmath>
77
#include <cstdio>

handler/inference_handler.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "../params/whisper_params.h"
55
#include "../nlohmann/json.hpp"
66
#include "../common/utils.h"
7-
#include "common/common-m4a.h"
7+
#include "../common/common-m4a.h"
88

99
using json = nlohmann::json;
1010

whisper_server_base_on_uwebsockets.cpp

+46-36
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,9 @@
88
#include <string>
99
#include <whisper.h>
1010
#include <sstream>
11-
11+
#include <speex/speex_preprocess.h>
1212
using namespace stream_components;
1313

14-
bool processAudio(WhisperService service, std::vector<float> pcm32, const whisper_local_stream_params &params);
15-
1614
int main(int argc, char **argv) {
1715
// Read parameters...
1816
whisper_local_stream_params params;
@@ -39,6 +37,8 @@ int main(int argc, char **argv) {
3937
stream_components::WhisperService whisperService(params.service, params.audio, cparams);
4038

4139
const int port = 8090;
40+
std::mutex whisper_mutex;
41+
4242

4343
// started handler
4444
auto started_handler = [](auto *token) {
@@ -65,6 +65,7 @@ int main(int argc, char **argv) {
6565
thread_local wav_writer wavWriter;
6666
thread_local std::string filename;
6767

68+
6869
nlohmann::json response;
6970
if (opCode == uWS::OpCode::TEXT) {
7071
// printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),std::string(message).c_str());
@@ -93,7 +94,7 @@ int main(int argc, char **argv) {
9394
// process binary message(PCM16 data)
9495
auto size = message.size();
9596
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
96-
// printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
97+
printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
9798
// add received PCM16 to audio cache
9899
std::vector<int16_t> pcm16(size / 2);
99100
std::memcpy(pcm16.data(), data, size);
@@ -104,14 +105,17 @@ int main(int argc, char **argv) {
104105
};
105106

106107
// WebSocket /paddlespeech/asr/streaming handler
107-
auto ws_streaming_handler = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
108+
auto ws_streaming_handler = [&whisperService, &params, &whisper_mutex](auto *ws, std::string_view message, uWS::OpCode opCode) {
108109
thread_local std::vector<float> audioBuffer; //thread-localized variable
109110
thread_local wav_writer wavWriter;
110111
thread_local std::string filename;
111-
thread_local bool is_last_active = false;
112+
thread_local bool last_is_speech = false;
113+
thread_local int chunk_size = 160; // 适用于 16 kHz 采样率的 100 毫秒帧
114+
thread_local SpeexPreprocessState *st;
115+
112116
//std::unique_ptr<nlohmann::json> results(new nlohmann::json(nlohmann::json::array()));
113117
thread_local nlohmann::json final_results;
114-
auto thread_id = std::this_thread::get_id();
118+
// auto thread_id = std::this_thread::get_id();
115119
// std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
116120
nlohmann::json response;
117121
if (opCode == uWS::OpCode::TEXT) {
@@ -122,45 +126,50 @@ int main(int argc, char **argv) {
122126
auto jsonMsg = nlohmann::json::parse(message);
123127
std::string signal = jsonMsg["signal"];
124128
if (signal == "start") {
129+
printf("%s start\n",get_current_time().c_str());
130+
125131
if (jsonMsg["name"].is_string()) {
126132
filename = jsonMsg["name"];
127133
} else {
128134
filename = std::to_string(get_current_time_millis()) + ".wav";
129135
}
130-
final_results = nlohmann::json(nlohmann::json::array());
131136
// 发送服务器准备好的消息
132137
response = {{"status", "ok"},
133138
{"signal", "server_ready"}};
134139
ws->send(response.dump(), uWS::OpCode::TEXT);
135140
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
141+
st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);
142+
int vad = 1;
143+
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);
144+
136145
}
137146
if (signal == "end") {
138-
printf("%s end\n");
139-
wavWriter.close();
147+
printf("%s end\n",get_current_time().c_str());
140148
// nlohmann::json response = {{"name",filename},{"signal", signal}};
141149
response = {{"name", filename},
142150
{"signal", signal}};
143-
printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
151+
printf("%s:buffer size:%lu\n",get_current_time().c_str(),audioBuffer.size());
144152
bool isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
145153
if (isOk) {
146154
final_results = get_result(whisperService.ctx);
147155
response["result"] = final_results;
148156
}
149157
ws->send(response.dump(), uWS::OpCode::TEXT);
158+
wavWriter.close();
159+
speex_preprocess_state_destroy(st);
150160
}
151161
// other process logic...
152162
} catch (const std::exception &e) {
153163
std::cerr << "JSON parse error: " << e.what() << std::endl;
154164
auto size = message.size();
155165
}
156166
} else if (opCode == uWS::OpCode::BINARY) {
157-
int size = message.size();
158167
// process binary message(PCM16 data)
168+
auto size = message.size();
159169
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
160170
printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
161171
// add received PCM16 to audio cache
162172
std::vector<int16_t> pcm16(size / 2);
163-
164173
std::memcpy(pcm16.data(), data, size);
165174
//write to file
166175
wavWriter.write(pcm16.data(), size / 2);
@@ -172,28 +181,41 @@ int main(int argc, char **argv) {
172181
//insert to audio_buffer
173182
audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());
174183

175-
printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
184+
// printf("%s:buffer size:% ld\n",get_current_time().c_str(),audioBuffer.size());
176185
// 如果开启了VAD
177186
bool isOk;
178187
// printf("%s: use_vad: %d\n", get_current_time().c_str(), params.audio.use_vad);
179188
if (params.audio.use_vad) {
189+
whisper_mutex.lock();
190+
for (size_t i = 0; i < pcm16.size(); i += chunk_size) {
191+
spx_int16_t frame[chunk_size];
192+
for (int j = 0; j < chunk_size; ++j) {
193+
if (i + j < pcm16.size()) {
194+
frame[j] = (spx_int16_t)(pcm16[i + j]);
195+
} else {
196+
frame[j] = 0; // 对于超出范围的部分填充 0
197+
}
198+
}
199+
int is_speech = speex_preprocess_run(st, frame);
200+
201+
// printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_speech, last_is_speech);
202+
if (!is_speech && last_is_speech) {
203+
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
204+
audioBuffer.clear();
205+
break;
206+
}
207+
last_is_speech = is_speech != 0;
180208

181-
bool is_active = ::vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold,
182-
params.audio.freq_thold, false);
183-
printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_active, is_last_active);
184-
if (!is_active && is_last_active) {
185-
is_last_active = false;
186-
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
187-
audioBuffer.clear();
188-
} else {
189-
is_last_active = is_active;
190209
}
210+
whisper_mutex.unlock();
191211
} else {
192212
// asr
213+
whisper_mutex.lock();
193214
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
194215
audioBuffer.clear();
216+
whisper_mutex.unlock();
195217
}
196-
printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
218+
// printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
197219
if (isOk) {
198220
final_results = get_result(whisperService.ctx);
199221
response["result"] = final_results;
@@ -221,18 +243,6 @@ int main(int argc, char **argv) {
221243
.listen(port, started_handler).run();
222244
}
223245

224-
bool processAudio(WhisperService whisperService, std::vector<float> pcm32, const whisper_local_stream_params &params) {
225-
if (params.audio.use_vad) {
226-
// printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
227-
// TODO: 实现VAD处理,
228-
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
229-
return whisperService.process(pcm32.data(), pcm32.size());
230-
} else {
231-
// asr
232-
return whisperService.process(pcm32.data(), pcm32.size());
233-
}
234-
}
235-
236246

237247

238248

0 commit comments

Comments
 (0)