Skip to content

Commit e4adda6

Browse files
authored
Add JavaScript async API for OfflineRecognizer decodeStream. (k2-fsa#3049)
This pull request enhances the sherpa-onnx-node JavaScript bindings by introducing an asynchronous API for offline speech recognition stream decoding. By leveraging Node.js NAPI's AsyncWorker, the decodeStream operation can now be performed in a non-blocking manner, allowing for concurrent processing of multiple audio streams. This significantly improves application responsiveness, especially for batch processing or scenarios where the main thread needs to remain unblocked.
1 parent 6319021 commit e4adda6

File tree

5 files changed

+170
-3
lines changed

5 files changed

+170
-3
lines changed

.github/scripts/test-nodejs-npm.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
1515

1616
node ./test-offline-funasr-nano.js
1717

18+
echo "---async---"
19+
20+
node ./test-offline-funasr-nano_async.js
21+
1822
rm -rf sherpa-onnx-funasr-nano-int8-2025-12-30
1923

2024
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,74 @@ static void OfflineRecognizerSetConfigWrapper(const Napi::CallbackInfo &info) {
632632
FreeConfig(c);
633633
}
634634

635+
class DecodeOfflineStreamAsyncWorker : public Napi::AsyncWorker {
636+
public:
637+
DecodeOfflineStreamAsyncWorker(Napi::Env env,
638+
const SherpaOnnxOfflineRecognizer *recognizer,
639+
const SherpaOnnxOfflineStream *stream,
640+
Napi::Promise::Deferred deferred)
641+
: Napi::AsyncWorker(env),
642+
recognizer_(recognizer),
643+
stream_(stream),
644+
deferred_(deferred) {}
645+
646+
void Execute() override {
647+
try {
648+
SherpaOnnxDecodeOfflineStream(recognizer_, stream_);
649+
} catch (const std::exception &e) {
650+
SetError(e.what());
651+
}
652+
}
653+
654+
void OnOK() override {
655+
const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream_);
656+
Napi::String s = Napi::String::New(Env(), json);
657+
SherpaOnnxDestroyOfflineStreamResultJson(json);
658+
deferred_.Resolve(s);
659+
}
660+
661+
void OnError(const Napi::Error &e) override { deferred_.Reject(e.Value()); }
662+
663+
private:
664+
const SherpaOnnxOfflineRecognizer *recognizer_;
665+
const SherpaOnnxOfflineStream *stream_;
666+
Napi::Promise::Deferred deferred_;
667+
};
668+
669+
static Napi::Value DecodeOfflineStreamAsyncWrapper(
670+
const Napi::CallbackInfo &info) {
671+
Napi::Env env = info.Env();
672+
673+
if (info.Length() != 2) {
674+
std::ostringstream os;
675+
os << "Expect 2 arguments. Given: " << info.Length();
676+
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
677+
return env.Null();
678+
}
679+
680+
if (!info[0].IsExternal() || !info[1].IsExternal()) {
681+
Napi::TypeError::New(env,
682+
"Expected recognizer and stream as external pointers")
683+
.ThrowAsJavaScriptException();
684+
return env.Null();
685+
}
686+
687+
const SherpaOnnxOfflineRecognizer *recognizer =
688+
info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
689+
690+
const SherpaOnnxOfflineStream *stream =
691+
info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
692+
693+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env);
694+
695+
auto *worker =
696+
new DecodeOfflineStreamAsyncWorker(env, recognizer, stream, deferred);
697+
698+
worker->Queue();
699+
700+
return deferred.Promise();
701+
}
702+
635703
static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
636704
Napi::Env env = info.Env();
637705
if (info.Length() != 2) {
@@ -710,6 +778,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) {
710778
exports.Set(Napi::String::New(env, "decodeOfflineStream"),
711779
Napi::Function::New(env, DecodeOfflineStreamWrapper));
712780

781+
exports.Set(Napi::String::New(env, "decodeOfflineStreamAsync"),
782+
Napi::Function::New(env, DecodeOfflineStreamAsyncWrapper));
783+
713784
exports.Set(Napi::String::New(env, "offlineRecognizerSetConfig"),
714785
Napi::Function::New(env, OfflineRecognizerSetConfigWrapper));
715786

nodejs-addon-examples/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ The following tables list the examples in this folder.
128128
|[./test_asr_non_streaming_omnilingual_asr_ctc.js](./test_asr_non_streaming_omnilingual_asr_ctc.js)|Non-streaming speech recognition from a file using an [Omnilingual-ASR](https://github.com/facebookresearch/omnilingual-asr) CTC model with greedy search|
129129
|[./test_asr_non_streaming_medasr_ctc.js](./test_asr_non_streaming_medasr_ctc.js)|Non-streaming speech recognition from a file using a [Google MedASR](https://github.com/google-health/medasr) CTC model with greedy search|
130130
|[./test_asr_non_streaming_funasr_nano.js](./test_asr_non_streaming_funasr_nano.js)|Non-streaming speech recognition from a file using a [FunASR Nano](https://modelscope.cn/models/FunAudioLLM/Fun-ASR-Nano-2512) model|
131+
|[./test_asr_non_streaming_funasr_nano_async.js](./test_asr_non_streaming_funasr_nano_async.js)|Async non-streaming speech recognition from multiple files using a [FunASR Nano](https://modelscope.cn/models/FunAudioLLM/Fun-ASR-Nano-2512) model|
131132
|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
132133
|[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
133134
|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
@@ -430,6 +431,16 @@ npm install naudiodon2
430431
node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js
431432
```
432433

434+
### Asynchronous non-streaming speech recognition with FunASR Nano models
435+
436+
```bash
437+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
438+
tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
439+
rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
440+
441+
node ./test_asr_non_streaming_funasr_nano_async.js
442+
```
443+
433444
### Non-streaming speech recognition with FunASR Nano models
434445

435446
```bash
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright (c) 2026 Xiaomi Corporation
2+
// This file shows how to use the async API to decode multiple files
3+
const path = require('path');
4+
const sherpa_onnx = require('sherpa-onnx-node');
5+
6+
/**
 * Build an OfflineRecognizer configured for a FunASR Nano model.
 *
 * @param {string} modelDir - Directory that holds the model files.
 * @param {number} [numThreads=2] - Passed through as `modelConfig.numThreads`.
 * @param {number} [debug=1] - Passed through as `modelConfig.debug`.
 * @returns {sherpa_onnx.OfflineRecognizer} The newly constructed recognizer.
 */
function createRecognizer(modelDir, numThreads = 2, debug = 1) {
  // Resolve a file or directory name relative to the model directory.
  const inModelDir = (name) => path.join(modelDir, name);

  const funasrNano = {
    encoderAdaptor: inModelDir('encoder_adaptor.int8.onnx'),
    llm: inModelDir('llm.int8.onnx'),
    embedding: inModelDir('embedding.int8.onnx'),
    tokenizer: inModelDir('Qwen3-0.6B'),
  };

  const featConfig = {sampleRate: 16000, featureDim: 80};
  const modelConfig = {
    funasrNano,
    tokens: '',
    numThreads,
    provider: 'cpu',
    debug,
  };

  return new sherpa_onnx.OfflineRecognizer({featConfig, modelConfig});
}
31+
32+
/**
 * Load a wave file and feed its samples into a fresh stream of `recognizer`.
 *
 * @param {sherpa_onnx.OfflineRecognizer} recognizer - Recognizer that creates
 *     the stream.
 * @param {string} file - Path of the wave file to read.
 * @returns {Object} The stream, ready to be decoded.
 */
function createStreamFromFile(recognizer, file) {
  const {sampleRate, samples} = sherpa_onnx.readWave(file);

  const stream = recognizer.createStream();
  stream.acceptWaveform({sampleRate, samples});

  return stream;
}
41+
42+
/**
 * Decode three test waves of the FunASR Nano model concurrently and print
 * the recognized text of each file.
 */
async function main() {
  const modelDir = './sherpa-onnx-funasr-nano-int8-2025-12-30';
  const recognizer = createRecognizer(modelDir);

  const relativeWaves = [
    'test_wavs/lyrics_en_1.wav',
    'test_wavs/lyrics_en_2.wav',
    'test_wavs/lyrics_en_3.wav',
  ];

  const testFiles = [];
  for (const wave of relativeWaves) {
    testFiles.push(path.join(modelDir, wave));
  }

  // One stream per file; all streams are created before any decode starts.
  const streams = [];
  for (const file of testFiles) {
    streams.push(createStreamFromFile(recognizer, file));
  }

  // Start every decode first, then wait for all of them together.
  const pending = [];
  for (const stream of streams) {
    pending.push(recognizer.decodeAsync(stream));
  }
  const results = await Promise.all(pending);

  console.log('Concurrent decode results:');
  for (let i = 0; i < testFiles.length; ++i) {
    console.log(`${testFiles[i]}: ${results[i].text}`);
  }
}
64+
65+
// Run the example. On failure, print the error and mark the process as failed
// so that callers (for example shell scripts) see a non-zero exit status
// instead of a silent success.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

scripts/node-addon-api/lib/non-streaming-asr.js

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
/** @typedef {import('./types').OfflineStreamObject} OfflineStreamObject */
22
/** @typedef {import('./types').Waveform} Waveform */
3-
/** @typedef {import('./types').OfflineRecognizerConfig} OfflineRecognizerConfig */
4-
/** @typedef {import('./types').OfflineRecognizerResult} OfflineRecognizerResult */
3+
/**
4+
* @typedef {import('./types').OfflineRecognizerConfig} OfflineRecognizerConfig
5+
*/
6+
/**
7+
* @typedef {import('./types').OfflineRecognizerResult} OfflineRecognizerResult
8+
*/
59

610
const addon = require('./addon.js');
711

@@ -10,7 +14,8 @@ const addon = require('./addon.js');
1014
*/
1115
class OfflineStream {
1216
/**
13-
* @param {OfflineStreamObject|Object} handle - Internal stream object with `handle` property.
17+
* @param {OfflineStreamObject|Object} handle - Internal stream object with
18+
* `handle` property.
1419
*/
1520
constructor(handle) {
1621
this.handle = handle;
@@ -62,6 +67,17 @@ class OfflineRecognizer {
6267
addon.decodeOfflineStream(this.handle, stream.handle);
6368
}
6469

70+
/**
71+
* Decode an offline stream asynchronously (non-blocking).
72+
* @param {OfflineStream} stream
73+
* @returns {Promise<OfflineRecognizerResult>}
74+
*/
75+
async decodeAsync(stream) {
76+
const jsonStr =
77+
await addon.decodeOfflineStreamAsync(this.handle, stream.handle);
78+
return JSON.parse(jsonStr);
79+
}
80+
6581
/**
6682
* Get recognition result for a stream.
6783
* @param {OfflineStream} stream

0 commit comments

Comments
 (0)