
Commit 2848b14

Merge pull request #148 from danleh/transformersjs-rebase

New Wasm workloads: Transformers.js ML sentiment analysis and Speech-to-Text

2 parents ad6b578 + 6b52dad

27 files changed: +175299 −0 lines changed

JetStreamDriver.js

Lines changed: 51 additions & 0 deletions
@@ -576,6 +576,7 @@ class Scripts {
         this.add(`
             performance.mark ??= function(name) { return { name }};
             performance.measure ??= function() {};
+            performance.timeOrigin ??= performance.now();
         `);
     }

@@ -2209,6 +2210,56 @@ let BENCHMARKS = [
         worstCaseCount: 2,
         tags: ["Default", "Wasm"],
     }),
+    new AsyncBenchmark({
+        name: "transformersjs-bert-wasm",
+        files: [
+            "./polyfills/fast-text-encoding/1.0.3/text.js",
+            "./transformersjs/benchmark.js",
+            "./transformersjs/task-bert.js",
+        ],
+        preload: {
+            transformersJsModule: "./transformersjs/build/transformers.js",
+
+            onnxJsModule: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.mjs",
+            onnxWasmBinary: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.wasm",
+
+            modelWeights: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/onnx/model_uint8.onnx",
+            modelConfig: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/config.json",
+            modelTokenizer: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/tokenizer.json",
+            modelTokenizerConfig: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/tokenizer_config.json",
+        },
+        iterations: 30,
+        allowUtf16: true,
+        tags: ["Default", "Wasm", "transformersjs"],
+    }),
+    new AsyncBenchmark({
+        name: "transformersjs-whisper-wasm",
+        files: [
+            "./polyfills/fast-text-encoding/1.0.3/text.js",
+            "./transformersjs/benchmark.js",
+            "./transformersjs/task-whisper.js",
+        ],
+        preload: {
+            transformersJsModule: "./transformersjs/build/transformers.js",
+
+            onnxJsModule: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.mjs",
+            onnxWasmBinary: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.wasm",
+
+            modelEncoderWeights: "./transformersjs/build/models/Xenova/whisper-tiny.en/onnx/encoder_model_quantized.onnx",
+            modelDecoderWeights: "./transformersjs/build/models/Xenova/whisper-tiny.en/onnx/decoder_model_merged_quantized.onnx",
+            modelConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/config.json",
+            modelTokenizer: "./transformersjs/build/models/Xenova/whisper-tiny.en/tokenizer.json",
+            modelTokenizerConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/tokenizer_config.json",
+            modelPreprocessorConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/preprocessor_config.json",
+            modelGenerationConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/generation_config.json",
+
+            inputFile: "./transformersjs/build/inputs/jfk.raw",
+        },
+        iterations: 5,
+        worstCaseCount: 1,
+        allowUtf16: true,
+        tags: ["Default", "Wasm", "transformersjs"],
+    }),
     new WasmLegacyBenchmark({
         name: "tfjs-wasm",
         files: [
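For context on the new `preload` entries: at run time, each key surfaces as a property on `JetStream.preload` holding the URL of the preloaded file (a blob URL when preloading is active), and the workload resolves it itself via `JetStream.getBinary` or `JetStream.dynamicImport`, as `transformersjs/benchmark.js` below does. A minimal hypothetical workload illustrating that contract (the names `demo.js` and `demoAsset` are invented for illustration):

// Hypothetical workload script "demo.js", registered as e.g.:
//   new AsyncBenchmark({
//       name: "demo",
//       files: ["./demo.js"],
//       preload: { demoAsset: "./demo/asset.bin" },
//   }),
class Benchmark {
    asset;

    async init() {
        // JetStream.preload.demoAsset is the (blob) URL of "./demo/asset.bin";
        // getBinary fetches its bytes.
        this.asset = await JetStream.getBinary(JetStream.preload.demoAsset);
    }

    async runIteration() {
        // ... one iteration of actual work on this.asset ...
    }

    validate() {
        if (this.asset.byteLength === 0)
            throw new Error("preloaded asset is empty");
    }
}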

transformersjs/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
/util/node_modules/
/util/package-lock.json

transformersjs/README.md

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
- Two tasks: one text/NLP, one audio processing/speech-to-text.
- Everything in `build/` is generated or an upstream library.
- Everything in `util/` is tooling for building and preparing the benchmark.

# Licenses

- Transformers.js: Apache 2.0, https://github.com/huggingface/transformers.js/blob/main/LICENSE
- ONNX Runtime: MIT, https://github.com/microsoft/onnxruntime/blob/main/LICENSE
- `text-encoding` polyfill: Unlicense OR Apache 2.0, https://github.com/inexorabletash/text-encoding/blob/master/LICENSE.md
- Model `DistilBERT base uncased finetuned SST-2`: Apache 2.0, https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english
- Model `openai/whisper-tiny.en`: Apache 2.0, https://huggingface.co/openai/whisper-tiny.en
- Audio file for speech-to-text task: public domain, https://www.jfklibrary.org/learn/about-jfk/historic-speeches/inaugural-address

transformersjs/benchmark.js

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
// Copyright 2025 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Polyfills that Transformers.js / the ONNX runtime needs in JavaScript shells.

class URL {
    href;
    constructor(url, base) {
        // DEBUG
        // console.log('URL', url, base);
        this.href = url;
    }
}
globalThis.URL = URL;

// Polyfill fetch for shell-compatibility and to cache / preload model weights etc.
let preload = { /* Initialized in init() below due to async. */ };
const originalFetch = globalThis.fetch ?? function(url) {
    throw new Error("no fetch available");
};
globalThis.fetch = async function(url) {
    // DEBUG
    // console.log('fetch', url);

    // Redirect some paths to cached/preloaded resources.
    if (preload[url]) {
        return {
            ok: true,
            status: 200,
            arrayBuffer() { return preload[url]; },
            async blob() {
                return {
                    size: preload[url].byteLength,
                    async arrayBuffer() { return preload[url]; }
                };
            },
        };
    }

    // This should only be called in the browser, where fetch() is available.
    return originalFetch(url);
};

// JetStream benchmark harness. Reused for two different Transformers.js tasks.
// Assumes `initPipeline(pipelineFromTransformersJs)`, `doTask(initializedPipeline,
// inputArrayBuffer)`, and `validate(output)` are in the global scope.

class Benchmark {
    transformersJsModule;
    wasmBinary;
    pipeline;
    inputFile;
    output;

    async init() {
        this.transformersJsModule = await JetStream.dynamicImport(JetStream.preload.transformersJsModule);
        this.wasmBinary = await JetStream.getBinary(JetStream.preload.onnxWasmBinary);

        for (const url of Object.values(JetStream.preload)) {
            preload[url] = await JetStream.getBinary(url);
        }

        if ('inputFile' in JetStream.preload) {
            this.inputFile = (await JetStream.getBinary(JetStream.preload.inputFile)).buffer;
            // DEBUG
            // console.log('inputFile', this.inputFile.byteLength, 'bytes');
        }
    }

    async runIteration() {
        // Initialize the inference pipeline in the first iteration.
        if (!this.pipeline) {
            // TODO: Profile startup only: What is taking so much time here?
            let { env, pipeline } = this.transformersJsModule;

            env.allowRemoteModels = false;
            env.allowLocalModels = true;
            env.localModelPath = './transformersjs/build/models/';

            // Always select the Wasm backend, nothing else.
            delete env.backends.onnx.webgl;
            delete env.backends.onnx.webgpu;

            // Single-threaded only for now, since we cannot spawn workers in shells.
            // TODO: Implement sufficiently powerful workers in shells (or provide
            // polyfills).
            env.backends.onnx.wasm.numThreads = 1;

            // Do not specify a path prefix, because that loads the JSEP build by
            // default.
            // TODO: Do we want the JSEP build because it's the default online, or the
            // non-asyncified one, since it's the smaller / more performant one?
            // env.backends.onnx.wasm.wasmPaths = 'build/onnxruntime-web/';
            // So instead, give the ONNX runtime files directly:
            env.backends.onnx.wasm.wasmPaths = {
                // The ONNX runtime module is dynamically imported relative to the
                // Transformers.js module above, hence strip the prefix.
                // With preloading, this is an (absolute) blob URL, so the replace is a nop.
                mjs: JetStream.preload.onnxJsModule.replace('./transformersjs/build/', './')
            };
            // Give it the wasmBinary directly instead of a path, so that the
            // ONNX runtime uses asynchronous (not streaming) Wasm instantiation.
            // (This keeps the shell and browser results comparable; streaming
            // instantiation is not available in shells.)
            env.backends.onnx.wasm.wasmBinary = this.wasmBinary;

            this.pipeline = await initPipeline(pipeline);
        }

        this.output = await doTask(this.pipeline, this.inputFile);
    }

    validate() {
        validate(this.output);
    }
}
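`task-bert.js` and `task-whisper.js` are among the 27 changed files but not in this excerpt. A hedged sketch of the three globals the harness assumes, shaped like the sentiment-analysis task (the model id follows the preloaded paths under `env.localModelPath`; the input text, the `dtype` option, and the exact validation are illustrative, not the actual task code):

// Hypothetical task file in the shape benchmark.js expects (cf. task-bert.js).
globalThis.initPipeline = async function(pipeline) {
    // Resolved against env.localModelPath; dtype "uint8" is an assumption here,
    // chosen to match the preloaded model_uint8.onnx weights.
    return pipeline("sentiment-analysis",
        "Xenova/distilbert-base-uncased-finetuned-sst-2-english",
        { dtype: "uint8" });
};

globalThis.doTask = async function(pipeline, inputArrayBuffer) {
    // The BERT workload preloads no inputFile, so inputArrayBuffer is undefined
    // and the prompt below is purely illustrative. (The Whisper task would
    // instead wrap its raw audio, e.g. pipeline(new Float32Array(inputArrayBuffer)).)
    return pipeline("The new JetStream workloads cover machine learning inference.");
};

globalThis.validate = function(output) {
    const { label, score } = output[0];
    if ((label !== "POSITIVE" && label !== "NEGATIVE") || !(score > 0 && score <= 1))
        throw new Error(`Unexpected output: ${JSON.stringify(output)}`);
};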

transformersjs/build.log

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
Built on 2025-08-20T13:30:51Z
Installing Node dependencies...
Download and convert audio input(s)...
Converted 4.25s of audio
  from 'jfk.wav', 2 channel(s), 44100 Hz, 16 bit, 176000 samples
  to 'build/inputs/jfk.raw', 1 channel(s), 16000 Hz, 32 bit float, 68000 samples, 272000 bytes
Download and run model(s)...
Copy library files into build/...
Building done

transformersjs/build.sh

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
#!/bin/bash

set -euo pipefail

rm -rf build/
mkdir -p build/{models,inputs,onnxruntime-web}/

# Optional: clean all node packages as well.
rm -rf util/node_modules/

touch build.log
BUILD_LOG="$(realpath build.log)"
echo "Built on $(date -u '+%Y-%m-%dT%H:%M:%SZ')" | tee "$BUILD_LOG"

echo "Installing Node dependencies..." | tee -a "$BUILD_LOG"
pushd util/
npm install
popd

echo "Download and convert audio input(s)..." | tee -a "$BUILD_LOG"
wget https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav | tee -a "$BUILD_LOG"
# Shorten the audio file to one sentence in the middle, to speed up a single iteration.
node util/convert-audio.mjs jfk.wav build/inputs/jfk.raw 52000 120000 | tee -a "$BUILD_LOG"
rm jfk.wav

echo "Download and run model(s)..." | tee -a "$BUILD_LOG"
# This automatically places the model files in `build/models/`.
node util/test-models.mjs

echo "Copy library files into build/..." | tee -a "$BUILD_LOG"

cp util/node_modules/@huggingface/transformers/dist/transformers.js build/
git apply transformers.js.patch

# Transformers.js packages the ONNX runtime JSEP build by default, even when
# only using the Wasm backend, which would be fine with the non-JSEP build.
# JSEP uses ASYNCIFY, which isn't optimal. And it's a much larger Wasm binary.
# cp util/node_modules/@huggingface/transformers/dist/ort-wasm-simd-threaded.jsep.{mjs,wasm} build/

# There is also an ONNX runtime build in the onnxruntime-web package.
# TODO(dlehmann): Discuss with upstream Transformers.js folks, whether they can
# use the non-JSEP build if one requests the Wasm backend.
# TODO(dlehmann): Measure performance difference between the two.
cp util/node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.{mjs,wasm} build/onnxruntime-web/

# TODO: Compress model data (and maybe Wasm modules) with zstd.
# Either decompress with native APIs available in browsers or JS/Wasm polyfill?
# E.g., https://github.com/101arrowz/fzstd or https://github.com/fabiospampinato/zstandard-wasm or https://github.com/donmccurdy/zstddec-wasm

echo "Building done" | tee -a "$BUILD_LOG"
transformersjs/build/inputs/jfk.raw

266 KB
Binary file not shown.
transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/config.json

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
{
  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": "sst-2",
  "hidden_dim": 3072,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.29.2",
  "vocab_size": 30522
}
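As an aside, `id2label` is what maps the classification head's two output logits to the `POSITIVE`/`NEGATIVE` labels the benchmark validates: softmax over the logits, then look up the argmax. A sketch of that usual post-processing step (illustrative, not Transformers.js internals):

// Softmax over the two SST-2 logits, then map the argmax through id2label.
function logitsToLabel(logits, id2label = { 0: "NEGATIVE", 1: "POSITIVE" }) {
    const exps = logits.map(Math.exp);
    const sum = exps.reduce((a, b) => a + b, 0);
    const probs = exps.map(e => e / sum);
    const best = probs.indexOf(Math.max(...probs));
    return { label: id2label[best], score: probs[best] };
}

// Example: logitsToLabel([-1.2, 3.4]) -> { label: "POSITIVE", score: ~0.99 }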
transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/onnx/model_uint8.onnx

64.2 MB
Binary file not shown.
