
Commit 2848b14

Merge pull request #148 from danleh/transformersjs-rebase

New Wasm workloads: Transformers.js ML sentiment analysis and Speech-to-Text

2 parents ad6b578 + 6b52dad

27 files changed: +175299 −0 lines changed

JetStreamDriver.js

Lines changed: 51 additions & 0 deletions
@@ -576,6 +576,7 @@ class Scripts {
         this.add(`
             performance.mark ??= function(name) { return { name }};
             performance.measure ??= function() {};
+            performance.timeOrigin ??= performance.now();
         `);
     }

@@ -2209,6 +2210,56 @@ let BENCHMARKS = [
         worstCaseCount: 2,
         tags: ["Default", "Wasm"],
     }),
+    new AsyncBenchmark({
+        name: "transformersjs-bert-wasm",
+        files: [
+            "./polyfills/fast-text-encoding/1.0.3/text.js",
+            "./transformersjs/benchmark.js",
+            "./transformersjs/task-bert.js",
+        ],
+        preload: {
+            transformersJsModule: "./transformersjs/build/transformers.js",
+
+            onnxJsModule: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.mjs",
+            onnxWasmBinary: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.wasm",
+
+            modelWeights: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/onnx/model_uint8.onnx",
+            modelConfig: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/config.json",
+            modelTokenizer: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/tokenizer.json",
+            modelTokenizerConfig: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/tokenizer_config.json",
+        },
+        iterations: 30,
+        allowUtf16: true,
+        tags: ["Default", "Wasm", "transformersjs"],
+    }),
+    new AsyncBenchmark({
+        name: "transformersjs-whisper-wasm",
+        files: [
+            "./polyfills/fast-text-encoding/1.0.3/text.js",
+            "./transformersjs/benchmark.js",
+            "./transformersjs/task-whisper.js",
+        ],
+        preload: {
+            transformersJsModule: "./transformersjs/build/transformers.js",
+
+            onnxJsModule: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.mjs",
+            onnxWasmBinary: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.wasm",
+
+            modelEncoderWeights: "./transformersjs/build/models/Xenova/whisper-tiny.en/onnx/encoder_model_quantized.onnx",
+            modelDecoderWeights: "./transformersjs/build/models/Xenova/whisper-tiny.en/onnx/decoder_model_merged_quantized.onnx",
+            modelConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/config.json",
+            modelTokenizer: "./transformersjs/build/models/Xenova/whisper-tiny.en/tokenizer.json",
+            modelTokenizerConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/tokenizer_config.json",
+            modelPreprocessorConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/preprocessor_config.json",
+            modelGenerationConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/generation_config.json",
+
+            inputFile: "./transformersjs/build/inputs/jfk.raw",
+        },
+        iterations: 5,
+        worstCaseCount: 1,
+        allowUtf16: true,
+        tags: ["Default", "Wasm", "transformersjs"],
+    }),
     new WasmLegacyBenchmark({
         name: "tfjs-wasm",
         files: [
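For context on the new `preload` entries: at run time, each key surfaces as a property on `JetStream.preload` holding the URL of the preloaded file (a blob URL when preloading is active), and the workload resolves it itself via `JetStream.getBinary` or `JetStream.dynamicImport`, as `transformersjs/benchmark.js` below does. A minimal hypothetical workload illustrating that contract (the names `demo.js` and `demoAsset` are invented for illustration):

// Hypothetical workload script "demo.js", registered as e.g.:
//   new AsyncBenchmark({
//       name: "demo",
//       files: ["./demo.js"],
//       preload: { demoAsset: "./demo/asset.bin" },
//   }),
class Benchmark {
    asset;

    async init() {
        // JetStream.preload.demoAsset is the (blob) URL of "./demo/asset.bin";
        // getBinary fetches its bytes.
        this.asset = await JetStream.getBinary(JetStream.preload.demoAsset);
    }

    async runIteration() {
        // ... one iteration of actual work on this.asset ...
    }

    validate() {
        if (this.asset.byteLength === 0)
            throw new Error("preloaded asset is empty");
    }
}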

transformersjs/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
/util/node_modules/
/util/package-lock.json

transformersjs/README.md

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
- Two tasks: one text/NLP, one audio processing/speech-to-text.
- Everything in `build/` is generated or an upstream library.
- Everything in `util/` is tooling for building and preparing the benchmark.

# Licenses

- Transformers.js: Apache 2.0, https://github.com/huggingface/transformers.js/blob/main/LICENSE
- ONNX Runtime: MIT, https://github.com/microsoft/onnxruntime/blob/main/LICENSE
- `text-encoding` polyfill: Unlicense OR Apache 2.0, https://github.com/inexorabletash/text-encoding/blob/master/LICENSE.md
- Model `DistilBERT base uncased finetuned SST-2`: Apache 2.0, https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english
- Model `openai/whisper-tiny.en`: Apache 2.0, https://huggingface.co/openai/whisper-tiny.en
- Audio file for speech-to-text task: public domain, https://www.jfklibrary.org/learn/about-jfk/historic-speeches/inaugural-address

transformersjs/benchmark.js

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
// Copyright 2025 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Polyfills that Transformers.js / the ONNX runtime needs in JavaScript shells.

class URL {
    href;
    constructor(url, base) {
        // DEBUG
        // console.log('URL', url, base);
        this.href = url;
    }
}
globalThis.URL = URL;

// Polyfill fetch for shell-compatibility and to cache / preload model weights etc.
let preload = { /* Initialized in init() below due to async. */ };
const originalFetch = globalThis.fetch ?? function(url) {
    throw new Error("no fetch available");
};
globalThis.fetch = async function(url) {
    // DEBUG
    // console.log('fetch', url);

    // Redirect some paths to cached/preloaded resources.
    if (preload[url]) {
        return {
            ok: true,
            status: 200,
            arrayBuffer() { return preload[url]; },
            async blob() {
                return {
                    size: preload[url].byteLength,
                    async arrayBuffer() { return preload[url]; }
                };
            },
        };
    }

    // This should only be called in the browser, where fetch() is available.
    return originalFetch(url);
};

// JetStream benchmark harness. Reused for two different Transformers.js tasks.
// Assumes `initPipeline(pipelineFromTransformersJs)`, `doTask(initializedPipeline,
// inputArrayBuffer)`, and `validate(output)` are in the global scope.

class Benchmark {
    transformersJsModule;
    wasmBinary;
    pipeline;
    inputFile;
    output;

    async init() {
        this.transformersJsModule = await JetStream.dynamicImport(JetStream.preload.transformersJsModule);
        this.wasmBinary = await JetStream.getBinary(JetStream.preload.onnxWasmBinary);

        for (const url of Object.values(JetStream.preload)) {
            preload[url] = await JetStream.getBinary(url);
        }

        if ('inputFile' in JetStream.preload) {
            this.inputFile = (await JetStream.getBinary(JetStream.preload.inputFile)).buffer;
            // DEBUG
            // console.log('inputFile', this.inputFile.byteLength, 'bytes');
        }
    }

    async runIteration() {
        // Initialize the inference pipeline in the first iteration.
        if (!this.pipeline) {
            // TODO: Profile startup only: What is taking so much time here?
            let { env, pipeline } = this.transformersJsModule;

            env.allowRemoteModels = false;
            env.allowLocalModels = true;
            env.localModelPath = './transformersjs/build/models/';

            // Always select the Wasm backend, nothing else.
            delete env.backends.onnx.webgl;
            delete env.backends.onnx.webgpu;

            // Single-threaded only for now, since we cannot spawn workers in shells.
            // TODO: Implement sufficiently powerful workers in shells (or provide
            // polyfills).
            env.backends.onnx.wasm.numThreads = 1;

            // Do not specify a path prefix, because that loads the JSEP build by
            // default.
            // TODO: Do we want the JSEP build because it's the default online, or the
            // non-asyncified one, since it's the smaller / more performant one?
            // env.backends.onnx.wasm.wasmPaths = 'build/onnxruntime-web/';
            // So instead, give the ONNX runtime files directly:
            env.backends.onnx.wasm.wasmPaths = {
                // The ONNX runtime module is dynamically imported relative to the
                // Transformers.js module above, hence strip the prefix.
                // With preloading, this is an (absolute) blob URL, so the replace is a nop.
                mjs: JetStream.preload.onnxJsModule.replace('./transformersjs/build/', './')
            };
            // Give it the wasmBinary directly instead of a path, so that the
            // ONNX runtime uses asynchronous (not streaming) Wasm instantiation.
            // (This keeps the shell and browser results comparable; streaming
            // instantiation is not available in shells.)
            env.backends.onnx.wasm.wasmBinary = this.wasmBinary;

            this.pipeline = await initPipeline(pipeline);
        }

        this.output = await doTask(this.pipeline, this.inputFile);
    }

    validate() {
        validate(this.output);
    }
}
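`task-bert.js` and `task-whisper.js` are among the 27 changed files but not in this excerpt. A hedged sketch of the three globals the harness assumes, shaped like the sentiment-analysis task (the model id follows the preloaded paths under `env.localModelPath`; the input text, the `dtype` option, and the exact validation are illustrative, not the actual task code):

// Hypothetical task file in the shape benchmark.js expects (cf. task-bert.js).
globalThis.initPipeline = async function(pipeline) {
    // Resolved against env.localModelPath; dtype "uint8" is an assumption here,
    // chosen to match the preloaded model_uint8.onnx weights.
    return pipeline("sentiment-analysis",
        "Xenova/distilbert-base-uncased-finetuned-sst-2-english",
        { dtype: "uint8" });
};

globalThis.doTask = async function(pipeline, inputArrayBuffer) {
    // The BERT workload preloads no inputFile, so inputArrayBuffer is undefined
    // and the prompt below is purely illustrative. (The Whisper task would
    // instead wrap its raw audio, e.g. pipeline(new Float32Array(inputArrayBuffer)).)
    return pipeline("The new JetStream workloads cover machine learning inference.");
};

globalThis.validate = function(output) {
    const { label, score } = output[0];
    if ((label !== "POSITIVE" && label !== "NEGATIVE") || !(score > 0 && score <= 1))
        throw new Error(`Unexpected output: ${JSON.stringify(output)}`);
};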

transformersjs/build.log

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
Built on 2025-08-20T13:30:51Z
Installing Node dependencies...
Download and convert audio input(s)...
Converted 4.25s of audio
  from 'jfk.wav', 2 channel(s), 44100 Hz, 16 bit, 176000 samples
  to 'build/inputs/jfk.raw', 1 channel(s), 16000 Hz, 32 bit float, 68000 samples, 272000 bytes
Download and run model(s)...
Copy library files into build/...
Building done

transformersjs/build.sh

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
#!/bin/bash

set -euo pipefail

rm -rf build/
mkdir -p build/{models,inputs,onnxruntime-web}/

# Optional: clean all node packages as well.
rm -rf util/node_modules/

touch build.log
BUILD_LOG="$(realpath build.log)"
echo "Built on $(date -u '+%Y-%m-%dT%H:%M:%SZ')" | tee "$BUILD_LOG"

echo "Installing Node dependencies..." | tee -a "$BUILD_LOG"
pushd util/
npm install
popd

echo "Download and convert audio input(s)..." | tee -a "$BUILD_LOG"
wget https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav | tee -a "$BUILD_LOG"
# Shorten the audio file to one sentence in the middle, to speed up a single iteration.
node util/convert-audio.mjs jfk.wav build/inputs/jfk.raw 52000 120000 | tee -a "$BUILD_LOG"
rm jfk.wav

echo "Download and run model(s)..." | tee -a "$BUILD_LOG"
# This automatically places the model files in `build/models/`.
node util/test-models.mjs

echo "Copy library files into build/..." | tee -a "$BUILD_LOG"

cp util/node_modules/@huggingface/transformers/dist/transformers.js build/
git apply transformers.js.patch

# Transformers.js packages the ONNX runtime JSEP build by default, even when
# only using the Wasm backend, which would be fine with the non-JSEP build.
# JSEP uses ASYNCIFY, which isn't optimal. And it's a much larger Wasm binary.
# cp util/node_modules/@huggingface/transformers/dist/ort-wasm-simd-threaded.jsep.{mjs,wasm} build/

# There is also an ONNX runtime build in the onnxruntime-web package.
# TODO(dlehmann): Discuss with upstream Transformers.js folks, whether they can
# use the non-JSEP build if one requests the Wasm backend.
# TODO(dlehmann): Measure performance difference between the two.
cp util/node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.{mjs,wasm} build/onnxruntime-web/

# TODO: Compress model data (and maybe Wasm modules) with zstd.
# Either decompress with native APIs available in browsers or JS/Wasm polyfill?
# E.g., https://github.com/101arrowz/fzstd or https://github.com/fabiospampinato/zstandard-wasm or https://github.com/donmccurdy/zstddec-wasm

echo "Building done" | tee -a "$BUILD_LOG"
transformersjs/build/inputs/jfk.raw

266 KB
Binary file not shown.
transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/config.json

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
{
  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": "sst-2",
  "hidden_dim": 3072,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.29.2",
  "vocab_size": 30522
}
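As an aside, `id2label` is what maps the classification head's two output logits to the `POSITIVE`/`NEGATIVE` labels the benchmark validates: softmax over the logits, then look up the argmax. A sketch of that usual post-processing step (illustrative, not Transformers.js internals):

// Softmax over the two SST-2 logits, then map the argmax through id2label.
function logitsToLabel(logits, id2label = { 0: "NEGATIVE", 1: "POSITIVE" }) {
    const exps = logits.map(Math.exp);
    const sum = exps.reduce((a, b) => a + b, 0);
    const probs = exps.map(e => e / sum);
    const best = probs.indexOf(Math.max(...probs));
    return { label: id2label[best], score: probs[best] };
}

// Example: logitsToLabel([-1.2, 3.4]) -> { label: "POSITIVE", score: ~0.99 }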
transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/onnx/model_uint8.onnx

64.2 MB
Binary file not shown.
