
Commit 2a3963f

Merge remote-tracking branch 'upstream/master' into tfjs-typescript-4
2 parents: 13faefd + 42dee16

32 files changed: +1480, -362 lines

e2e/integration_tests/constants.ts

Lines changed: 2 additions & 1 deletion
@@ -37,7 +37,8 @@ export const CONVERT_PREDICT_MODELS = {
     'saved_model_v1', 'saved_model_v2', 'saved_model_v2_with_control_flow',
     'saved_model_with_conv2d', 'saved_model_with_prelu',
     'saved_model_v2_complex64', 'saved_model_v2_with_control_flow_v2',
-    'saved_model_v2_with_tensorlist_ops', 'saved_model_v1_with_hashtable'
+    'saved_model_v2_with_tensorlist_ops', 'saved_model_v1_with_hashtable',
+    'saved_model_v2_with_hashtable'
   ],
   layers_model: ['mobilenet']
 };

e2e/integration_tests/convert_predict.py

Lines changed: 43 additions & 0 deletions
@@ -427,6 +427,47 @@ def _create_saved_model_v1_with_hashtable(save_dir):
       }
   }
 
+def _create_saved_model_v2_with_hashtable(save_dir):
+  """Test a TF V2 model with HashTable Ops.
+
+  Args:
+    save_dir: directory name of where the saved model will be stored.
+  """
+  class Table(tf.Module):
+    def __init__(self):
+      super(Table, self).__init__()
+      keys = tf.constant(['a', 'b'])
+      vals= tf.constant([0, 1])
+      init = tf.lookup.KeyValueTensorInitializer(keys, vals)
+      self.table = tf.lookup.StaticHashTable(init, -1)
+
+    def initializeTable(self):
+      @tf.function
+      def lookup(input):
+        return self.table.lookup(input)
+
+      return lookup
+
+  model = Table()
+  concrete_fn = model.initializeTable().get_concrete_function(
+      input=tf.TensorSpec([None], tf.string))
+
+  tf.saved_model.save(model, save_dir, signatures={"serving_default": concrete_fn})
+
+  return {
+      "async": False,
+      "inputs": {
+          "Placeholder:0": {
+              "value": ["a", "b", "c"], "shape": [3], "dtype": "string"
+          }
+      },
+      "outputs": {
+          "StatefulPartitionedCall/None_Lookup/LookupTableFindV2:0": {
+              "value": [0, 1, -1], "shape": [3], "dtype": "int32"
+          }
+      }
+  }
+
 def _layers_mobilenet():
   model = tf.keras.applications.MobileNetV2()
   model_path = 'mobilenet'

@@ -471,6 +512,8 @@ def main():
       'saved_model_v2_with_tensorlist_ops', control_flow_v2=True)
   _save_and_convert_model(_create_saved_model_v1_with_hashtable,
                           'saved_model_v1_with_hashtable')
+  _save_and_convert_model(_create_saved_model_v2_with_hashtable,
+                          'saved_model_v2_with_hashtable')
 
   _layers_mobilenet()
 if __name__ == '__main__':
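
Note: a minimal sketch (not part of this commit) of how the converted saved_model_v2_with_hashtable fixture could be exercised from TF.js. The model URL is hypothetical and the use of executeAsync is an assumption about how the hash-table lookup must be run; the e2e harness resolves the real artifact paths and tensor names from the fixture dictionary above.

import * as tf from '@tensorflow/tfjs';

// Hypothetical location of the converted artifact; the integration test
// serves the real converted model from its own test data directory.
const MODEL_URL = 'data/saved_model_v2_with_hashtable/model.json';

async function checkHashtableModel() {
  const model = await tf.loadGraphModel(MODEL_URL);
  // Input matches the fixture above: a string tensor of shape [3].
  const input = tf.tensor1d(['a', 'b', 'c'], 'string');
  // executeAsync is used on the assumption that the lookup op cannot be
  // executed synchronously.
  const result = await model.executeAsync({'Placeholder:0': input}) as tf.Tensor;
  // Expected per the fixture: [0, 1, -1] ('a' -> 0, 'b' -> 1, unknown 'c' ->
  // the default value -1).
  console.log(await result.data());
}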

e2e/yarn.lock

Lines changed: 0 additions & 5 deletions
@@ -1012,11 +1012,6 @@
   dependencies:
     detect-browser "*"
 
-"@types/emscripten@~0.0.34":
-  version "0.0.34"
-  resolved "https://registry.yarnpkg.com/@types/emscripten/-/emscripten-0.0.34.tgz#12b4a344274fb102ff2f6c877b37587bc3e46008"
-  integrity sha512-QSb9ojDincskc+uKMI0KXp8e1NALFINCrMlp8VGKGcTSxeEyRTTKyjWw75NYrCZHUsVEEEpr1tYHpbtaC++/sQ==
-
 "@types/jasmine@~3.0.0":
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/@types/jasmine/-/jasmine-3.0.0.tgz#9a6b6755a02fcd6baa088a767557709c79728f98"

package.json

Lines changed: 1 addition & 0 deletions
@@ -74,6 +74,7 @@
     "lint": "tslint -p tsconfig_tslint.json",
     "test": "bazel test //:tests",
     "test-packages-ci": "yarn generate-cloudbuild-for-packages && ./scripts/run-build.sh",
+    "nightly-cloudbuild": "NIGHTLY=true yarn generate-cloudbuild-for-packages && gcloud builds submit . --config=cloudbuild_generated.yml --substitutions=_NIGHTLY=true",
     "generate-cloudbuild-for-packages": "ts-node -s ./scripts/generate_cloudbuild_for_packages.ts",
     "test-generate-cloudbuild": "cd scripts && node --require ts-node/register ../node_modules/jasmine/bin/jasmine.js run generate_cloudbuild_test.ts",
     "test-run-flaky": "jasmine run scripts/run_flaky_test.js",
tfjs-backend-webgpu/src/adapter_info.ts

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+// TODO: Remove it once webgpu/types is successfully upgraded.
+// https://github.com/tensorflow/tfjs/issues/6869
+export interface GPUAdapterInfo {
+  vendor: string;
+  architecture: string;
+}
+
+export class AdapterInfo {
+  private vendor: string;
+
+  constructor(adapterInfo: GPUAdapterInfo) {
+    if (adapterInfo) {
+      this.vendor = adapterInfo.vendor;
+    }
+  }
+
+  isIntel(): boolean {
+    return this.vendor === 'intel';
+  }
+}
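
Taken together with the base.ts and kernel changes below, the intended flow is: request the adapter info from WebGPU, wrap it in AdapterInfo, and let kernels branch on isIntel(). A minimal standalone sketch under the same assumption as base.ts (requestAdapterInfo() is not yet covered by the bundled @webgpu/types, hence the any-cast); the helper name here is illustrative only.

import {AdapterInfo} from './adapter_info';

async function wantsSequentialAccess(): Promise<boolean> {
  const adapter = await navigator.gpu.requestAdapter();
  if (adapter == null) {
    return false;
  }
  // tslint:disable-next-line:no-any
  const rawInfo = await (adapter as any).requestAdapterInfo();
  const adapterInfo = new AdapterInfo(rawInfo);
  // Per the kernel comments in this commit, sequential access by threads is
  // more friendly for Intel GPUs.
  return adapterInfo.isIntel();
}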

tfjs-backend-webgpu/src/backend_webgpu.ts

Lines changed: 4 additions & 1 deletion
@@ -19,6 +19,7 @@ import './flags_webgpu';
 
 import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
 
+import {AdapterInfo, GPUAdapterInfo} from './adapter_info';
 import {BufferManager} from './buffer_manager';
 import {TextureManager} from './texture_manager';
 import * as webgpu_program from './webgpu_program';

@@ -107,6 +108,7 @@ const reshapeDispatch =
 
 export class WebGPUBackend extends KernelBackend {
   bufferManager: BufferManager;
+  adapterInfo: AdapterInfo;
   device: GPUDevice;
   queue: GPUQueue;
   tensorMap: DataStorage<TensorData>;

@@ -135,7 +137,7 @@ export class WebGPUBackend extends KernelBackend {
     return WebGPUBackend.nextDataId++;
   }
 
-  constructor(device: GPUDevice) {
+  constructor(device: GPUDevice, adapterInfo?: GPUAdapterInfo) {
     super();
     if (!webgpu_util.isWebGPUSupported()) {
       throw new Error('WebGPU is not supported on this device');

@@ -146,6 +148,7 @@ export class WebGPUBackend extends KernelBackend {
     this.currentCommandEncoder = null;
     this.currentComputePass = null;
     this.supportTimeQuery = device.features.has('timestamp-query');
+    this.adapterInfo = new AdapterInfo(adapterInfo);
 
     this.bufferManager = new BufferManager(this.device);
     this.textureManager = new TextureManager(this.device);

tfjs-backend-webgpu/src/base.ts

Lines changed: 3 additions & 1 deletion
@@ -50,7 +50,9 @@ if (isWebGPUSupported()) {
       deviceDescriptor.requiredFeatures = ['timestamp-query'];
     }
     const device: GPUDevice = await adapter.requestDevice(deviceDescriptor);
-    return new WebGPUBackend(device);
+    // tslint:disable-next-line:no-any
+    const adapterInfo = await (adapter as any).requestAdapterInfo();
+    return new WebGPUBackend(device, adapterInfo);
   }, 3 /*priority*/);
 }

tfjs-backend-webgpu/src/conv2d_mm_webgpu.ts

Lines changed: 6 additions & 3 deletions
@@ -176,12 +176,13 @@ export class Conv2DMMProgram implements WebGPUProgram {
   tileInner: number;
   innerElementSize: number;
   isVec4?: boolean;
+  private sequentialAccessByThreads: boolean;
 
   constructor(
       convInfo: backend_util.Conv2DInfo, dimAOuter: number, dimBOuter: number,
       dimInner: number, addBias = false,
       activation: backend_util.Activation = null,
-      hasPreluActivationWeights = false) {
+      hasPreluActivationWeights = false, sequentialAccessByThreads = false) {
     this.outputShape = convInfo.outShape;
     this.isChannelsLast = convInfo.dataFormat === 'channelsLast';
     this.isVec4 =

@@ -229,6 +230,7 @@
       }
     }
 
+    this.sequentialAccessByThreads = sequentialAccessByThreads;
     this.addBias = addBias;
     this.activation = activation;
     this.hasPreluActivationWeights = hasPreluActivationWeights;

@@ -244,7 +246,8 @@
 
     this.shaderKey = `conv2DMM_${this.elementsPerThread}_${this.activation}}_${
         this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${
-        this.innerElementSize}_${this.isChannelsLast}`;
+        this.innerElementSize}_${this.isChannelsLast}_${
+        this.sequentialAccessByThreads}`;
   }
 
   getUserCode(): string {

@@ -254,7 +257,7 @@
             this.tileInner) :
         makeMatMulPackedSource(
             this.elementsPerThread, this.workGroupSize, !this.isChannelsLast,
-            this.tileInner);
+            this.tileInner, false, null, this.sequentialAccessByThreads);
     const elementsSize =
         this.isVec4 ? [this.innerElementSize, 4, 4] : [1, 1, 1];
     const userCode = `

tfjs-backend-webgpu/src/kernels/BatchMatMul_impl.ts

Lines changed: 5 additions & 1 deletion
@@ -184,9 +184,13 @@ export function batchMatMulImpl({
           activation, preluActivationWeights);
       break;
     case MatMulProgramType.MatMulPackedProgram:
+      // Experiments show that sequential access is more friendly for Intel
+      // GPUs.
+      const sequentialAccessByThreads = backend.adapterInfo.isIntel();
       program = new MatMulPackedProgram(
           a3dShape, outputShape, batchAEqualOne, batchBEqualOne, transposeA,
-          transposeB, bias, activation, preluActivationWeights);
+          transposeB, bias, activation, preluActivationWeights,
+          sequentialAccessByThreads);
       break;
     default:
       throw new Error(`Unsupported MatMulProgramType ${matmulProgramType}.`);

tfjs-backend-webgpu/src/kernels/Conv2D_impl.ts

Lines changed: 3 additions & 1 deletion
@@ -229,9 +229,11 @@ export function conv2DImpl({
         {type: 'int32', data: [dimAOuter]}, {type: 'int32', data: [dimBOuter]},
         {type: 'int32', data: [dimInner]});
 
+    // Experiments show that sequential access is more friendly for Intel GPUs.
+    const sequentialAccessByThreads = backend.adapterInfo.isIntel();
     program = new Conv2DMMProgram(
         convInfo, dimAOuter, dimBOuter, dimInner, hasBias, activation,
-        hasPreluActivationWeights);
+        hasPreluActivationWeights, sequentialAccessByThreads);
   }
 
   const intermediates: TensorInfo[] = [];
