Merged

Changes from 1 commit
3 changes: 2 additions & 1 deletion tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -564,7 +564,8 @@ export class WebGPUBackend extends KernelBackend {
this.uniformDisposalQueue.push(uniformInfo);
}

if (env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED')) {
if (env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE') as number ===
Contributor:
Probably should be >= just in case multiple command buffers end up waiting for any reason, or the WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE is set to 0.

Contributor Author:
Done
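
A minimal sketch of the guard with the suggested comparison applied (the local batchSize variable is introduced here for readability and is not in the PR):

    const batchSize =
        env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE') as number;
    // Submit whenever the queued encoder count reaches or exceeds the
    // threshold; this also covers a threshold of 0 and any case where
    // extra command buffers pile up before a submit happens.
    if (this.commandQueue.length >= batchSize) {
      this.submitQueue();
    }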

Contributor:
nit: The fact that these are command encoders and not command buffers is, I think, an accident of history, and not necessarily true in the future. I'd pick a more targeted name like WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE or something.

Contributor Author:
Done
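
For reference, a sketch of the registration under the suggested name, assuming the rename is applied everywhere the flag is read:

    /**
     * The deferred-submit batch size: the number of command encoders to
     * accumulate before submitting to the device queue.
     */
    ENV.registerFlag('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', () => 15);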

this.commandQueue.length) {
this.submitQueue();
}

172 changes: 89 additions & 83 deletions tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -35,9 +35,6 @@ describeWebGPU('backend webgpu cpu forwarding turned on', () => {
});

it('should not allocate GPU memory when CPU forwarding', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);

const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

@@ -65,43 +62,44 @@ describeWebGPU('backend webgpu cpu forwarding turned on', () => {

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
});
});

describeWebGPU('backend webgpu', () => {
it('should not leak memory in delayed mode', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', false);
const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);

const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const startNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const d = tf.matMul(c, f);

const dData = await d.data();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
const endNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

expect(endNumBytes - startNumBytes).toEqual(48);
expect(endNumTensors - startNumTensors).toEqual(2);
expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(24);

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
});

it('should not leak memory in immediate mode', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);
it('should not leak memory when command encoder count is larger than one',
async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);

const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const startNumBytesInGPU =
(tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const d = tf.matMul(c, f);

const dData = await d.data();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
const endNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

expect(endNumBytes - startNumBytes).toEqual(48);
expect(endNumTensors - startNumTensors).toEqual(2);
expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(24);

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('should not leak memory when command encoder count is one', async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

@@ -125,12 +123,12 @@

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('should recycle buffers in immediate mode', () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);
it('should recycle buffers when command encoder count is one', () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
const backend = tf.backend() as WebGPUBackend;
const bufferManager = backend.getBufferManager();
bufferManager.reset();
@@ -164,51 +162,59 @@ describeWebGPU('backend webgpu', () => {
const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul).toEqual(0);
expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul).toEqual(2);
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('should not recycle buffers in delayed mode', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', false);
const backend = tf.backend() as WebGPUBackend;
const bufferManager = backend.getBufferManager();
bufferManager.reset();

const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);
const freeBuffersAfterFirstMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMul = bufferManager.getNumUsedBuffers();

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
tf.matMul(c, f);
const freeBuffersAfterFirstMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterFirstMatMul - freeBuffersAfterFirstMul).toEqual(0);
expect(usedBuffersAfterFirstMatMul - usedBuffersAfterFirstMul).toEqual(2);

const a2 = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b2 = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c2 = tf.mul(a2, b2);
const freeBuffersAfterSecondMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMul - freeBuffersAfterFirstMatMul).toEqual(0);
expect(usedBuffersAfterSecondMul - usedBuffersAfterFirstMatMul).toEqual(3);

const f2 = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const c3 = tf.matMul(c2, f2);
const freeBuffersAfterSecondMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul).toEqual(0);
expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul).toEqual(2);

// Tests happen within a tidy so we need to read a tensor at the end of a
// test in delayed mode in order to force flush the disposal queue.
await c3.data();
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
});
it('should not recycle buffers when command encoder count is larger than one',
async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
const backend = tf.backend() as WebGPUBackend;
const bufferManager = backend.getBufferManager();
bufferManager.reset();

const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);
const freeBuffersAfterFirstMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMul = bufferManager.getNumUsedBuffers();

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
tf.matMul(c, f);
const freeBuffersAfterFirstMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterFirstMatMul - freeBuffersAfterFirstMul)
.toEqual(0);
expect(usedBuffersAfterFirstMatMul - usedBuffersAfterFirstMul)
.toEqual(2);

const a2 = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b2 = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c2 = tf.mul(a2, b2);
const freeBuffersAfterSecondMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMul - freeBuffersAfterFirstMatMul)
.toEqual(0);
expect(usedBuffersAfterSecondMul - usedBuffersAfterFirstMatMul)
.toEqual(3);

const f2 = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const c3 = tf.matMul(c2, f2);
const freeBuffersAfterSecondMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul)
.toEqual(0);
expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul)
.toEqual(2);

// Tests happen within a tidy so we need to read a tensor at the end of a
// test if encoder count in queue is larger than one in order to force
// flush the disposal queue.
await c3.data();
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('readSync should throw if tensors are on the GPU', async () => {
const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
4 changes: 2 additions & 2 deletions tfjs-backend-webgpu/src/flags_webgpu.ts
@@ -19,8 +19,8 @@ import {env} from '@tensorflow/tfjs-core';

const ENV = env();

/** Whether we submit commands to the device queue immediately. */
ENV.registerFlag('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', () => true);
/** The number of command encoders to accumulate before submitting to the device queue. */
ENV.registerFlag('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', () => 15);

/**
* Whether we forward execution to the CPU backend if tensors are small and
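The new flag generalizes the old boolean: a value of 1 reproduces the old immediate mode (submit after every command encoder), while larger values defer submission and batch the encoders. A small usage sketch, using the same tf.env() API the tests above rely on:

    import * as tf from '@tensorflow/tfjs-core';

    // Submit after every command encoder (the old immediate mode).
    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);

    // Accumulate up to 15 encoders per submit (the new default, matching
    // the old delayed mode).
    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);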
16 changes: 8 additions & 8 deletions tfjs-backend-webgpu/src/matmul_test.ts
@@ -20,9 +20,9 @@ import {test_util} from '@tensorflow/tfjs-core';
import {describeWebGPU} from './test_util';

describeWebGPU('matmul', () => {
it('it works in delayed mode.', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', false);
it('it works when command encoder count is larger than one.', async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);

@@ -34,12 +34,12 @@ describeWebGPU('matmul', () => {
const dData = await d.data();
test_util.expectArraysClose(
dData, new Float32Array([0, 12, 7.5, 0, 6.5, 66]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('it works in immediate mode.', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);
it('it works when command encoder count is one.', async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);

@@ -51,7 +51,7 @@
const dData = await d.data();
test_util.expectArraysClose(
dData, new Float32Array([0, 12, 7.5, 0, 6.5, 66]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

// tslint:disable-next-line:max-line-length
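Each of the tests above repeats the same save/set/restore dance around the flag. A hypothetical helper, not part of this PR, could factor that out; withFlag below is an illustration only:

    async function withFlag(
        name: string, value: number|boolean,
        body: () => Promise<void>): Promise<void> {
      const saved = tf.env().get(name);
      tf.env().set(name, value);
      try {
        await body();
      } finally {
        // Restore the flag even if the test body throws.
        tf.env().set(name, saved);
      }
    }

    // Usage:
    // await withFlag('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1, async () => {
    //   ...test body...
    // });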