Merged

Changes from 1 commit
3 changes: 2 additions & 1 deletion tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -564,7 +564,8 @@ export class WebGPUBackend extends KernelBackend {
this.uniformDisposalQueue.push(uniformInfo);
}

if (env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED')) {
if (env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE') as number ===
Contributor:
Probably should be >= just in case multiple command buffers end up waiting for any reason, or the WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE is set to 0.

Contributor Author:
Done
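
A minimal sketch of the guard with the suggested comparison applied (the local batchSize variable is introduced here for readability and is not in the PR):

    const batchSize =
        env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE') as number;
    // Submit whenever the queued encoder count reaches or exceeds the
    // threshold; this also covers a threshold of 0 and any case where
    // extra command buffers pile up before a submit happens.
    if (this.commandQueue.length >= batchSize) {
      this.submitQueue();
    }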

Contributor:
nit: The fact that these are command encoders and not command buffers is, I think, an accident of history, and not necessarily true in the future. I'd pick a more targeted name like WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE or something.

Contributor Author:
Done
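
For reference, a sketch of the registration under the suggested name, assuming the rename is applied everywhere the flag is read:

    /**
     * The deferred-submit batch size: the number of command encoders to
     * accumulate before submitting to the device queue.
     */
    ENV.registerFlag('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', () => 15);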

this.commandQueue.length) {
this.submitQueue();
}

172 changes: 89 additions & 83 deletions tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -35,9 +35,6 @@ describeWebGPU('backend webgpu cpu forwarding turned on', () => {
});

it('should not allocate GPU memory when CPU forwarding', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);

const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

@@ -65,43 +62,44 @@ describeWebGPU('backend webgpu cpu forwarding turned on', () => {

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
});
});

describeWebGPU('backend webgpu', () => {
it('should not leak memory in delayed mode', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', false);
const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);

const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const startNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const d = tf.matMul(c, f);

const dData = await d.data();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
const endNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

expect(endNumBytes - startNumBytes).toEqual(48);
expect(endNumTensors - startNumTensors).toEqual(2);
expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(24);

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
});

it('should not leak memory in immediate mode', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);
it('should not leak memory when command encoder count is larger than one',
async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);

const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const startNumBytesInGPU =
(tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const d = tf.matMul(c, f);

const dData = await d.data();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
const endNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;

expect(endNumBytes - startNumBytes).toEqual(48);
expect(endNumTensors - startNumTensors).toEqual(2);
expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(24);

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('should not leak memory when command encoder count is one', async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

@@ -125,12 +123,12 @@

tf.test_util.expectArraysClose(
dData, new Float32Array([9, 12, 15, 19, 26, 33]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('should recycle buffers in immediate mode', () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);
it('should recycle buffers when command encoder count is one', () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
const backend = tf.backend() as WebGPUBackend;
const bufferManager = backend.getBufferManager();
bufferManager.reset();
@@ -164,51 +162,59 @@ describeWebGPU('backend webgpu', () => {
const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul).toEqual(0);
expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul).toEqual(2);
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('should not recycle buffers in delayed mode', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', false);
const backend = tf.backend() as WebGPUBackend;
const bufferManager = backend.getBufferManager();
bufferManager.reset();

const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);
const freeBuffersAfterFirstMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMul = bufferManager.getNumUsedBuffers();

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
tf.matMul(c, f);
const freeBuffersAfterFirstMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterFirstMatMul - freeBuffersAfterFirstMul).toEqual(0);
expect(usedBuffersAfterFirstMatMul - usedBuffersAfterFirstMul).toEqual(2);

const a2 = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b2 = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c2 = tf.mul(a2, b2);
const freeBuffersAfterSecondMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMul - freeBuffersAfterFirstMatMul).toEqual(0);
expect(usedBuffersAfterSecondMul - usedBuffersAfterFirstMatMul).toEqual(3);

const f2 = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const c3 = tf.matMul(c2, f2);
const freeBuffersAfterSecondMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul).toEqual(0);
expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul).toEqual(2);

// Tests happen within a tidy so we need to read a tensor at the end of a
// test in delayed mode in order to force flush the disposal queue.
await c3.data();
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
});
it('should not recycle buffers when command encoder count is larger than one',
async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
const backend = tf.backend() as WebGPUBackend;
const bufferManager = backend.getBufferManager();
bufferManager.reset();

const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c = tf.mul(a, b);
const freeBuffersAfterFirstMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMul = bufferManager.getNumUsedBuffers();

const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
tf.matMul(c, f);
const freeBuffersAfterFirstMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterFirstMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterFirstMatMul - freeBuffersAfterFirstMul)
.toEqual(0);
expect(usedBuffersAfterFirstMatMul - usedBuffersAfterFirstMul)
.toEqual(2);

const a2 = tf.tensor2d([2, 4, 6, 8], [2, 2]);
const b2 = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);

const c2 = tf.mul(a2, b2);
const freeBuffersAfterSecondMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMul - freeBuffersAfterFirstMatMul)
.toEqual(0);
expect(usedBuffersAfterSecondMul - usedBuffersAfterFirstMatMul)
.toEqual(3);

const f2 = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
const c3 = tf.matMul(c2, f2);
const freeBuffersAfterSecondMatMul = bufferManager.getNumFreeBuffers();
const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul)
.toEqual(0);
expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul)
.toEqual(2);

// Tests happen within a tidy so we need to read a tensor at the end of a
// test if encoder count in queue is larger than one in order to force
// flush the disposal queue.
await c3.data();
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('readSync should throw if tensors are on the GPU', async () => {
const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
4 changes: 2 additions & 2 deletions tfjs-backend-webgpu/src/flags_webgpu.ts
@@ -19,8 +19,8 @@ import {env} from '@tensorflow/tfjs-core';

const ENV = env();

/** Whether we submit commands to the device queue immediately. */
ENV.registerFlag('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', () => true);
/** The number of command encoders to accumulate before submitting to the device queue. */
ENV.registerFlag('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', () => 15);

/**
* Whether we forward execution to the CPU backend if tensors are small and
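The new flag generalizes the old boolean: a value of 1 reproduces the old immediate mode (submit after every command encoder), while larger values defer submission and batch the encoders. A small usage sketch, using the same tf.env() API the tests above rely on:

    import * as tf from '@tensorflow/tfjs-core';

    // Submit after every command encoder (the old immediate mode).
    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);

    // Accumulate up to 15 encoders per submit (the new default, matching
    // the old delayed mode).
    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);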
16 changes: 8 additions & 8 deletions tfjs-backend-webgpu/src/matmul_test.ts
@@ -20,9 +20,9 @@ import {test_util} from '@tensorflow/tfjs-core';
import {describeWebGPU} from './test_util';

describeWebGPU('matmul', () => {
it('it works in delayed mode.', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', false);
it('it works when command encoder count is larger than one.', async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);

@@ -34,12 +34,12 @@ describeWebGPU('matmul', () => {
const dData = await d.data();
test_util.expectArraysClose(
dData, new Float32Array([0, 12, 7.5, 0, 6.5, 66]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

it('it works in immediate mode.', async () => {
const savedFlag = tf.env().get('WEBGPU_IMMEDIATE_EXECUTION_ENABLED');
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', true);
it('it works when command encoder count is one.', async () => {
const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);

@@ -51,7 +51,7 @@
const dData = await d.data();
test_util.expectArraysClose(
dData, new Float32Array([0, 12, 7.5, 0, 6.5, 66]));
tf.env().set('WEBGPU_IMMEDIATE_EXECUTION_ENABLED', savedFlag);
tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
});

// tslint:disable-next-line:max-line-length
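Each of the tests above repeats the same save/set/restore dance around the flag. A hypothetical helper, not part of this PR, could factor that out; withFlag below is an illustration only:

    async function withFlag(
        name: string, value: number|boolean,
        body: () => Promise<void>): Promise<void> {
      const saved = tf.env().get(name);
      tf.env().set(name, value);
      try {
        await body();
      } finally {
        // Restore the flag even if the test body throws.
        tf.env().set(name, saved);
      }
    }

    // Usage:
    // await withFlag('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1, async () => {
    //   ...test body...
    // });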