Address Kai's comment
qjia7 committed Apr 1, 2021
commit c4d64c5f1c9aa42dbacdf20e57ab7baa8c85bd46
4 changes: 2 additions & 2 deletions tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -564,8 +564,8 @@ export class WebGPUBackend extends KernelBackend {
       this.uniformDisposalQueue.push(uniformInfo);
     }
 
-    if (env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE') as number ===
-        this.commandQueue.length) {
+    if (env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE') as
+            number <= this.commandQueue.length) {
       this.submitQueue();
     }
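Note the two changes in this hunk: the flag is renamed, and the comparison loosens from === to <=, so the queue is flushed whenever the number of pending command encoders reaches (or already exceeds) the batch size. Below is a minimal standalone TypeScript sketch of that gating logic; commandQueue, submitQueue, and the batch-size semantics are taken from the diff above, while DeferredSubmitter, CommandEncoder, and enqueue are illustrative names, not tfjs APIs.

// A sketch of the deferred-submit batching this hunk implements; names other
// than commandQueue/submitQueue are assumptions for illustration.
type CommandEncoder = {label: string};

class DeferredSubmitter {
  private commandQueue: CommandEncoder[] = [];

  // batchSize plays the role of WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE.
  constructor(private batchSize: number) {}

  enqueue(encoder: CommandEncoder) {
    this.commandQueue.push(encoder);
    // '<=' rather than '===': flush as soon as the pending count reaches the
    // batch size, even if the threshold was lowered while work was pending.
    if (this.batchSize <= this.commandQueue.length) {
      this.submitQueue();
    }
  }

  submitQueue() {
    console.log(`submitting ${this.commandQueue.length} command encoders`);
    this.commandQueue = [];
  }
}

// batchSize = 1 is immediate mode; batchSize > 1 is delayed mode.
const immediate = new DeferredSubmitter(1);
immediate.enqueue({label: 'mul'});  // submits right away
const delayed = new DeferredSubmitter(3);
delayed.enqueue({label: 'mul'});
delayed.enqueue({label: 'matMul'});
delayed.enqueue({label: 'read'});   // third enqueue triggers the submit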
168 changes: 79 additions & 89 deletions tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -66,40 +66,9 @@ describeWebGPU('backend webgpu cpu forwarding turned on', () => {
 });
 
 describeWebGPU('backend webgpu', () => {
-  it('should not leak memory when command encoder count is larger than one',
-     async () => {
-       const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
-       tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
-       const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
-       const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
-
-       const c = tf.mul(a, b);
-
-       const startNumBytes = tf.memory().numBytes;
-       const startNumTensors = tf.memory().numTensors;
-       const startNumBytesInGPU =
-           (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;
-
-       const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
-       const d = tf.matMul(c, f);
-
-       const dData = await d.data();
-       const endNumBytes = tf.memory().numBytes;
-       const endNumTensors = tf.memory().numTensors;
-       const endNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;
-
-       expect(endNumBytes - startNumBytes).toEqual(48);
-       expect(endNumTensors - startNumTensors).toEqual(2);
-       expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(24);
-
-       tf.test_util.expectArraysClose(
-           dData, new Float32Array([9, 12, 15, 19, 26, 33]));
-       tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
-     });
-
-  it('should not leak memory when command encoder count is one', async () => {
-    const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
+  it('should not leak memory in delayed mode', async () => {
+    const savedFlag = tf.env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE');
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 15);
     const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
     const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
 
@@ -123,12 +92,41 @@ describeWebGPU('backend webgpu', () => {
 
     tf.test_util.expectArraysClose(
         dData, new Float32Array([9, 12, 15, 19, 26, 33]));
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', savedFlag);
   });
 
-  it('should recycle buffers when command encoder count is one', () => {
-    const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
+  it('should not leak memory in immediate mode', async () => {
+    const savedFlag = tf.env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE');
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 1);
+    const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
+    const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
+
+    const c = tf.mul(a, b);
+
+    const startNumBytes = tf.memory().numBytes;
+    const startNumTensors = tf.memory().numTensors;
+    const startNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;
+
+    const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
+    const d = tf.matMul(c, f);
+
+    const dData = await d.data();
+    const endNumBytes = tf.memory().numBytes;
+    const endNumTensors = tf.memory().numTensors;
+    const endNumBytesInGPU = (tf.memory() as WebGPUMemoryInfo).numBytesInGPU;
+
+    expect(endNumBytes - startNumBytes).toEqual(48);
+    expect(endNumTensors - startNumTensors).toEqual(2);
+    expect(endNumBytesInGPU - startNumBytesInGPU).toEqual(24);
+
+    tf.test_util.expectArraysClose(
+        dData, new Float32Array([9, 12, 15, 19, 26, 33]));
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', savedFlag);
+  });
+
+  it('should recycle buffers in immediate mode', () => {
+    const savedFlag = tf.env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE');
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 1);
     const backend = tf.backend() as WebGPUBackend;
     const bufferManager = backend.getBufferManager();
     bufferManager.reset();
@@ -162,59 +160,51 @@ describeWebGPU('backend webgpu', () => {
     const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
     expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul).toEqual(0);
     expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul).toEqual(2);
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', savedFlag);
   });
 
-  it('should not recycle buffers when command encoder count is larger than one',
-     async () => {
-       const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
-       tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
-       const backend = tf.backend() as WebGPUBackend;
-       const bufferManager = backend.getBufferManager();
-       bufferManager.reset();
-
-       const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
-       const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
-
-       const c = tf.mul(a, b);
-       const freeBuffersAfterFirstMul = bufferManager.getNumFreeBuffers();
-       const usedBuffersAfterFirstMul = bufferManager.getNumUsedBuffers();
-
-       const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
-       tf.matMul(c, f);
-       const freeBuffersAfterFirstMatMul = bufferManager.getNumFreeBuffers();
-       const usedBuffersAfterFirstMatMul = bufferManager.getNumUsedBuffers();
-       expect(freeBuffersAfterFirstMatMul - freeBuffersAfterFirstMul)
-           .toEqual(0);
-       expect(usedBuffersAfterFirstMatMul - usedBuffersAfterFirstMul)
-           .toEqual(2);
-
-       const a2 = tf.tensor2d([2, 4, 6, 8], [2, 2]);
-       const b2 = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
-
-       const c2 = tf.mul(a2, b2);
-       const freeBuffersAfterSecondMul = bufferManager.getNumFreeBuffers();
-       const usedBuffersAfterSecondMul = bufferManager.getNumUsedBuffers();
-       expect(freeBuffersAfterSecondMul - freeBuffersAfterFirstMatMul)
-           .toEqual(0);
-       expect(usedBuffersAfterSecondMul - usedBuffersAfterFirstMatMul)
-           .toEqual(3);
-
-       const f2 = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
-       const c3 = tf.matMul(c2, f2);
-       const freeBuffersAfterSecondMatMul = bufferManager.getNumFreeBuffers();
-       const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
-       expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul)
-           .toEqual(0);
-       expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul)
-           .toEqual(2);
-
-       // Tests happen within a tidy so we need to read a tensor at the end of a
-       // test if encoder count in queue is larger than one in order to force
-       // flush the disposal queue.
-       await c3.data();
-       tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
-     });
+  it('should not recycle buffers in delayed mode', async () => {
+    const savedFlag = tf.env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE');
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 15);
+    const backend = tf.backend() as WebGPUBackend;
+    const bufferManager = backend.getBufferManager();
+    bufferManager.reset();
+
+    const a = tf.tensor2d([2, 4, 6, 8], [2, 2]);
+    const b = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
+
+    const c = tf.mul(a, b);
+    const freeBuffersAfterFirstMul = bufferManager.getNumFreeBuffers();
+    const usedBuffersAfterFirstMul = bufferManager.getNumUsedBuffers();
+
+    const f = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
+    tf.matMul(c, f);
+    const freeBuffersAfterFirstMatMul = bufferManager.getNumFreeBuffers();
+    const usedBuffersAfterFirstMatMul = bufferManager.getNumUsedBuffers();
+    expect(freeBuffersAfterFirstMatMul - freeBuffersAfterFirstMul).toEqual(0);
+    expect(usedBuffersAfterFirstMatMul - usedBuffersAfterFirstMul).toEqual(2);
+
+    const a2 = tf.tensor2d([2, 4, 6, 8], [2, 2]);
+    const b2 = tf.tensor2d([0.5, 0.5, 0.5, 0.5], [2, 2]);
+
+    const c2 = tf.mul(a2, b2);
+    const freeBuffersAfterSecondMul = bufferManager.getNumFreeBuffers();
+    const usedBuffersAfterSecondMul = bufferManager.getNumUsedBuffers();
+    expect(freeBuffersAfterSecondMul - freeBuffersAfterFirstMatMul).toEqual(0);
+    expect(usedBuffersAfterSecondMul - usedBuffersAfterFirstMatMul).toEqual(3);
+
+    const f2 = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
+    const c3 = tf.matMul(c2, f2);
+    const freeBuffersAfterSecondMatMul = bufferManager.getNumFreeBuffers();
+    const usedBuffersAfterSecondMatMul = bufferManager.getNumUsedBuffers();
+    expect(freeBuffersAfterSecondMatMul - freeBuffersAfterSecondMul).toEqual(0);
+    expect(usedBuffersAfterSecondMatMul - usedBuffersAfterSecondMul).toEqual(2);
+
+    // Tests happen within a tidy so we need to read a tensor at the end of a
+    // test in delayed mode in order to force flush the disposal queue.
+    await c3.data();
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', savedFlag);
+  });
 
   it('readSync should throw if tensors are on the GPU', async () => {
     const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
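The comment at the end of the last test above captures the main caveat of delayed mode: buffers queued for disposal are only released once the pending command encoders are actually submitted, so the test reads a tensor back to force that flush. A minimal sketch of the same pattern outside the test harness, with arbitrary example values:

import * as tf from '@tensorflow/tfjs-core';

async function runInDelayedMode() {
  const x = tf.tensor2d([1, 2, 3, 4], [2, 2]);
  const y = tf.matMul(x, x);
  // In delayed mode the matMul may still sit in an unsubmitted command
  // encoder; reading the result forces the backend to submit the queue,
  // which also flushes its disposal queue.
  console.log(await y.data());
}

runInDelayedMode();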
4 changes: 2 additions & 2 deletions tfjs-backend-webgpu/src/flags_webgpu.ts
@@ -19,8 +19,8 @@ import {env} from '@tensorflow/tfjs-core';
 
 const ENV = env();
 
-/** The command encoder count submitted to the device queue. */
-ENV.registerFlag('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', () => 15);
+/** The batched command encoders size in the device queue. */
+ENV.registerFlag('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', () => 15);
 
 /**
  * Whether we forward execution to the CPU backend if tensors are small and
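Every test touched by this commit wraps its body in the same save/set/restore sequence around this flag. A small sketch of that pattern as a reusable helper; withFlag is hypothetical and not a tfjs API, while the flag name and the tf.env() get/set accessors come from the code above:

import * as tf from '@tensorflow/tfjs-core';

// Hypothetical helper capturing the save/set/restore pattern used by the
// tests in this commit; withFlag itself is not part of tfjs.
async function withFlag<T>(
    name: string, value: number, body: () => Promise<T>): Promise<T> {
  const saved = tf.env().get(name);
  tf.env().set(name, value);
  try {
    return await body();
  } finally {
    tf.env().set(name, saved);  // restore even if the body throws
  }
}

// Usage: run a matMul in immediate mode (batch size 1).
withFlag('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 1, async () => {
  const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
  const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
  return tf.matMul(a, b).data();
}).then(d => console.log(d));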
16 changes: 8 additions & 8 deletions tfjs-backend-webgpu/src/matmul_test.ts
@@ -20,9 +20,9 @@ import {test_util} from '@tensorflow/tfjs-core';
 import {describeWebGPU} from './test_util';
 
 describeWebGPU('matmul', () => {
-  it('it works when command encoder count is larger than one.', async () => {
-    const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 15);
+  it('it works in delayed mode.', async () => {
+    const savedFlag = tf.env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE');
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 15);
     const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
     const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
 
@@ -34,12 +34,12 @@ describeWebGPU('matmul', () => {
     const dData = await d.data();
     test_util.expectArraysClose(
         dData, new Float32Array([0, 12, 7.5, 0, 6.5, 66]));
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', savedFlag);
   });
 
-  it('it works when command encoder count is one.', async () => {
-    const savedFlag = tf.env().get('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE');
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', 1);
+  it('it works in immediate mode.', async () => {
+    const savedFlag = tf.env().get('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE');
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', 1);
     const a = tf.tensor2d([1, 2, 3, 4], [2, 2]);
     const b = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]);
 
@@ -51,7 +51,7 @@ describeWebGPU('matmul', () => {
     const dData = await d.data();
     test_util.expectArraysClose(
         dData, new Float32Array([0, 12, 7.5, 0, 6.5, 66]));
-    tf.env().set('WEBGPU_COMMAND_ENCODER_COUNT_IN_QUEUE', savedFlag);
+    tf.env().set('WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE', savedFlag);
  });
 
   // tslint:disable-next-line:max-line-length