Skip to content

Commit 3eaf530

Browse files
committed
Improve performance of find_chunks_by_dedup_key method
Signed-off-by: Karthik P S <[email protected]>
1 parent f17a42a commit 3eaf530

File tree

5 files changed

+928
-876
lines changed

5 files changed

+928
-876
lines changed

src/sdk/nb.d.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -770,6 +770,7 @@ interface DBCollection {
770770

771771
executeSQL<T>(query: string, params: Array<any>, options?: { query_name?: string, preferred_pool?: string }): Promise<sqlResult<T>>;
772772
name: any;
773+
schema: any;
773774
}
774775

775776
type DBDoc = any;

src/server/object_services/map_server.js

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,12 @@ class GetMapping {
8585
if (!config.DEDUP_ENABLED) return;
8686
await Promise.all(Object.values(this.chunks_per_bucket).map(async chunks => {
8787
const bucket = chunks[0].bucket;
88-
const dedup_keys = _.compact(_.map(chunks,
89-
chunk => chunk.digest_b64 && Buffer.from(chunk.digest_b64, 'base64')));
88+
const dedup_keys = [];
89+
chunks.forEach(chunk => {
90+
if (chunk?.digest_b64) {
91+
dedup_keys.push(chunk.digest_b64);
92+
}
93+
});
9094
if (!dedup_keys.length) return;
9195
dbg.log0('GetMapping.find_dups: found keys', dedup_keys.length);
9296
const dup_chunks_db = await MDStore.instance().find_chunks_by_dedup_key(bucket, dedup_keys);

src/server/object_services/md_store.js

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ const mime = require('mime-types');
1313
const P = require('../../util/promise');
1414
const dbg = require('../../util/debug_module')(__filename);
1515
const db_client = require('../../util/db_client');
16+
const { decode_json } = require('../../util/postgres_client.js');
1617

1718
const mongo_functions = require('../../util/mongo_functions');
1819
const object_md_schema = require('./schemas/object_md_schema');
@@ -1541,23 +1542,28 @@ class MDStore {
15411542
* @returns {Promise<nb.ChunkSchemaDB[]>}
15421543
*/
15431544
async find_chunks_by_dedup_key(bucket, dedup_keys) {
1544-
// TODO: This is temporary patch because of binary representation in MongoDB and PostgreSQL
1545-
/** @type {nb.ChunkSchemaDB[]} */
1546-
const chunks = await this._chunks.find({
1547-
system: bucket.system._id,
1548-
bucket: bucket._id,
1549-
dedup_key: {
1550-
$in: dedup_keys,
1551-
$exists: true
1552-
},
1553-
deleted: null,
1554-
}, {
1555-
sort: {
1556-
_id: -1 // get newer chunks first
1557-
}
1558-
});
1559-
await this.load_blocks_for_chunks(chunks);
1560-
return chunks;
1545+
const values = [];
1546+
let query = `SELECT * FROM ${this._chunks.name} WHERE (data ->> 'system' = $1 AND data ->> 'bucket' = $2`;
1547+
values.push(`${bucket.system._id}`, `${bucket._id}`);
1548+
1549+
if (dedup_keys.length) {
1550+
query += ` AND (data ->> 'dedup_key' = ANY($3) AND data ? 'dedup_key')`;
1551+
values.push(dedup_keys);
1552+
} else {
1553+
query += ` AND (FALSE AND data ? 'dedup_key')`;
1554+
}
1555+
1556+
query += ` AND (data->'deleted' IS NULL OR data->'deleted' = 'null'::jsonb)) ORDER BY _id DESC;`;
1557+
1558+
try {
1559+
const res = await this._chunks.executeSQL(query, values);
1560+
const chunks = res?.rows.map(row => decode_json(this._chunks.schema, row.data));
1561+
await this.load_blocks_for_chunks(chunks);
1562+
return chunks;
1563+
} catch (err) {
1564+
dbg.error('Error while finding chunks by dedup_key. error is ', err);
1565+
return [];
1566+
}
15611567
}
15621568

15631569
iterate_all_chunks_in_buckets(lower_marker, upper_marker, buckets, limit) {

src/test/integration_tests/db/test_md_store.js

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,43 @@ mocha.describe('md_store', function() {
380380
return md_store.delete_chunks_by_ids(_.map(chunks, '_id'));
381381
});
382382

383+
mocha.it('find_chunks_by_dedup_key()', async () => {
384+
if (config.DB_TYPE !== 'postgres') return; // feature uses SQL path
385+
const bucket = { _id: md_store.make_md_id(), system: { _id: system_id } };
386+
const chunk = {
387+
_id: md_store.make_md_id(),
388+
system: system_id,
389+
bucket: bucket._id,
390+
frags: [{ _id: md_store.make_md_id() }],
391+
size: 10,
392+
frag_size: 10,
393+
dedup_key: Buffer.from('noobaa')
394+
};
395+
await md_store.insert_chunks([chunk]);
396+
const chunksArr = await md_store.find_chunks_by_dedup_key(bucket, [Buffer.from('noobaa').toString('base64')]);
397+
assert(Array.isArray(chunksArr));
398+
assert(chunksArr.length >= 1);
399+
assert(chunksArr[0].frags[0]?._id?.toString() === chunk.frags[0]._id.toString());
400+
});
401+
402+
mocha.it('find_chunks_by_dedup_key empty dedup_key array passed', async () => {
403+
if (config.DB_TYPE !== 'postgres') return; // feature uses SQL path
404+
const bucket = { _id: md_store.make_md_id(), system: { _id: system_id } };
405+
const chunk = {
406+
_id: md_store.make_md_id(),
407+
system: system_id,
408+
bucket: bucket._id,
409+
frags: [{ _id: md_store.make_md_id() }],
410+
size: 10,
411+
frag_size: 10,
412+
dedup_key: Buffer.from('noobaa')
413+
};
414+
await md_store.insert_chunks([chunk]);
415+
const chunksArr = await md_store.find_chunks_by_dedup_key(bucket, []);
416+
assert(Array.isArray(chunksArr));
417+
assert(chunksArr.length === 0);
418+
});
419+
383420
});
384421

385422

0 commit comments

Comments (0)