Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
8eaddbc
feat: track expected output columns in query builder
icewind1991 Jul 31, 2024
b1744e7
fix: don't make ICacheFactory depend on database
icewind1991 Aug 8, 2024
8f57d46
fix: delay calculating global cache prefix untill a cache is created
icewind1991 Aug 20, 2024
4ec53e7
feat: add option to automatically partition queries by specific tables
icewind1991 Jun 13, 2024
82d7eaf
feat: implement distributing partitioned queries over multiple shards
icewind1991 Jul 31, 2024
ecf1cc2
test: mark share test cleanup as running across all shards
icewind1991 Jul 16, 2024
3e51939
fix: only allow pre-defined shards
icewind1991 Jul 18, 2024
22f76fc
test: run sharding tests in ci
icewind1991 Jul 18, 2024
693ee5e
fix: hint storage id in more places
icewind1991 Jul 19, 2024
0e40fa4
fix: run mimetype repair query across all shards
icewind1991 Jul 19, 2024
1b6d76a
test: fix share provider tests for sharding
icewind1991 Jul 19, 2024
5500723
fix: make background scan job compatible with sharding
icewind1991 Jul 25, 2024
ddecae8
fix: adjust systemtag orphan cleanup query to work with sharding
icewind1991 Jul 31, 2024
dc5f0f5
fix: fix share cleanup for deleted groups with sharding
icewind1991 Aug 6, 2024
b264559
fix: implement sharding compatible cleanup for various bits
icewind1991 Aug 15, 2024
57ffbb7
fix: make preload custom proterties sharding compatible
icewind1991 Aug 21, 2024
e2bff39
fix: mark systemconfig value as not being tainted because they are im…
icewind1991 Aug 22, 2024
e5a8f99
chore: Apply php:cs recommendations
artonge Aug 28, 2024
140b36f
fix: Backport to 30
artonge Aug 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: implement sharding compatible cleanup for various bits
Signed-off-by: Robin Appelman <[email protected]>
  • Loading branch information
icewind1991 authored and artonge committed Aug 28, 2024
commit b2645590cef45a99ee7c7d549f65b3432b81e422
86 changes: 69 additions & 17 deletions apps/files/lib/BackgroundJob/DeleteOrphanedItems.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,86 @@ public function run($argument) {
* @param string $typeCol
* @return int Number of deleted entries
*/
protected function cleanUp($table, $idCol, $typeCol) {
protected function cleanUp(string $table, string $idCol, string $typeCol): int {
$deletedEntries = 0;

$query = $this->connection->getQueryBuilder();
$query->select('t1.' . $idCol)
->from($table, 't1')
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid'))
->andWhere($query->expr()->isNull('t2.fileid'))
->groupBy('t1.' . $idCol)
->setMaxResults(self::CHUNK_SIZE);

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete($table)
->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid')));
->where($deleteQuery->expr()->eq($idCol, $deleteQuery->createParameter('objectid')));

if ($this->connection->getShardDefinition('filecache')) {
$sourceIdChunks = $this->getItemIds($table, $idCol, $typeCol, 1000);
foreach ($sourceIdChunks as $sourceIdChunk) {
$deletedSources = $this->findMissingSources($sourceIdChunk);
$deleteQuery->setParameter('objectid', $deletedSources, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
} else {
$query = $this->connection->getQueryBuilder();
$query->select('t1.' . $idCol)
->from($table, 't1')
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid'))
->andWhere($query->expr()->isNull('t2.fileid'))
->groupBy('t1.' . $idCol)
->setMaxResults(self::CHUNK_SIZE);

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete($table)
->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid')));

$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
$deletedInLastChunk = count($chunk);
$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
$deletedInLastChunk = count($chunk);

$deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
$deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
}

return $deletedEntries;
}

/**
* @param string $table
* @param string $idCol
* @param string $typeCol
* @param int $chunkSize
* @return \Iterator<int[]>
* @throws \OCP\DB\Exception
*/
private function getItemIds(string $table, string $idCol, string $typeCol, int $chunkSize): \Iterator {
$query = $this->connection->getQueryBuilder();
$query->select($idCol)
->from($table)
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->groupBy($idCol)
->andWhere($query->expr()->gt($idCol, $query->createParameter('min_id')))
->setMaxResults($chunkSize);

$minId = 0;
while (true) {
$query->setParameter('min_id', $minId);
$rows = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
if (count($rows) > 0) {
$minId = $rows[count($rows) - 1];
yield $rows;
} else {
break;
}
}
}

private function findMissingSources(array $ids): array {
$qb = $this->connection->getQueryBuilder();
$qb->select('fileid')
->from('filecache')
->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));
$found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
return array_diff($ids, $found);
}

/**
* Deleting orphaned system tag mappings
*
Expand Down
88 changes: 62 additions & 26 deletions apps/files/lib/Command/DeleteOrphanedFiles.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,18 @@ protected function configure(): void {

public function execute(InputInterface $input, OutputInterface $output): int {
$deletedEntries = 0;
$fileIdsByStorage = [];

$query = $this->connection->getQueryBuilder();
$query->select('fc.fileid')
->from('filecache', 'fc')
->where($query->expr()->isNull('s.numeric_id'))
->leftJoin('fc', 'storages', 's', $query->expr()->eq('fc.storage', 's.numeric_id'))
->setMaxResults(self::CHUNK_SIZE);
$deletedStorages = array_diff($this->getReferencedStorages(), $this->getExistingStorages());

$deleteExtended = !$input->getOption('skip-filecache-extended');
if ($deleteExtended) {
$fileIdsByStorage = $this->getFileIdsForStorages($deletedStorages);
}

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete('filecache')
->where($deleteQuery->expr()->eq('fileid', $deleteQuery->createParameter('objectid')));
->where($deleteQuery->expr()->in('storage', $deleteQuery->createParameter('storage_ids')));

$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
Expand All @@ -61,8 +62,8 @@ public function execute(InputInterface $input, OutputInterface $output): int {

$output->writeln("$deletedEntries orphaned file cache entries deleted");

if (!$input->getOption('skip-filecache-extended')) {
$deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended();
if ($deleteExtended) {
$deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended($fileIdsByStorage);
$output->writeln("$deletedFileCacheExtended orphaned file cache extended entries deleted");
}

Expand All @@ -72,28 +73,63 @@ public function execute(InputInterface $input, OutputInterface $output): int {
return self::SUCCESS;
}

private function cleanupOrphanedFileCacheExtended(): int {
$deletedEntries = 0;

private function getReferencedStorages(): array {
$query = $this->connection->getQueryBuilder();
$query->select('fce.fileid')
->from('filecache_extended', 'fce')
->leftJoin('fce', 'filecache', 'fc', $query->expr()->eq('fce.fileid', 'fc.fileid'))
->where($query->expr()->isNull('fc.fileid'))
->setMaxResults(self::CHUNK_SIZE);
$query->select('storage')
->from('filecache')
->groupBy('storage')
->runAcrossAllShards();
return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
}

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete('filecache_extended')
->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('idsToDelete')));
private function getExistingStorages(): array {
$query = $this->connection->getQueryBuilder();
$query->select('numeric_id')
->from('storages')
->groupBy('numeric_id');
return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
}

$result = $query->executeQuery();
while ($result->rowCount() > 0) {
$idsToDelete = $result->fetchAll(\PDO::FETCH_COLUMN);
/**
* @param int[] $storageIds
* @return array<int, int[]>
*/
private function getFileIdsForStorages(array $storageIds): array {
$query = $this->connection->getQueryBuilder();
$query->select('storage', 'fileid')
->from('filecache')
->where($query->expr()->in('storage', $query->createParameter('storage_ids')));

$result = [];
$storageIdChunks = array_chunk($storageIds, self::CHUNK_SIZE);
foreach ($storageIdChunks as $storageIdChunk) {
$query->setParameter('storage_ids', $storageIdChunk, IQueryBuilder::PARAM_INT_ARRAY);
$chunk = $query->executeQuery()->fetchAll();
foreach ($chunk as $row) {
$result[$row['storage']][] = $row['fileid'];
}
}
return $result;
}

$deleteQuery->setParameter('idsToDelete', $idsToDelete, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
/**
* @param array<int, int[]> $fileIdsByStorage
* @return int
*/
private function cleanupOrphanedFileCacheExtended(array $fileIdsByStorage): int {
$deletedEntries = 0;

$result = $query->executeQuery();
$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete('filecache_extended')
->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('file_ids')));

foreach ($fileIdsByStorage as $storageId => $fileIds) {
$deleteQuery->hintShardKey('storage', $storageId, true);
$fileChunks = array_chunk($fileIds, self::CHUNK_SIZE);
foreach ($fileChunks as $fileChunk) {
$deleteQuery->setParameter('file_ids', $fileChunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
}

return $deletedEntries;
Expand Down
14 changes: 10 additions & 4 deletions apps/files/tests/Command/DeleteOrphanedFilesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,19 @@ protected function tearDown(): void {
}

protected function getFile($fileId) {
$stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*filecache` WHERE `fileid` = ?', [$fileId]);
return $stmt->fetchAll();
$query = $this->connection->getQueryBuilder();
$query->select('*')
->from('filecache')
->where($query->expr()->eq('fileid', $query->createNamedParameter($fileId)));
return $query->executeQuery()->fetchAll();
}

protected function getMounts($storageId) {
$stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*mounts` WHERE `storage_id` = ?', [$storageId]);
return $stmt->fetchAll();
$query = $this->connection->getQueryBuilder();
$query->select('*')
->from('mounts')
->where($query->expr()->eq('storage_id', $query->createNamedParameter($storageId)));
return $query->executeQuery()->fetchAll();
}

/**
Expand Down
41 changes: 41 additions & 0 deletions apps/files_sharing/lib/DeleteOrphanedSharesJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ public function __construct(
* @param array $argument unused argument
*/
public function run($argument) {
if ($this->db->getShardDefinition('filecache')) {
$this->shardingCleanup();
return;
}

$qbSelect = $this->db->getQueryBuilder();
$qbSelect->select('id')
->from('share', 's')
Expand Down Expand Up @@ -96,4 +101,40 @@ public function run($argument) {
}, $this->db);
} while ($deleted >= self::CHUNK_SIZE && $this->time->getTime() <= $cutOff);
}

private function shardingCleanup(): void {
$qb = $this->db->getQueryBuilder();
$qb->selectDistinct('file_source')
->from('share', 's');
$sourceFiles = $qb->executeQuery()->fetchAll(PDO::FETCH_COLUMN);

$deleteQb = $this->db->getQueryBuilder();
$deleteQb->delete('share')
->where(
$deleteQb->expr()->in('file_source', $deleteQb->createParameter('ids'), IQueryBuilder::PARAM_INT_ARRAY)
);

$chunks = array_chunk($sourceFiles, self::CHUNK_SIZE);
foreach ($chunks as $chunk) {
$deletedFiles = $this->findMissingSources($chunk);
$this->atomic(function () use ($deletedFiles, $deleteQb) {
$deleteQb->setParameter('ids', $deletedFiles, IQueryBuilder::PARAM_INT_ARRAY);
$deleted = $deleteQb->executeStatement();
$this->logger->debug("{deleted} orphaned share(s) deleted", [
'app' => 'DeleteOrphanedSharesJob',
'deleted' => $deleted,
]);
return $deleted;
}, $this->db);
}
}

private function findMissingSources(array $ids): array {
$qb = $this->db->getQueryBuilder();
$qb->select('fileid')
->from('filecache')
->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));
$found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
return array_diff($ids, $found);
}
}
4 changes: 2 additions & 2 deletions lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ public function executeStatement(?IDBConnection $connection = null): int {
return $this->builder->executeStatement($connection);
}

public function hintShardKey(string $column, mixed $value) {
$this->builder->hintShardKey($column, $value);
public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
$this->builder->hintShardKey($column, $value, $overwrite);
return $this;
}

Expand Down
6 changes: 5 additions & 1 deletion lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,11 @@ private function registerOrder(string $column, string $order): void {
];
}

public function hintShardKey(string $column, mixed $value) {
public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
if ($overwrite) {
$this->primaryKeys = [];
$this->shardKeys = [];
}
if ($this->shardDefinition?->isKey($column)) {
$this->primaryKeys[] = $value;
}
Expand Down
Loading