Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
c09ec95
feat: track expected output columns in query builder
icewind1991 Jul 31, 2024
114db05
fix: don't make ICacheFactory depend on database
icewind1991 Aug 8, 2024
c58bdbf
fix: delay calculating global cache prefix until a cache is created
icewind1991 Aug 20, 2024
f5b3486
feat: add option to automatically partition queries by specific tables
icewind1991 Jun 13, 2024
62f8b65
feat: implement distributing partitioned queries over multiple shards
icewind1991 Jul 31, 2024
ddbeb4c
test: mark share test cleanup as running across all shards
icewind1991 Jul 16, 2024
fc05a67
fix: only allow pre-defined shards
icewind1991 Jul 18, 2024
4d9b563
test: run sharding tests in ci
icewind1991 Jul 18, 2024
390f6a7
fix: hint storage id in more places
icewind1991 Jul 19, 2024
2eaeeee
fix: run mimetype repair query across all shards
icewind1991 Jul 19, 2024
382d102
test: fix share provider tests for sharding
icewind1991 Jul 19, 2024
80a2553
fix: make background scan job compatible with sharding
icewind1991 Jul 25, 2024
e538f46
fix: adjust systemtag orphan cleanup query to work with sharding
icewind1991 Jul 31, 2024
cc091b1
fix: fix share cleanup for deleted groups with sharding
icewind1991 Aug 6, 2024
b21a399
fix: implement sharding compatible cleanup for various bits
icewind1991 Aug 15, 2024
1363e14
fix: make preload custom properties sharding compatible
icewind1991 Aug 21, 2024
9d02485
fix: mark systemconfig value as not being tainted because they are im…
icewind1991 Aug 22, 2024
2574cbf
chore: Apply php:cs recommendations
artonge Aug 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: make background scan job compatible with sharding
Signed-off-by: Robin Appelman <[email protected]>
  • Loading branch information
icewind1991 authored and artonge committed Aug 28, 2024
commit 80a25531f73c436660458fbe88acd07c9ed6434b
70 changes: 58 additions & 12 deletions apps/files/lib/BackgroundJob/ScanFiles.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ class ScanFiles extends TimedJob {
public const USERS_PER_SESSION = 500;

public function __construct(
IConfig $config,
IConfig $config,
IEventDispatcher $dispatcher,
LoggerInterface $logger,
IDBConnection $connection,
ITimeFactory $time
LoggerInterface $logger,
IDBConnection $connection,
ITimeFactory $time
) {
parent::__construct($time);
// Run once per 10 minutes
Expand Down Expand Up @@ -70,15 +70,61 @@ protected function runScanner(string $user): void {
* @return string|false
*/
private function getUserToScan() {
// Looks for a filecache row with a negative size and a parent greater than -1,
// and returns the user_id of a mount that points at that row's storage.
// Two strategies are used, depending on whether "filecache" has a shard definition.
if ($this->connection->getShardDefinition("filecache")) {
// for sharded filecache, the "LIMIT" from the normal query doesn't work

// first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted.
// we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds brittle. So we might end up in situations where there is a storage to scan, but it is not returned by this method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a fallback for that

$query = $this->connection->getQueryBuilder();
$query->select('m.user_id')
->from('filecache', 'f')
->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
->setMaxResults(10)
->groupBy("f.storage")
->runAcrossAllShards();

// because of the left join, a row can come back without a user_id (no matching mount);
// return the first row that does carry one
$result = $query->executeQuery();
while ($res = $result->fetch()) {
if ($res['user_id']) {
return $res['user_id'];
}
}

// as a fallback, we try a slower approach where we find all mounted storages first
// this is essentially doing the inner join manually
$storages = $this->getAllMountedStorages();

// same size/parent conditions as above, but restricted to the storage ids collected from the mounts table
$query = $this->connection->getQueryBuilder();
$query->select('m.user_id')
->from('filecache', 'f')
->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storages, IQueryBuilder::PARAM_INT_ARRAY)))
->setMaxResults(1)
->runAcrossAllShards();
return $query->executeQuery()->fetchOne();
} else {
// no shard definition for filecache: a single inner join against mounts with a limit of 1 is enough
$query = $this->connection->getQueryBuilder();
$query->select('m.user_id')
->from('filecache', 'f')
->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
->setMaxResults(1)
->runAcrossAllShards();

return $query->executeQuery()->fetchOne();
}
}

/**
 * Collects the distinct storage_id values found in the mounts table.
 *
 * NOTE(review): the select('user_id') ... fetchOne() statements in the body look like
 * removed-side lines of the diff (the previous query for a user to scan) rendered
 * without +/- markers. As written they would return before the selectDistinct query
 * runs - confirm against the actual file.
 */
private function getAllMountedStorages(): array {
$query = $this->connection->getQueryBuilder();
$query->select('user_id')
->from('filecache', 'f')
->innerJoin('f', 'mounts', 'm', $query->expr()->eq('storage_id', 'storage'))
->where($query->expr()->lt('size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->gt('parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
->setMaxResults(1);

return $query->executeQuery()->fetchOne();
// one entry per distinct storage_id, fetched as a flat column
$query->selectDistinct('storage_id')
->from('mounts');
return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
}

/**
Expand Down