From 2e14a7a4a6efb5444fb65e0c2368e3420d024d90 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 21 Sep 2023 13:49:16 +0200 Subject: [PATCH 1/6] optimize query pattern used by storage filter Signed-off-by: Robin Appelman --- lib/composer/composer/LICENSE | 2 - lib/composer/composer/autoload_classmap.php | 5 + lib/composer/composer/autoload_static.php | 5 + lib/private/Files/Cache/SearchBuilder.php | 128 ++++++++++++----- .../QueryOptimizer/FlattenNestedBool.php | 30 ++++ .../FlattenSingleArgumentBinaryOperation.php | 27 ++++ .../MergeDistributiveOperations.php | 99 +++++++++++++ .../Search/QueryOptimizer/OrEqualsToIn.php | 68 +++++++++ .../Search/QueryOptimizer/QueryOptimizer.php | 13 +- .../QueryOptimizer/ReplacingOptimizerStep.php | 31 ++++ .../Files/Search/SearchBinaryOperator.php | 19 ++- lib/private/Files/Search/SearchComparison.php | 10 +- lib/public/Files/Search/ISearchComparison.php | 5 +- tests/lib/Files/Cache/SearchBuilderTest.php | 1 + .../Search/QueryOptimizer/CombinedTests.php | 45 ++++++ .../QueryOptimizer/FlattenNestedBoolTest.php | 42 ++++++ .../MergeDistributiveOperationsTest.php | 133 ++++++++++++++++++ .../QueryOptimizer/OrEqualsToInTest.php | 120 ++++++++++++++++ 18 files changed, 731 insertions(+), 52 deletions(-) create mode 100644 lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php create mode 100644 lib/private/Files/Search/QueryOptimizer/FlattenSingleArgumentBinaryOperation.php create mode 100644 lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php create mode 100644 lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php create mode 100644 lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php create mode 100644 tests/lib/Files/Search/QueryOptimizer/CombinedTests.php create mode 100644 tests/lib/Files/Search/QueryOptimizer/FlattenNestedBoolTest.php create mode 100644 tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php create mode 100644 tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php diff --git a/lib/composer/composer/LICENSE b/lib/composer/composer/LICENSE index f27399a042d95..62ecfd8d0046b 100644 --- a/lib/composer/composer/LICENSE +++ b/lib/composer/composer/LICENSE @@ -1,4 +1,3 @@ - Copyright (c) Nils Adermann, Jordi Boggiano Permission is hereby granted, free of charge, to any person obtaining a copy @@ -18,4 +17,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - diff --git a/lib/composer/composer/autoload_classmap.php b/lib/composer/composer/autoload_classmap.php index 1ed555d7a7297..06977abf57fdd 100644 --- a/lib/composer/composer/autoload_classmap.php +++ b/lib/composer/composer/autoload_classmap.php @@ -1406,9 +1406,14 @@ 'OC\\Files\\ObjectStore\\Swift' => $baseDir . '/lib/private/Files/ObjectStore/Swift.php', 'OC\\Files\\ObjectStore\\SwiftFactory' => $baseDir . '/lib/private/Files/ObjectStore/SwiftFactory.php', 'OC\\Files\\ObjectStore\\SwiftV2CachingAuthService' => $baseDir . '/lib/private/Files/ObjectStore/SwiftV2CachingAuthService.php', + 'OC\\Files\\Search\\QueryOptimizer\\FlattenNestedBool' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php', + 'OC\\Files\\Search\\QueryOptimizer\\FlattenSingleArgumentBinaryOperation' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/FlattenSingleArgumentBinaryOperation.php', + 'OC\\Files\\Search\\QueryOptimizer\\MergeDistributiveOperations' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php', + 'OC\\Files\\Search\\QueryOptimizer\\OrEqualsToIn' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php', 'OC\\Files\\Search\\QueryOptimizer\\PathPrefixOptimizer' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/PathPrefixOptimizer.php', 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizer' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php', 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizerStep' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizerStep.php', + 'OC\\Files\\Search\\QueryOptimizer\\ReplacingOptimizerStep' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php', 'OC\\Files\\Search\\SearchBinaryOperator' => $baseDir . '/lib/private/Files/Search/SearchBinaryOperator.php', 'OC\\Files\\Search\\SearchComparison' => $baseDir . '/lib/private/Files/Search/SearchComparison.php', 'OC\\Files\\Search\\SearchOrder' => $baseDir . '/lib/private/Files/Search/SearchOrder.php', diff --git a/lib/composer/composer/autoload_static.php b/lib/composer/composer/autoload_static.php index aa5951dc44fe2..9621351352da5 100644 --- a/lib/composer/composer/autoload_static.php +++ b/lib/composer/composer/autoload_static.php @@ -1439,9 +1439,14 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OC\\Files\\ObjectStore\\Swift' => __DIR__ . '/../../..' . '/lib/private/Files/ObjectStore/Swift.php', 'OC\\Files\\ObjectStore\\SwiftFactory' => __DIR__ . '/../../..' . '/lib/private/Files/ObjectStore/SwiftFactory.php', 'OC\\Files\\ObjectStore\\SwiftV2CachingAuthService' => __DIR__ . '/../../..' . '/lib/private/Files/ObjectStore/SwiftV2CachingAuthService.php', + 'OC\\Files\\Search\\QueryOptimizer\\FlattenNestedBool' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php', + 'OC\\Files\\Search\\QueryOptimizer\\FlattenSingleArgumentBinaryOperation' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/FlattenSingleArgumentBinaryOperation.php', + 'OC\\Files\\Search\\QueryOptimizer\\MergeDistributiveOperations' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php', + 'OC\\Files\\Search\\QueryOptimizer\\OrEqualsToIn' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php', 'OC\\Files\\Search\\QueryOptimizer\\PathPrefixOptimizer' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/PathPrefixOptimizer.php', 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizer' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php', 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizerStep' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizerStep.php', + 'OC\\Files\\Search\\QueryOptimizer\\ReplacingOptimizerStep' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php', 'OC\\Files\\Search\\SearchBinaryOperator' => __DIR__ . '/../../..' . '/lib/private/Files/Search/SearchBinaryOperator.php', 'OC\\Files\\Search\\SearchComparison' => __DIR__ . '/../../..' . '/lib/private/Files/Search/SearchComparison.php', 'OC\\Files\\Search\\SearchOrder' => __DIR__ . '/../../..' . '/lib/private/Files/Search/SearchOrder.php', diff --git a/lib/private/Files/Cache/SearchBuilder.php b/lib/private/Files/Cache/SearchBuilder.php index 38161ec9cc622..fe021a62e9ed0 100644 --- a/lib/private/Files/Cache/SearchBuilder.php +++ b/lib/private/Files/Cache/SearchBuilder.php @@ -48,6 +48,7 @@ class SearchBuilder { ISearchComparison::COMPARE_LESS_THAN => 'lt', ISearchComparison::COMPARE_LESS_THAN_EQUAL => 'lte', ISearchComparison::COMPARE_DEFINED => 'isNotNull', + ISearchComparison::COMPARE_IN => 'in', ]; protected static $searchOperatorNegativeMap = [ @@ -59,6 +60,34 @@ class SearchBuilder { ISearchComparison::COMPARE_LESS_THAN => 'gte', ISearchComparison::COMPARE_LESS_THAN_EQUAL => 'gt', ISearchComparison::COMPARE_DEFINED => 'isNull', + ISearchComparison::COMPARE_IN => 'notIn', + ]; + + protected static $fieldTypes = [ + 'mimetype' => 'string', + 'mtime' => 'integer', + 'name' => 'string', + 'path' => 'string', + 'size' => 'integer', + 'tagname' => 'string', + 'systemtag' => 'string', + 'favorite' => 'boolean', + 'fileid' => 'integer', + 'storage' => 'integer', + 'share_with' => 'string', + 'share_type' => 'integer', + 'owner' => 'string', + ]; + + protected static $paramTypeMap = [ + 'string' => IQueryBuilder::PARAM_STR, + 'integer' => IQueryBuilder::PARAM_INT, + 'boolean' => IQueryBuilder::PARAM_BOOL, + ]; + protected static $paramArrayTypeMap = [ + 'string' => IQueryBuilder::PARAM_STR_ARRAY, + 'integer' => IQueryBuilder::PARAM_INT_ARRAY, + 'boolean' => IQueryBuilder::PARAM_INT_ARRAY, ]; public const TAG_FAVORITE = '_$!!$_'; @@ -142,31 +171,56 @@ private function searchComparisonToDBExpr( ?IMetadataQuery $metadataQuery = null ) { if ($comparison->getExtra()) { - [$field, $value, $type] = $this->getExtraOperatorField($comparison, $metadataQuery); + [$field, $value, $type, $paramType] = $this->getExtraOperatorField($comparison, $metadataQuery); } else { - [$field, $value, $type] = $this->getOperatorFieldAndValue($comparison); + [$field, $value, $type, $paramType] = $this->getOperatorFieldAndValue($comparison); } if (isset($operatorMap[$type])) { $queryOperator = $operatorMap[$type]; - return $builder->expr()->$queryOperator($field, $this->getParameterForValue($builder, $value)); + return $builder->expr()->$queryOperator($field, $this->getParameterForValue($builder, $value, $paramType)); } else { throw new \InvalidArgumentException('Invalid operator type: ' . $comparison->getType()); } } - private function getOperatorFieldAndValue(ISearchComparison $operator) { + /** + * @param ISearchComparison $operator + * @return list{string, string|integer|\DateTime|(\DateTime|int|string)[], string, string} + */ + private function getOperatorFieldAndValue(ISearchComparison $operator): array { $this->validateComparison($operator); - $field = $operator->getField(); $value = $operator->getValue(); $type = $operator->getType(); + $pathEqHash = $operator->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true); + return $this->getOperatorFieldAndValueInner($field, $value, $type, $pathEqHash); + } + /** + * @param string $field + * @param string|integer|\DateTime|(\DateTime|int|string)[] $value + * @param string $type + * @return list{string, string|integer|\DateTime|(\DateTime|int|string)[], string, string} + */ + private function getOperatorFieldAndValueInner(string $field, mixed $value, string $type, bool $pathEqHash): array { + $paramType = self::$fieldTypes[$field]; + if ($type === ISearchComparison::COMPARE_IN) { + $resultField = $field; + $values = []; + foreach ($value as $arrayValue) { + /** @var string|integer|\DateTime $arrayValue */ + [$arrayField, $arrayValue] = $this->getOperatorFieldAndValueInner($field, $arrayValue, ISearchComparison::COMPARE_EQUAL, $pathEqHash); + $resultField = $arrayField; + $values[] = $arrayValue; + } + return [$resultField, $values, ISearchComparison::COMPARE_IN, $paramType]; + } if ($field === 'mimetype') { $value = (string)$value; - if ($operator->getType() === ISearchComparison::COMPARE_EQUAL) { + if ($type === ISearchComparison::COMPARE_EQUAL) { $value = (int)$this->mimetypeLoader->getId($value); - } elseif ($operator->getType() === ISearchComparison::COMPARE_LIKE) { + } elseif ($type === ISearchComparison::COMPARE_LIKE) { // transform "mimetype='foo/%'" to "mimepart='foo'" if (preg_match('|(.+)/%|', $value, $matches)) { $field = 'mimepart'; @@ -183,6 +237,7 @@ private function getOperatorFieldAndValue(ISearchComparison $operator) { } elseif ($field === 'favorite') { $field = 'tag.category'; $value = self::TAG_FAVORITE; + $paramType = 'string'; } elseif ($field === 'name') { $field = 'file.name'; } elseif ($field === 'tagname') { @@ -191,53 +246,49 @@ private function getOperatorFieldAndValue(ISearchComparison $operator) { $field = 'systemtag.name'; } elseif ($field === 'fileid') { $field = 'file.fileid'; - } elseif ($field === 'path' && $type === ISearchComparison::COMPARE_EQUAL && $operator->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true)) { + } elseif ($field === 'path' && $type === ISearchComparison::COMPARE_EQUAL && $pathEqHash) { $field = 'path_hash'; $value = md5((string)$value); } elseif ($field === 'owner') { $field = 'uid_owner'; } - return [$field, $value, $type]; + return [$field, $value, $type, $paramType]; } private function validateComparison(ISearchComparison $operator) { - $types = [ - 'mimetype' => 'string', - 'mtime' => 'integer', - 'name' => 'string', - 'path' => 'string', - 'size' => 'integer', - 'tagname' => 'string', - 'systemtag' => 'string', - 'favorite' => 'boolean', - 'fileid' => 'integer', - 'storage' => 'integer', - 'share_with' => 'string', - 'share_type' => 'integer', - 'owner' => 'string', - ]; $comparisons = [ - 'mimetype' => ['eq', 'like'], + 'mimetype' => ['eq', 'like', 'in'], 'mtime' => ['eq', 'gt', 'lt', 'gte', 'lte'], - 'name' => ['eq', 'like', 'clike'], - 'path' => ['eq', 'like', 'clike'], + 'name' => ['eq', 'like', 'clike', 'in'], + 'path' => ['eq', 'like', 'clike', 'in'], 'size' => ['eq', 'gt', 'lt', 'gte', 'lte'], 'tagname' => ['eq', 'like'], 'systemtag' => ['eq', 'like'], 'favorite' => ['eq'], - 'fileid' => ['eq'], - 'storage' => ['eq'], + 'fileid' => ['eq', 'in'], + 'storage' => ['eq', 'in'], 'share_with' => ['eq'], 'share_type' => ['eq'], 'owner' => ['eq'], ]; - if (!isset($types[$operator->getField()])) { + if (!isset(self::$fieldTypes[$operator->getField()])) { throw new \InvalidArgumentException('Unsupported comparison field ' . $operator->getField()); } - $type = $types[$operator->getField()]; - if (gettype($operator->getValue()) !== $type) { - throw new \InvalidArgumentException('Invalid type for field ' . $operator->getField()); + $type = self::$fieldTypes[$operator->getField()]; + if ($operator->getType() === ISearchComparison::COMPARE_IN) { + if (!is_array($operator->getValue())) { + throw new \InvalidArgumentException('Invalid type for field ' . $operator->getField()); + } + foreach ($operator->getValue() as $arrayValue) { + if (gettype($arrayValue) !== $type) { + throw new \InvalidArgumentException('Invalid type in array for field ' . $operator->getField()); + } + } + } else { + if (gettype($operator->getValue()) !== $type) { + throw new \InvalidArgumentException('Invalid type for field ' . $operator->getField()); + } } if (!in_array($operator->getType(), $comparisons[$operator->getField()])) { throw new \InvalidArgumentException('Unsupported comparison for field ' . $operator->getField() . ': ' . $operator->getType()); @@ -246,6 +297,7 @@ private function validateComparison(ISearchComparison $operator) { private function getExtraOperatorField(ISearchComparison $operator, IMetadataQuery $metadataQuery): array { + $paramType = self::$fieldTypes[$field]; $field = $operator->getField(); $value = $operator->getValue(); $type = $operator->getType(); @@ -259,17 +311,17 @@ private function getExtraOperatorField(ISearchComparison $operator, IMetadataQue throw new \InvalidArgumentException('Invalid extra type: ' . $operator->getExtra()); } - return [$field, $value, $type]; + return [$field, $value, $type, $paramType]; } - private function getParameterForValue(IQueryBuilder $builder, $value) { + private function getParameterForValue(IQueryBuilder $builder, $value, string $paramType) { if ($value instanceof \DateTime) { $value = $value->getTimestamp(); } - if (is_numeric($value)) { - $type = IQueryBuilder::PARAM_INT; + if (is_array($value)) { + $type = self::$paramArrayTypeMap[$paramType]; } else { - $type = IQueryBuilder::PARAM_STR; + $type = self::$paramTypeMap[$paramType]; } return $builder->createNamedParameter($value, $type); } diff --git a/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php b/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php new file mode 100644 index 0000000000000..7f75731fe94ba --- /dev/null +++ b/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php @@ -0,0 +1,30 @@ +getType() === ISearchBinaryOperator::OPERATOR_OR || + $operator->getType() === ISearchBinaryOperator::OPERATOR_AND + ) + ) { + $newArguments = []; + foreach ($operator->getArguments() as $oldArgument) { + if ($oldArgument instanceof SearchBinaryOperator && $oldArgument->getType() === $operator->getType()) { + $newArguments = array_merge($newArguments, $oldArgument->getArguments()); + } else { + $newArguments[] = $oldArgument; + } + } + $operator->setArguments($newArguments); + } + parent::processOperator($operator); + } + +} diff --git a/lib/private/Files/Search/QueryOptimizer/FlattenSingleArgumentBinaryOperation.php b/lib/private/Files/Search/QueryOptimizer/FlattenSingleArgumentBinaryOperation.php new file mode 100644 index 0000000000000..2c32f2e01746a --- /dev/null +++ b/lib/private/Files/Search/QueryOptimizer/FlattenSingleArgumentBinaryOperation.php @@ -0,0 +1,27 @@ +getArguments()) === 1 && + ( + $operator->getType() === ISearchBinaryOperator::OPERATOR_OR || + $operator->getType() === ISearchBinaryOperator::OPERATOR_AND + ) + ) { + $operator = $operator->getArguments()[0]; + return true; + } + return false; + } +} diff --git a/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php b/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php new file mode 100644 index 0000000000000..62ef2cb2e8ede --- /dev/null +++ b/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php @@ -0,0 +1,99 @@ +isAllSameBinaryOperation($operator->getArguments()) + ) { + $topLevelType = $operator->getType(); + + $groups = $this->groupBinaryOperatorsByChild($operator->getArguments(), 0); + $outerOperations = array_map(function (array $operators) use ($topLevelType) { + if (count($operators) === 1) { + return $operators[0]; + } + /** @var ISearchBinaryOperator $firstArgument */ + $firstArgument = $operators[0]; + $outerType = $firstArgument->getType(); + $extractedLeftHand = $firstArgument->getArguments()[0]; + + $rightHandArguments = array_map(function (ISearchOperator $inner) { + /** @var ISearchBinaryOperator $inner */ + $arguments = $inner->getArguments(); + array_shift($arguments); + if (count($arguments) === 1) { + return $arguments[0]; + } + return new SearchBinaryOperator($inner->getType(), $arguments); + }, $operators); + $extractedRightHand = new SearchBinaryOperator($topLevelType, $rightHandArguments); + return new SearchBinaryOperator( + $outerType, + [$extractedLeftHand, $extractedRightHand] + ); + }, $groups); + $operator = new SearchBinaryOperator($topLevelType, $outerOperations); + parent::processOperator($operator); + return true; + } + return parent::processOperator($operator); + } + + /** + * Check that a list of operators is all the same type of (non-empty) binary operators + * + * @param ISearchOperator[] $operators + * @return bool + * @psalm-assert-if-true SearchBinaryOperator[] $operators + */ + private function isAllSameBinaryOperation(array $operators): bool { + $operation = null; + foreach ($operators as $operator) { + if (!$operator instanceof SearchBinaryOperator) { + return false; + } + if (!$operator->getArguments()) { + return false; + } + if ($operation === null) { + $operation = $operator->getType(); + } else { + if ($operation !== $operator->getType()) { + return false; + } + } + } + return true; + } + + /** + * Group a list of binary search operators that have a common argument + * + * @param SearchBinaryOperator[] $operators + * @return SearchBinaryOperator[][] + */ + private function groupBinaryOperatorsByChild(array $operators, int $index = 0): array { + $result = []; + foreach ($operators as $operator) { + /** @var SearchBinaryOperator|SearchComparison $child */ + $child = $operator->getArguments()[$index]; + ; + $childKey = (string) $child; + $result[$childKey][] = $operator; + } + return array_values($result); + } +} diff --git a/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php b/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php new file mode 100644 index 0000000000000..550d85fbda09f --- /dev/null +++ b/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php @@ -0,0 +1,68 @@ +getType() === ISearchBinaryOperator::OPERATOR_OR + ) { + $groups = $this->groupEqualsComparisonsByField($operator->getArguments()); + $newParts = array_map(function (array $group) { + if (count($group) > 1) { + // because of the logic from `groupEqualsComparisonsByField` we now that group is all comparisons on the same field + /** @var ISearchComparison[] $group */ + $field = $group[0]->getField(); + $values = array_map(function (ISearchComparison $comparison) { + /** @var string|integer|bool|\DateTime $value */ + $value = $comparison->getValue(); + return $value; + }, $group); + $in = new SearchComparison(ISearchComparison::COMPARE_IN, $field, $values); + $in->setQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, $group[0]->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true)); + return $in; + } else { + return $group[0]; + } + }, $groups); + if (count($newParts) === 1) { + $operator = $newParts[0]; + } else { + $operator = new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_OR, $newParts); + } + parent::processOperator($operator); + return true; + } + parent::processOperator($operator); + return false; + } + + /** + * Non-equals operators are put in a separate group for each + * + * @param ISearchOperator[] $operators + * @return ISearchOperator[][] + */ + private function groupEqualsComparisonsByField(array $operators): array { + $result = []; + foreach ($operators as $operator) { + if ($operator instanceof ISearchComparison && $operator->getType() === ISearchComparison::COMPARE_EQUAL) { + $key = $operator->getField() . $operator->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true); + $result[$key][] = $operator; + } else { + $result[] = [$operator]; + } + } + return array_values($result); + } +} diff --git a/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php b/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php index 160b27b7f11a8..6240ef3367eea 100644 --- a/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php +++ b/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php @@ -29,15 +29,18 @@ class QueryOptimizer { /** @var QueryOptimizerStep[] */ private $steps = []; - public function __construct( - PathPrefixOptimizer $pathPrefixOptimizer - ) { + public function __construct() { + // note that the order here is relevant $this->steps = [ - $pathPrefixOptimizer + new PathPrefixOptimizer(), + new MergeDistributiveOperations(), + new FlattenSingleArgumentBinaryOperation(), + new OrEqualsToIn(), + new FlattenNestedBool(), ]; } - public function processOperator(ISearchOperator $operator) { + public function processOperator(ISearchOperator &$operator) { foreach ($this->steps as $step) { $step->inspectOperator($operator); } diff --git a/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php b/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php new file mode 100644 index 0000000000000..546061522bc22 --- /dev/null +++ b/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php @@ -0,0 +1,31 @@ +getArguments(); + foreach ($arguments as &$argument) { + $modified = $modified || $this->processOperator($argument); + } + if ($modified) { + $operator->setArguments($arguments); + } + } + return false; + } +} diff --git a/lib/private/Files/Search/SearchBinaryOperator.php b/lib/private/Files/Search/SearchBinaryOperator.php index d7bba8f1b4edb..2b01ad58e5f5b 100644 --- a/lib/private/Files/Search/SearchBinaryOperator.php +++ b/lib/private/Files/Search/SearchBinaryOperator.php @@ -28,7 +28,7 @@ class SearchBinaryOperator implements ISearchBinaryOperator { /** @var string */ private $type; - /** @var ISearchOperator[] */ + /** @var (SearchBinaryOperator|SearchComparison)[] */ private $arguments; private $hints = []; @@ -36,7 +36,7 @@ class SearchBinaryOperator implements ISearchBinaryOperator { * SearchBinaryOperator constructor. * * @param string $type - * @param ISearchOperator[] $arguments + * @param (SearchBinaryOperator|SearchComparison)[] $arguments */ public function __construct($type, array $arguments) { $this->type = $type; @@ -57,6 +57,14 @@ public function getArguments() { return $this->arguments; } + /** + * @param ISearchOperator[] $arguments + * @return void + */ + public function setArguments(array $arguments): void { + $this->arguments = $arguments; + } + public function getQueryHint(string $name, $default) { return $this->hints[$name] ?? $default; } @@ -64,4 +72,11 @@ public function getQueryHint(string $name, $default) { public function setQueryHint(string $name, $value): void { $this->hints[$name] = $value; } + + public function __toString(): string { + if ($this->type === ISearchBinaryOperator::OPERATOR_NOT) { + return '(not ' . $this->arguments[0] . ')'; + } + return '(' . implode(' ' . $this->type . ' ', $this->arguments) . ')'; + } } diff --git a/lib/private/Files/Search/SearchComparison.php b/lib/private/Files/Search/SearchComparison.php index d94b3e9dfab30..8ca05236105ab 100644 --- a/lib/private/Files/Search/SearchComparison.php +++ b/lib/private/Files/Search/SearchComparison.php @@ -33,7 +33,7 @@ class SearchComparison implements ISearchComparison { public function __construct( private string $type, private string $field, - private \DateTime|int|string|bool $value, + private \DateTime|int|string|bool|array $value, private string $extra = '' ) { } @@ -53,9 +53,9 @@ public function getField(): string { } /** - * @return \DateTime|int|string|bool + * @return \DateTime|int|string|bool|(\DateTime|int|string)[] */ - public function getValue(): string|int|bool|\DateTime { + public function getValue(): string|int|bool|\DateTime|array { return $this->value; } @@ -78,4 +78,8 @@ public function setQueryHint(string $name, $value): void { public static function escapeLikeParameter(string $param): string { return addcslashes($param, '\\_%'); } + + public function __toString(): string { + return $this->field . ' ' . $this->type . ' ' . json_encode($this->value); + } } diff --git a/lib/public/Files/Search/ISearchComparison.php b/lib/public/Files/Search/ISearchComparison.php index 44657fd16bdd9..65a245ead2ea9 100644 --- a/lib/public/Files/Search/ISearchComparison.php +++ b/lib/public/Files/Search/ISearchComparison.php @@ -67,6 +67,7 @@ interface ISearchComparison extends ISearchOperator { * @since 28.0.0 */ public const COMPARE_DEFINED = 'is-defined'; + public const COMPARE_IN = 'in'; /** * @since 23.0.0 @@ -102,8 +103,8 @@ public function getExtra(): string; /** * Get the value to compare the field with * - * @return string|integer|bool|\DateTime + * @return string|integer|bool|\DateTime|(\DateTime|int|string)[] * @since 12.0.0 */ - public function getValue(): string|int|bool|\DateTime; + public function getValue(): string|int|bool|\DateTime|array; } diff --git a/tests/lib/Files/Cache/SearchBuilderTest.php b/tests/lib/Files/Cache/SearchBuilderTest.php index 5eb1a0252f0d3..45fa17bd2270a 100644 --- a/tests/lib/Files/Cache/SearchBuilderTest.php +++ b/tests/lib/Files/Cache/SearchBuilderTest.php @@ -154,6 +154,7 @@ public function comparisonProvider() { [new SearchComparison(ISearchComparison::COMPARE_LIKE, 'name', 'foo%'), [0, 1]], [new SearchComparison(ISearchComparison::COMPARE_EQUAL, 'mimetype', 'image/jpg'), [0]], [new SearchComparison(ISearchComparison::COMPARE_LIKE, 'mimetype', 'image/%'), [0, 1]], + [new SearchComparison(ISearchComparison::COMPARE_IN, 'mimetype', ['image/jpg', 'image/png']), [0, 1]], [new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ new SearchComparison(ISearchComparison::COMPARE_EQUAL, 'size', 50), new SearchComparison(ISearchComparison::COMPARE_LESS_THAN, 'mtime', 125) diff --git a/tests/lib/Files/Search/QueryOptimizer/CombinedTests.php b/tests/lib/Files/Search/QueryOptimizer/CombinedTests.php new file mode 100644 index 0000000000000..c1d0428176d54 --- /dev/null +++ b/tests/lib/Files/Search/QueryOptimizer/CombinedTests.php @@ -0,0 +1,45 @@ +optimizer = new QueryOptimizer(); + } + + public function testBasicOrOfAnds() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ]) + ] + ); + $this->assertEquals('((storage eq 1 and path eq "foo") or (storage eq 1 and path eq "bar") or (storage eq 1 and path eq "asd"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + + $this->assertEquals('(storage eq 1 and path in ["foo","bar","asd"])', $operator->__toString()); + } +} diff --git a/tests/lib/Files/Search/QueryOptimizer/FlattenNestedBoolTest.php b/tests/lib/Files/Search/QueryOptimizer/FlattenNestedBoolTest.php new file mode 100644 index 0000000000000..a21f2a19b9041 --- /dev/null +++ b/tests/lib/Files/Search/QueryOptimizer/FlattenNestedBoolTest.php @@ -0,0 +1,42 @@ +optimizer = new FlattenNestedBool(); + $this->simplifier = new FlattenSingleArgumentBinaryOperation(); + } + + public function testOrs() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_OR, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ]) + ] + ); + $this->assertEquals('(path eq "foo" or (path eq "bar" or path eq "asd"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('(path eq "foo" or path eq "bar" or path eq "asd")', $operator->__toString()); + } +} diff --git a/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php b/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php new file mode 100644 index 0000000000000..49a241a41af0f --- /dev/null +++ b/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php @@ -0,0 +1,133 @@ +optimizer = new MergeDistributiveOperations(); + $this->simplifier = new FlattenSingleArgumentBinaryOperation(); + } + + public function testBasicOrOfAnds() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ]) + ] + ); + $this->assertEquals('((storage eq 1 and path eq "foo") or (storage eq 1 and path eq "bar") or (storage eq 1 and path eq "asd"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('(storage eq 1 and (path eq "foo" or path eq "bar" or path eq "asd"))', $operator->__toString()); + } + + public function testDontTouchIfNotSame() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 2), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 3), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ]) + ] + ); + $this->assertEquals('((storage eq 1 and path eq "foo") or (storage eq 2 and path eq "bar") or (storage eq 3 and path eq "asd"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('((storage eq 1 and path eq "foo") or (storage eq 2 and path eq "bar") or (storage eq 3 and path eq "asd"))', $operator->__toString()); + } + + public function testMergePartial() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 2), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ]) + ] + ); + $this->assertEquals('((storage eq 1 and path eq "foo") or (storage eq 1 and path eq "bar") or (storage eq 2 and path eq "asd"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('((storage eq 1 and (path eq "foo" or path eq "bar")) or (storage eq 2 and path eq "asd"))', $operator->__toString()); + } + + public function testOptimizeInside() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_AND, + [ + new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ]) + ] + ), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "mimetype", "text") + ] + ); + $this->assertEquals('(((storage eq 1 and path eq "foo") or (storage eq 1 and path eq "bar") or (storage eq 1 and path eq "asd")) and mimetype eq "text")', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('((storage eq 1 and (path eq "foo" or path eq "bar" or path eq "asd")) and mimetype eq "text")', $operator->__toString()); + } +} diff --git a/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php b/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php new file mode 100644 index 0000000000000..23b2a6ca07a5b --- /dev/null +++ b/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php @@ -0,0 +1,120 @@ +optimizer = new OrEqualsToIn(); + $this->simplifier = new FlattenSingleArgumentBinaryOperation(); + } + + public function testOrs() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ] + ); + $this->assertEquals('(path eq "foo" or path eq "bar" or path eq "asd")', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('path in ["foo","bar","asd"]', $operator->__toString()); + } + + public function testOrsMultipleFields() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "fileid", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "fileid", 2), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "mimetype", "asd"), + ] + ); + $this->assertEquals('(path eq "foo" or path eq "bar" or fileid eq 1 or fileid eq 2 or mimetype eq "asd")', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('(path in ["foo","bar"] or fileid in [1,2] or mimetype eq "asd")', $operator->__toString()); + } + + public function testPreserveHints() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ] + ); + foreach ($operator->getArguments() as $argument) { + $argument->setQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, false); + } + $this->assertEquals('(path eq "foo" or path eq "bar" or path eq "asd")', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('path in ["foo","bar","asd"]', $operator->__toString()); + $this->assertEquals(false, $operator->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true)); + } + + public function testOrSomeEq() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + new SearchComparison(ISearchComparison::COMPARE_LIKE, "path", "asd%"), + ] + ); + $this->assertEquals('(path eq "foo" or path eq "bar" or path like "asd%")', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('(path in ["foo","bar"] or path like "asd%")', $operator->__toString()); + } + + public function testOrsInside() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_AND, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "mimetype", "text"), + new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + ] + ) + ] + ); + $this->assertEquals('(mimetype eq "text" and (path eq "foo" or path eq "bar" or path eq "asd"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('(mimetype eq "text" and path in ["foo","bar","asd"])', $operator->__toString()); + } +} From 2dcd0a875920be09944dc51f360303c11f3e858a Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 22 Sep 2023 10:17:18 +0200 Subject: [PATCH 2/6] hopefully improve propagation of 'path eq hash' hint into 'in' statements Signed-off-by: Robin Appelman --- .../Files/Search/QueryOptimizer/FlattenNestedBool.php | 1 - lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php | 8 +++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php b/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php index 7f75731fe94ba..c573e3af3c055 100644 --- a/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php +++ b/lib/private/Files/Search/QueryOptimizer/FlattenNestedBool.php @@ -26,5 +26,4 @@ public function processOperator(ISearchOperator &$operator) { } parent::processOperator($operator); } - } diff --git a/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php b/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php index 550d85fbda09f..d39eb2e29a96c 100644 --- a/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php +++ b/lib/private/Files/Search/QueryOptimizer/OrEqualsToIn.php @@ -29,7 +29,10 @@ public function processOperator(ISearchOperator &$operator): bool { return $value; }, $group); $in = new SearchComparison(ISearchComparison::COMPARE_IN, $field, $values); - $in->setQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, $group[0]->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true)); + $pathEqHash = array_reduce($group, function ($pathEqHash, ISearchComparison $comparison) { + return $comparison->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true) && $pathEqHash; + }, true); + $in->setQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, $pathEqHash); return $in; } else { return $group[0]; @@ -57,8 +60,7 @@ private function groupEqualsComparisonsByField(array $operators): array { $result = []; foreach ($operators as $operator) { if ($operator instanceof ISearchComparison && $operator->getType() === ISearchComparison::COMPARE_EQUAL) { - $key = $operator->getField() . $operator->getQueryHint(ISearchComparison::HINT_PATH_EQ_HASH, true); - $result[$key][] = $operator; + $result[$operator->getField()][] = $operator; } else { $result[] = [$operator]; } From 7ca516773f2866b3a6bb2e8cb63b5df95d8da03e Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Mon, 5 Feb 2024 18:56:43 +0100 Subject: [PATCH 3/6] add a search query step to split IN statements that are to large for oci Signed-off-by: Robin Appelman --- .../Search/QueryOptimizer/QueryOptimizer.php | 1 + .../Search/QueryOptimizer/SplitLargeIn.php | 33 ++++++++++++++ .../Files/Search/SearchIntegrationTest.php | 44 +++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php create mode 100644 tests/lib/Files/Search/SearchIntegrationTest.php diff --git a/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php b/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php index 6240ef3367eea..86cd784b760d2 100644 --- a/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php +++ b/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php @@ -37,6 +37,7 @@ public function __construct() { new FlattenSingleArgumentBinaryOperation(), new OrEqualsToIn(), new FlattenNestedBool(), + new SplitLargeIn(), ]; } diff --git a/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php b/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php new file mode 100644 index 0000000000000..450ffae42f1ce --- /dev/null +++ b/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php @@ -0,0 +1,33 @@ +getType() === ISearchComparison::COMPARE_IN && + count($operator->getValue()) > 1000 + ) { + $chunks = array_chunk($operator->getValue(), 1000); + $chunkComparisons = array_map(function(array $values) use ($operator) { + return new SearchComparison(ISearchComparison::COMPARE_IN, $operator->getField(), $values); + }, $chunks); + + $operator = new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_OR, $chunkComparisons); + return true; + } + parent::processOperator($operator); + return false; + } +} + diff --git a/tests/lib/Files/Search/SearchIntegrationTest.php b/tests/lib/Files/Search/SearchIntegrationTest.php new file mode 100644 index 0000000000000..74018a597d904 --- /dev/null +++ b/tests/lib/Files/Search/SearchIntegrationTest.php @@ -0,0 +1,44 @@ +storage = new Temporary([]); + $this->cache = $this->storage->getCache(); + $this->storage->getScanner()->scan(''); + } + + + public function testThousandAndOneFilters() { + $id = $this->cache->put("file10", ['size' => 1, 'mtime' => 50, 'mimetype' => 'foo/folder']); + + $comparisons = []; + for($i = 1; $i <= 1001; $i++) { + $comparisons[] = new SearchComparison(ISearchComparison::COMPARE_EQUAL, "name", "file$i"); + } + $operator = new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_OR, $comparisons); + $query = new SearchQuery($operator, 10, 0, []); + + $results = $this->cache->searchQuery($query); + + $this->assertCount(1, $results); + $this->assertEquals($id, $results[0]->getId()); + } +} From 63ffaab95ec7a893ec510e7fde802f47ba4a8889 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 6 Feb 2024 10:59:17 +0100 Subject: [PATCH 4/6] fix types + autoloader Signed-off-by: Robin Appelman --- lib/composer/composer/autoload_classmap.php | 1 + lib/composer/composer/autoload_static.php | 1 + lib/private/Files/Cache/SearchBuilder.php | 19 ++++++++++++++----- .../Search/QueryOptimizer/SplitLargeIn.php | 3 +-- lib/private/Files/Search/SearchComparison.php | 7 ++++--- lib/public/Files/Search/ISearchComparison.php | 5 ++++- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/lib/composer/composer/autoload_classmap.php b/lib/composer/composer/autoload_classmap.php index 06977abf57fdd..c027075d1605a 100644 --- a/lib/composer/composer/autoload_classmap.php +++ b/lib/composer/composer/autoload_classmap.php @@ -1414,6 +1414,7 @@ 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizer' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php', 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizerStep' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizerStep.php', 'OC\\Files\\Search\\QueryOptimizer\\ReplacingOptimizerStep' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php', + 'OC\\Files\\Search\\QueryOptimizer\\SplitLargeIn' => $baseDir . '/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php', 'OC\\Files\\Search\\SearchBinaryOperator' => $baseDir . '/lib/private/Files/Search/SearchBinaryOperator.php', 'OC\\Files\\Search\\SearchComparison' => $baseDir . '/lib/private/Files/Search/SearchComparison.php', 'OC\\Files\\Search\\SearchOrder' => $baseDir . '/lib/private/Files/Search/SearchOrder.php', diff --git a/lib/composer/composer/autoload_static.php b/lib/composer/composer/autoload_static.php index 9621351352da5..238b667b5f4f3 100644 --- a/lib/composer/composer/autoload_static.php +++ b/lib/composer/composer/autoload_static.php @@ -1447,6 +1447,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizer' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php', 'OC\\Files\\Search\\QueryOptimizer\\QueryOptimizerStep' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/QueryOptimizerStep.php', 'OC\\Files\\Search\\QueryOptimizer\\ReplacingOptimizerStep' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/ReplacingOptimizerStep.php', + 'OC\\Files\\Search\\QueryOptimizer\\SplitLargeIn' => __DIR__ . '/../../..' . '/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php', 'OC\\Files\\Search\\SearchBinaryOperator' => __DIR__ . '/../../..' . '/lib/private/Files/Search/SearchBinaryOperator.php', 'OC\\Files\\Search\\SearchComparison' => __DIR__ . '/../../..' . '/lib/private/Files/Search/SearchComparison.php', 'OC\\Files\\Search\\SearchOrder' => __DIR__ . '/../../..' . '/lib/private/Files/Search/SearchOrder.php', diff --git a/lib/private/Files/Cache/SearchBuilder.php b/lib/private/Files/Cache/SearchBuilder.php index fe021a62e9ed0..9d306db7748ad 100644 --- a/lib/private/Files/Cache/SearchBuilder.php +++ b/lib/private/Files/Cache/SearchBuilder.php @@ -37,8 +37,12 @@ /** * Tools for transforming search queries into database queries + * + * @psalm-import-type ParamSingleValue from ISearchComparison + * @psalm-import-type ParamValue from ISearchComparison */ class SearchBuilder { + /** @var array */ protected static $searchOperatorMap = [ ISearchComparison::COMPARE_LIKE => 'iLike', ISearchComparison::COMPARE_LIKE_CASE_SENSITIVE => 'like', @@ -51,6 +55,7 @@ class SearchBuilder { ISearchComparison::COMPARE_IN => 'in', ]; + /** @var array */ protected static $searchOperatorNegativeMap = [ ISearchComparison::COMPARE_LIKE => 'notLike', ISearchComparison::COMPARE_LIKE_CASE_SENSITIVE => 'notLike', @@ -63,6 +68,7 @@ class SearchBuilder { ISearchComparison::COMPARE_IN => 'notIn', ]; + /** @var array */ protected static $fieldTypes = [ 'mimetype' => 'string', 'mtime' => 'integer', @@ -79,11 +85,14 @@ class SearchBuilder { 'owner' => 'string', ]; + /** @var array */ protected static $paramTypeMap = [ 'string' => IQueryBuilder::PARAM_STR, 'integer' => IQueryBuilder::PARAM_INT, 'boolean' => IQueryBuilder::PARAM_BOOL, ]; + + /** @var array */ protected static $paramArrayTypeMap = [ 'string' => IQueryBuilder::PARAM_STR_ARRAY, 'integer' => IQueryBuilder::PARAM_INT_ARRAY, @@ -186,7 +195,7 @@ private function searchComparisonToDBExpr( /** * @param ISearchComparison $operator - * @return list{string, string|integer|\DateTime|(\DateTime|int|string)[], string, string} + * @return list{string, ParamValue, string, string} */ private function getOperatorFieldAndValue(ISearchComparison $operator): array { $this->validateComparison($operator); @@ -199,9 +208,9 @@ private function getOperatorFieldAndValue(ISearchComparison $operator): array { /** * @param string $field - * @param string|integer|\DateTime|(\DateTime|int|string)[] $value + * @param ParamValue $value * @param string $type - * @return list{string, string|integer|\DateTime|(\DateTime|int|string)[], string, string} + * @return list{string, ParamValue, string, string} */ private function getOperatorFieldAndValueInner(string $field, mixed $value, string $type, bool $pathEqHash): array { $paramType = self::$fieldTypes[$field]; @@ -209,7 +218,7 @@ private function getOperatorFieldAndValueInner(string $field, mixed $value, stri $resultField = $field; $values = []; foreach ($value as $arrayValue) { - /** @var string|integer|\DateTime $arrayValue */ + /** @var ParamSingleValue $arrayValue */ [$arrayField, $arrayValue] = $this->getOperatorFieldAndValueInner($field, $arrayValue, ISearchComparison::COMPARE_EQUAL, $pathEqHash); $resultField = $arrayField; $values[] = $arrayValue; @@ -297,7 +306,7 @@ private function validateComparison(ISearchComparison $operator) { private function getExtraOperatorField(ISearchComparison $operator, IMetadataQuery $metadataQuery): array { - $paramType = self::$fieldTypes[$field]; + $paramType = self::$fieldTypes[$operator->getField()]; $field = $operator->getField(); $value = $operator->getValue(); $type = $operator->getType(); diff --git a/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php b/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php index 450ffae42f1ce..6f85037b3e946 100644 --- a/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php +++ b/lib/private/Files/Search/QueryOptimizer/SplitLargeIn.php @@ -19,7 +19,7 @@ public function processOperator(ISearchOperator &$operator): bool { count($operator->getValue()) > 1000 ) { $chunks = array_chunk($operator->getValue(), 1000); - $chunkComparisons = array_map(function(array $values) use ($operator) { + $chunkComparisons = array_map(function (array $values) use ($operator) { return new SearchComparison(ISearchComparison::COMPARE_IN, $operator->getField(), $values); }, $chunks); @@ -30,4 +30,3 @@ public function processOperator(ISearchOperator &$operator): bool { return false; } } - diff --git a/lib/private/Files/Search/SearchComparison.php b/lib/private/Files/Search/SearchComparison.php index 8ca05236105ab..b3c4832d776ae 100644 --- a/lib/private/Files/Search/SearchComparison.php +++ b/lib/private/Files/Search/SearchComparison.php @@ -27,12 +27,16 @@ use OCP\Files\Search\ISearchComparison; +/** + * @psalm-import-type ParamValue from ISearchComparison + */ class SearchComparison implements ISearchComparison { private array $hints = []; public function __construct( private string $type, private string $field, + /** @var ParamValue $value */ private \DateTime|int|string|bool|array $value, private string $extra = '' ) { @@ -52,9 +56,6 @@ public function getField(): string { return $this->field; } - /** - * @return \DateTime|int|string|bool|(\DateTime|int|string)[] - */ public function getValue(): string|int|bool|\DateTime|array { return $this->value; } diff --git a/lib/public/Files/Search/ISearchComparison.php b/lib/public/Files/Search/ISearchComparison.php index 65a245ead2ea9..a3842ffaec6c2 100644 --- a/lib/public/Files/Search/ISearchComparison.php +++ b/lib/public/Files/Search/ISearchComparison.php @@ -26,6 +26,9 @@ /** * @since 12.0.0 + * + * @psalm-type ParamSingleValue = \DateTime|int|string|bool + * @psalm-type ParamValue = ParamSingleValue|list */ interface ISearchComparison extends ISearchOperator { /** @@ -103,7 +106,7 @@ public function getExtra(): string; /** * Get the value to compare the field with * - * @return string|integer|bool|\DateTime|(\DateTime|int|string)[] + * @return ParamValue * @since 12.0.0 */ public function getValue(): string|int|bool|\DateTime|array; From 1c87cee5ad8754aec87cb1749f5a495cd8d80961 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 6 Feb 2024 14:31:21 +0100 Subject: [PATCH 5/6] add extra flatten step to improve "or eq" -> "in" optimization Signed-off-by: Robin Appelman --- lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php | 1 + tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php b/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php index 86cd784b760d2..46d27e38081f1 100644 --- a/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php +++ b/lib/private/Files/Search/QueryOptimizer/QueryOptimizer.php @@ -35,6 +35,7 @@ public function __construct() { new PathPrefixOptimizer(), new MergeDistributiveOperations(), new FlattenSingleArgumentBinaryOperation(), + new FlattenNestedBool(), new OrEqualsToIn(), new FlattenNestedBool(), new SplitLargeIn(), diff --git a/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php b/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php index 23b2a6ca07a5b..3d3160079cd79 100644 --- a/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php +++ b/tests/lib/Files/Search/QueryOptimizer/OrEqualsToInTest.php @@ -83,16 +83,16 @@ public function testOrSomeEq() { ISearchBinaryOperator::OPERATOR_OR, [ new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + new SearchComparison(ISearchComparison::COMPARE_LIKE, "path", "foo%"), new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), - new SearchComparison(ISearchComparison::COMPARE_LIKE, "path", "asd%"), ] ); - $this->assertEquals('(path eq "foo" or path eq "bar" or path like "asd%")', $operator->__toString()); + $this->assertEquals('(path eq "foo" or path like "foo%" or path eq "bar")', $operator->__toString()); $this->optimizer->processOperator($operator); $this->simplifier->processOperator($operator); - $this->assertEquals('(path in ["foo","bar"] or path like "asd%")', $operator->__toString()); + $this->assertEquals('(path in ["foo","bar"] or path like "foo%")', $operator->__toString()); } public function testOrsInside() { From 3890aa54be4fc4b577528616249623d2aa531970 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 15 Feb 2024 18:16:13 +0100 Subject: [PATCH 6/6] add some comments for the distributive operation and add another test Signed-off-by: Robin Appelman --- .../MergeDistributiveOperations.php | 27 ++++++++++++++++--- lib/public/Files/Search/ISearchComparison.php | 4 +++ .../MergeDistributiveOperationsTest.php | 27 +++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php b/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php index 62ef2cb2e8ede..922b0ccb17c63 100644 --- a/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php +++ b/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php @@ -10,7 +10,11 @@ /** * Attempt to transform * - * (A AND B) OR (A AND C) into A AND (B OR C) + * (A AND B) OR (A AND C) OR (A AND D AND E) into A AND (B OR C OR (D AND E)) + * + * This is always valid because logical 'AND' and 'OR' are distributive[1]. + * + * [1]: https://en.wikipedia.org/wiki/Distributive_property */ class MergeDistributiveOperations extends ReplacingOptimizerStep { public function processOperator(ISearchOperator &$operator): bool { @@ -18,18 +22,29 @@ public function processOperator(ISearchOperator &$operator): bool { $operator instanceof SearchBinaryOperator && $this->isAllSameBinaryOperation($operator->getArguments()) ) { + // either 'AND' or 'OR' $topLevelType = $operator->getType(); + // split the arguments into groups that share a first argument + // (we already know that all arguments are binary operators with at least 1 child) $groups = $this->groupBinaryOperatorsByChild($operator->getArguments(), 0); $outerOperations = array_map(function (array $operators) use ($topLevelType) { + // no common operations, no need to change anything if (count($operators) === 1) { return $operators[0]; } /** @var ISearchBinaryOperator $firstArgument */ $firstArgument = $operators[0]; - $outerType = $firstArgument->getType(); + + // we already checked that all arguments have the same type, so this type applies for all, either 'AND' or 'OR' + $innerType = $firstArgument->getType(); + + // the common operation we move out ('A' from the example) $extractedLeftHand = $firstArgument->getArguments()[0]; + // for each argument we remove the extracted operation to get the leftovers ('B', 'C' and '(D AND E)' in the example) + // note that we leave them inside the "inner" binary operation for when the "inner" operation contained more than two parts + // in the (common) case where the "inner" operation only has 1 item left it will be cleaned up in a follow up step $rightHandArguments = array_map(function (ISearchOperator $inner) { /** @var ISearchBinaryOperator $inner */ $arguments = $inner->getArguments(); @@ -39,12 +54,17 @@ public function processOperator(ISearchOperator &$operator): bool { } return new SearchBinaryOperator($inner->getType(), $arguments); }, $operators); + + // combine the extracted operation ('A') with the remaining bit ('(B OR C OR (D AND E))') + // note that because of how distribution work, we use the "outer" type "inside" and the "inside" type "outside". $extractedRightHand = new SearchBinaryOperator($topLevelType, $rightHandArguments); return new SearchBinaryOperator( - $outerType, + $innerType, [$extractedLeftHand, $extractedRightHand] ); }, $groups); + + // combine all groups again $operator = new SearchBinaryOperator($topLevelType, $outerOperations); parent::processOperator($operator); return true; @@ -90,7 +110,6 @@ private function groupBinaryOperatorsByChild(array $operators, int $index = 0): foreach ($operators as $operator) { /** @var SearchBinaryOperator|SearchComparison $child */ $child = $operator->getArguments()[$index]; - ; $childKey = (string) $child; $result[$childKey][] = $operator; } diff --git a/lib/public/Files/Search/ISearchComparison.php b/lib/public/Files/Search/ISearchComparison.php index a3842ffaec6c2..eb93ef70bf61d 100644 --- a/lib/public/Files/Search/ISearchComparison.php +++ b/lib/public/Files/Search/ISearchComparison.php @@ -70,6 +70,10 @@ interface ISearchComparison extends ISearchOperator { * @since 28.0.0 */ public const COMPARE_DEFINED = 'is-defined'; + + /** + * @since 29.0.0 + */ public const COMPARE_IN = 'in'; /** diff --git a/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php b/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php index 49a241a41af0f..4c7ecc9d46f6c 100644 --- a/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php +++ b/tests/lib/Files/Search/QueryOptimizer/MergeDistributiveOperationsTest.php @@ -130,4 +130,31 @@ public function testOptimizeInside() { $this->assertEquals('((storage eq 1 and (path eq "foo" or path eq "bar" or path eq "asd")) and mimetype eq "text")', $operator->__toString()); } + + public function testMoveInnerOperations() { + $operator = new SearchBinaryOperator( + ISearchBinaryOperator::OPERATOR_OR, + [ + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "foo"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "bar"), + ]), + new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [ + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "storage", 1), + new SearchComparison(ISearchComparison::COMPARE_EQUAL, "path", "asd"), + new SearchComparison(ISearchComparison::COMPARE_GREATER_THAN, "size", "100"), + ]) + ] + ); + $this->assertEquals('((storage eq 1 and path eq "foo") or (storage eq 1 and path eq "bar") or (storage eq 1 and path eq "asd" and size gt "100"))', $operator->__toString()); + + $this->optimizer->processOperator($operator); + $this->simplifier->processOperator($operator); + + $this->assertEquals('(storage eq 1 and (path eq "foo" or path eq "bar" or (path eq "asd" and size gt "100")))', $operator->__toString()); + } }