handle ordering in folder search

Signed-off-by: Robin Appelman <robin@icewind.nl>
This commit is contained in:
Robin Appelman 2021-03-19 14:26:44 +01:00
parent 671b99251e
commit 055af29371
No known key found for this signature in database
GPG Key ID: 42B69D8A64526EFB
4 changed files with 98 additions and 55 deletions

View File

@ -32,7 +32,6 @@
namespace OC\Files\Node;
use OC\DB\QueryBuilder\Literal;
use OC\Files\Mount\MountPoint;
use OC\Files\Search\SearchBinaryOperator;
use OC\Files\Search\SearchComparison;
use OC\Files\Search\SearchQuery;
@ -232,23 +231,18 @@ class Folder extends Node implements \OCP\Files\Folder {
$query = $this->queryFromOperator(new SearchComparison(ISearchComparison::COMPARE_LIKE, 'name', '%' . $query . '%'));
}
// Limit+offset for queries without ordering
// Limit+offset for queries with ordering
//
// assume a setup where the root mount matches 15 items,
// sub mount1 matches 7 items and mount2 matches 1 item
// a search with (0..10) returns 10 results from root with internal offset 0 and limit 10
// follow up search with (10..20) returns 5 results from root with internal offset 10 and limit 10
// and 5 results from mount1 with internal offset 0 and limit 5
// next search with (20..30) return 0 results from root with internal offset 20 and limit 10
// 2 results from mount1 with internal offset 5[1] and limit 5
// and 1 result from mount2 with internal offset 0[1] and limit 8
// Because we currently can't do ordering between the results from different storages in sql
// The only way to do ordering is requesting the $limit number of entries from all storages
// sorting them and returning the first $limit entries.
//
// Because of the difficulty of calculating the offset for a sub-query if the previous one returns no results
// (we don't know how many results the previous sub-query has skipped with it's own offset)
// we instead discard the offset for the sub-queries and filter it afterwards and add the offset to limit.
// this is sub-optimal but shouldn't hurt to much since large offsets are uncommon in practice
// For offset we have the same problem, we don't know how many entries from each storage should be skipped
// by a given $offset, so instead we query $offset + $limit from each storage and return entries $offset..($offset+$limit)
// after merging and sorting them.
//
// All of this is only possible for queries without ordering
// This is suboptimal but because limit and offset tend to be fairly small in real world use cases it should
// still be significantly better than disabling paging altogether
$limitToHome = $query->limitToHome();
if ($limitToHome && count(explode('/', $this->path)) !== 3) {
@ -264,13 +258,12 @@ class Folder extends Node implements \OCP\Files\Folder {
$internalPath = $internalPath . '/';
}
$subQueryLimit = $query->getLimit() > 0 ? $query->getLimit() + $query->getOffset() : PHP_INT_MAX;
$subQueryOffset = $query->getOffset();
$subQueryLimit = $query->getLimit() > 0 ? $query->getLimit() + $query->getOffset() : 0;
$rootQuery = new SearchQuery(
new SearchBinaryOperator(ISearchBinaryOperator::OPERATOR_AND, [
new SearchComparison(ISearchComparison::COMPARE_LIKE, 'path', $internalPath . '%'),
$query->getSearchOperation(),
]
new SearchComparison(ISearchComparison::COMPARE_LIKE, 'path', $internalPath . '%'),
$query->getSearchOperation(),
]
),
$subQueryLimit,
0,
@ -283,11 +276,6 @@ class Folder extends Node implements \OCP\Files\Folder {
$cache = $storage->getCache('');
$results = $cache->searchQuery($rootQuery);
$count = count($results);
$results = array_slice($results, $subQueryOffset, $subQueryLimit);
$subQueryOffset = max(0, $subQueryOffset - $count);
$subQueryLimit = max(0, $subQueryLimit - $count);
foreach ($results as $result) {
$files[] = $this->cacheEntryToFileInfo($mount, '', $internalPath, $result);
}
@ -295,10 +283,6 @@ class Folder extends Node implements \OCP\Files\Folder {
if (!$limitToHome) {
$mounts = $this->root->getMountsIn($this->path);
foreach ($mounts as $mount) {
if ($subQueryLimit <= 0) {
break;
}
$subQuery = new SearchQuery(
$query->getSearchOperation(),
$subQueryLimit,
@ -313,12 +297,6 @@ class Folder extends Node implements \OCP\Files\Folder {
$relativeMountPoint = ltrim(substr($mount->getMountPoint(), $rootLength), '/');
$results = $cache->searchQuery($subQuery);
$count = count($results);
$results = array_slice($results, $subQueryOffset, $subQueryLimit);
$subQueryOffset -= $count;
$subQueryLimit -= $count;
foreach ($results as $result) {
$files[] = $this->cacheEntryToFileInfo($mount, $relativeMountPoint, '', $result);
}
@ -326,6 +304,20 @@ class Folder extends Node implements \OCP\Files\Folder {
}
}
$order = $query->getOrder();
if ($order) {
usort($files, function (FileInfo $a,FileInfo $b) use ($order) {
foreach ($order as $orderField) {
$cmp = $orderField->sortFileInfo($a, $b);
if ($cmp !== 0) {
return $cmp;
}
}
return 0;
});
}
$files = array_values(array_slice($files, $query->getOffset(), $query->getLimit() > 0 ? $query->getLimit() : null));
return array_map(function (FileInfo $file) {
return $this->createNode($file->getPath(), $file);
}, $files);

View File

@ -23,6 +23,7 @@
namespace OC\Files\Search;
use OCP\Files\FileInfo;
use OCP\Files\Search\ISearchOrder;
class SearchOrder implements ISearchOrder {
@ -55,4 +56,28 @@ class SearchOrder implements ISearchOrder {
public function getField() {
return $this->field;
}
public function sortFileInfo(FileInfo $a, FileInfo $b): int {
$cmp = $this->sortFileInfoNoDirection($a, $b);
return $cmp * ($this->direction === ISearchOrder::DIRECTION_ASCENDING ? 1 : -1);
}
private function sortFileInfoNoDirection(FileInfo $a, FileInfo $b): int {
switch ($this->field) {
case 'name':
return $a->getName() <=> $b->getName();
case 'mimetype':
return $a->getMimetype() <=> $b->getMimetype();
case 'mtime':
return $a->getMtime() <=> $b->getMtime();
case 'size':
return $a->getSize() <=> $b->getSize();
case 'fileid':
return $a->getId() <=> $b->getId();
case 'permissions':
return $a->getPermissions() <=> $b->getPermissions();
default:
return 0;
}
}
}

View File

@ -24,6 +24,8 @@
namespace OCP\Files\Search;
use OCP\Files\FileInfo;
/**
* @since 12.0.0
*/
@ -46,4 +48,14 @@ interface ISearchOrder {
* @since 12.0.0
*/
public function getField();
/**
* Apply the sorting on 2 FileInfo objects
*
* @param FileInfo $a
* @param FileInfo $b
* @return int -1 if $a < $b, 0 if $a = $b, 1 if $a > $b (for ascending, reverse for descending)
* @since 22.0.0
*/
public function sortFileInfo(FileInfo $a, FileInfo $b): int;
}

View File

@ -18,6 +18,7 @@ use OC\Files\Node\Folder;
use OC\Files\Node\Node;
use OC\Files\Node\Root;
use OC\Files\Search\SearchComparison;
use OC\Files\Search\SearchOrder;
use OC\Files\Search\SearchQuery;
use OC\Files\Storage\Temporary;
use OC\Files\Storage\Wrapper\Jail;
@ -25,6 +26,7 @@ use OC\Files\View;
use OCP\Files\Mount\IMountPoint;
use OCP\Files\NotFoundException;
use OCP\Files\Search\ISearchComparison;
use OCP\Files\Search\ISearchOrder;
use OCP\Files\Search\ISearchQuery;
use OCP\Files\Storage;
@ -929,22 +931,34 @@ class FolderTest extends NodeTest {
public function offsetLimitProvider() {
return [
[0, 10, [10, 11, 12, 13, 14, 15, 16, 17]],
[0, 5, [10, 11, 12, 13, 14]],
[0, 2, [10, 11]],
[3, 2, [13, 14]],
[3, 5, [13, 14, 15, 16, 17]],
[5, 2, [15, 16]],
[6, 2, [16, 17]],
[7, 2, [17]],
[10, 2, []]
[0, 10, [10, 11, 12, 13, 14, 15, 16, 17], []],
[0, 5, [10, 11, 12, 13, 14], []],
[0, 2, [10, 11], []],
[3, 2, [13, 14], []],
[3, 5, [13, 14, 15, 16, 17], []],
[5, 2, [15, 16], []],
[6, 2, [16, 17], []],
[7, 2, [17], []],
[10, 2, [], []],
[0, 5, [16, 10, 14, 11, 12], [new SearchOrder(ISearchOrder::DIRECTION_ASCENDING, 'mtime')]],
[3, 2, [11, 12], [new SearchOrder(ISearchOrder::DIRECTION_ASCENDING, 'mtime')]],
[0, 5, [14, 15, 16, 10, 11], [
new SearchOrder(ISearchOrder::DIRECTION_DESCENDING, 'size'),
new SearchOrder(ISearchOrder::DIRECTION_ASCENDING, 'mtime')
]],
];
}
/**
* @dataProvider offsetLimitProvider
* @param int $offset
* @param int $limit
* @param int[] $expectedIds
* @param ISearchOrder[] $ordering
* @throws NotFoundException
* @throws \OCP\Files\InvalidPathException
*/
public function testSearchSubStoragesLimitOffset(int $offset, int $limit, array $expectedIds) {
public function testSearchSubStoragesLimitOffset(int $offset, int $limit, array $expectedIds, array $ordering) {
$manager = $this->createMock(Manager::class);
/**
* @var \OC\Files\View | \PHPUnit\Framework\MockObject\MockObject $view
@ -996,26 +1010,26 @@ class FolderTest extends NodeTest {
$cache->method('searchQuery')
->willReturnCallback(function (ISearchQuery $query) {
return array_slice([
new CacheEntry(['fileid' => 10, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 11, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 12, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 13, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain'])
new CacheEntry(['fileid' => 10, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 10, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 11, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 20, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 12, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 30, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 13, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 40, 'mimetype' => 'text/plain']),
], $query->getOffset(), $query->getOffset() + $query->getLimit());
});
$subCache1->method('searchQuery')
->willReturnCallback(function (ISearchQuery $query) {
return array_slice([
new CacheEntry(['fileid' => 14, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 15, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 14, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 300, 'mtime' => 15, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 15, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 300, 'mtime' => 50, 'mimetype' => 'text/plain']),
], $query->getOffset(), $query->getOffset() + $query->getLimit());
});
$subCache2->method('searchQuery')
->willReturnCallback(function (ISearchQuery $query) {
return array_slice([
new CacheEntry(['fileid' => 16, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 17, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 55, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 16, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 5, 'mimetype' => 'text/plain']),
new CacheEntry(['fileid' => 17, 'path' => 'foo/qwerty', 'name' => 'qwerty', 'size' => 200, 'mtime' => 60, 'mimetype' => 'text/plain']),
], $query->getOffset(), $query->getOffset() + $query->getLimit());
});
@ -1030,9 +1044,9 @@ class FolderTest extends NodeTest {
$node = new Folder($root, $view, '/bar/foo');
$comparison = new SearchComparison(ISearchComparison::COMPARE_LIKE, 'name', '%foo%');
$query = new SearchQuery($comparison, $limit, $offset, []);
$query = new SearchQuery($comparison, $limit, $offset, $ordering);
$result = $node->search($query);
$ids = array_map(function(Node $info) {
$ids = array_map(function (Node $info) {
return $info->getId();
}, $result);
$this->assertEquals($expectedIds, $ids);