From 30c09d0c8bd8827dc3175f6590a5549dadf0773e Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 14 Jun 2013 15:30:41 +0200 Subject: [PATCH 1/5] split of scanning the childs of a folder --- lib/files/cache/scanner.php | 41 ++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/lib/files/cache/scanner.php b/lib/files/cache/scanner.php index 8f9a792195..816c856ac2 100644 --- a/lib/files/cache/scanner.php +++ b/lib/files/cache/scanner.php @@ -8,6 +8,8 @@ namespace OC\Files\Cache; +use OC\Files\Filesystem; + class Scanner { /** * @var \OC\Files\Storage\Storage $storage @@ -63,8 +65,8 @@ class Scanner { * @return array with metadata of the scanned file */ public function scanFile($file, $checkExisting = false) { - if ( ! self::isPartialFile($file) - and ! \OC\Files\Filesystem::isFileBlacklisted($file) + if (!self::isPartialFile($file) + and !Filesystem::isFileBlacklisted($file) ) { \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId)); $data = $this->getData($file); @@ -84,7 +86,8 @@ class Scanner { $data['size'] = $cacheData['size']; } if ($data['mtime'] === $cacheData['mtime'] && - $data['size'] === $cacheData['size']) { + $data['size'] === $cacheData['size'] + ) { $data['etag'] = $cacheData['etag']; } // Only update metadata that has changed @@ -100,38 +103,42 @@ class Scanner { } /** - * scan all the files in a folder and store them in the cache + * scan a folder and all it's children * * @param string $path * @param bool $recursive - * @param bool $onlyChilds * @return int the size of the scanned folder or -1 if the size is unknown at this stage */ - public function scan($path, $recursive = self::SCAN_RECURSIVE, $onlyChilds = false) { - \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_folder', array('path' => $path, 'storage' => $this->storageId)); - $childQueue = array(); - if (!$onlyChilds) { - $this->scanFile($path); - } + public function scan($path, $recursive = self::SCAN_RECURSIVE) { + $this->scanFile($path); + return $this->scanChildren($path, $recursive); + } + /** + * scan all the files and folders in a folder + * + * @param string $path + * @param bool $recursive + * @return int the size of the scanned folder or -1 if the size is unknown at this stage + */ + public function scanChildren($path, $recursive = self::SCAN_RECURSIVE) { + \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_folder', array('path' => $path, 'storage' => $this->storageId)); $size = 0; + $childQueue = array(); if ($this->storage->is_dir($path) && ($dh = $this->storage->opendir($path))) { \OC_DB::beginTransaction(); while ($file = readdir($dh)) { $child = ($path) ? $path . '/' . $file : $file; - if (!\OC\Files\Filesystem::isIgnoredDir($file)) { + if (!Filesystem::isIgnoredDir($file)) { $data = $this->scanFile($child, $recursive === self::SCAN_SHALLOW); if ($data) { if ($data['size'] === -1) { if ($recursive === self::SCAN_RECURSIVE) { $childQueue[] = $child; - $data['size'] = 0; } else { $size = -1; } - } - - if ($size !== -1) { + } else if ($size !== -1) { $size += $data['size']; } } @@ -139,7 +146,7 @@ class Scanner { } \OC_DB::commit(); foreach ($childQueue as $child) { - $childSize = $this->scan($child, self::SCAN_RECURSIVE, true); + $childSize = $this->scanChildren($child, self::SCAN_RECURSIVE); if ($childSize === -1) { $size = -1; } else { From f10a4db88997b3848a8c149d35dbb68c1b8c5f60 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 14 Jun 2013 16:53:08 +0200 Subject: [PATCH 2/5] scanner: give more percision about what data is reused during scanning --- lib/files/cache/scanner.php | 46 +++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/lib/files/cache/scanner.php b/lib/files/cache/scanner.php index 816c856ac2..061778cd85 100644 --- a/lib/files/cache/scanner.php +++ b/lib/files/cache/scanner.php @@ -29,6 +29,9 @@ class Scanner { const SCAN_RECURSIVE = true; const SCAN_SHALLOW = false; + const REUSE_ETAG = 1; + const REUSE_SIZE = 2; + public function __construct(\OC\Files\Storage\Storage $storage) { $this->storage = $storage; $this->storageId = $this->storage->getId(); @@ -61,10 +64,10 @@ class Scanner { * scan a single file and store it in the cache * * @param string $file - * @param bool $checkExisting check existing folder sizes in the cache instead of always using -1 for folder size + * @param int $reuseExisting * @return array with metadata of the scanned file */ - public function scanFile($file, $checkExisting = false) { + public function scanFile($file, $reuseExisting = 0) { if (!self::isPartialFile($file) and !Filesystem::isFileBlacklisted($file) ) { @@ -81,21 +84,22 @@ class Scanner { } } $newData = $data; - if ($cacheData = $this->cache->get($file)) { - if ($checkExisting && $data['size'] === -1) { - $data['size'] = $cacheData['size']; - } - if ($data['mtime'] === $cacheData['mtime'] && - $data['size'] === $cacheData['size'] - ) { - $data['etag'] = $cacheData['etag']; + if ($reuseExisting and $cacheData = $this->cache->get($file)) { + // only reuse data if the file hasn't explicitly changed + if ($data['mtime'] === $cacheData['mtime']) { + if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) { + $data['size'] = $cacheData['size']; + } + if ($reuseExisting & self::REUSE_ETAG) { + $data['etag'] = $cacheData['etag']; + } } // Only update metadata that has changed $newData = array_diff($data, $cacheData); } - if (!empty($newData)) { - $this->cache->put($file, $newData); - } + } + if (!empty($newData)) { + $this->cache->put($file, $newData); } return $data; } @@ -107,10 +111,14 @@ class Scanner { * * @param string $path * @param bool $recursive + * @param int $reuse * @return int the size of the scanned folder or -1 if the size is unknown at this stage */ - public function scan($path, $recursive = self::SCAN_RECURSIVE) { - $this->scanFile($path); + public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) { + if ($reuse === -1) { + $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0; + } + $this->scanFile($path, $reuse); return $this->scanChildren($path, $recursive); } @@ -119,9 +127,13 @@ class Scanner { * * @param string $path * @param bool $recursive + * @param int $reuse * @return int the size of the scanned folder or -1 if the size is unknown at this stage */ - public function scanChildren($path, $recursive = self::SCAN_RECURSIVE) { + public function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) { + if ($reuse === -1) { + $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0; + } \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_folder', array('path' => $path, 'storage' => $this->storageId)); $size = 0; $childQueue = array(); @@ -130,7 +142,7 @@ class Scanner { while ($file = readdir($dh)) { $child = ($path) ? $path . '/' . $file : $file; if (!Filesystem::isIgnoredDir($file)) { - $data = $this->scanFile($child, $recursive === self::SCAN_SHALLOW); + $data = $this->scanFile($child, $reuse); if ($data) { if ($data['size'] === -1) { if ($recursive === self::SCAN_RECURSIVE) { From 398fe8bf3255df7ac9d301522401c4a746a0e7f9 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 14 Jun 2013 17:04:17 +0200 Subject: [PATCH 3/5] reuse etag when doing a forced rescan --- apps/files/ajax/scan.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/files/ajax/scan.php b/apps/files/ajax/scan.php index 391b98608b..6659cd459c 100644 --- a/apps/files/ajax/scan.php +++ b/apps/files/ajax/scan.php @@ -24,7 +24,7 @@ foreach ($mountPoints as $mountPoint) { ScanListener::$mountPoints[$storage->getId()] = $mountPoint; $scanner = $storage->getScanner(); if ($force) { - $scanner->scan(''); + $scanner->scan('', \OC\Files\Cache\Scanner::SCAN_RECURSIVE, \OC\Files\Cache\Scanner::REUSE_ETAG); } else { $scanner->backgroundScan(); } From 2ed0e6e91573489b9db0c37d1d4f3ae1c1393f00 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Mon, 17 Jun 2013 18:03:57 +0200 Subject: [PATCH 4/5] add tests for reusing existing data in scanner --- lib/files/cache/scanner.php | 6 ++---- tests/lib/files/cache/scanner.php | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/files/cache/scanner.php b/lib/files/cache/scanner.php index 061778cd85..e7fbd856d5 100644 --- a/lib/files/cache/scanner.php +++ b/lib/files/cache/scanner.php @@ -119,7 +119,7 @@ class Scanner { $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0; } $this->scanFile($path, $reuse); - return $this->scanChildren($path, $recursive); + return $this->scanChildren($path, $recursive, $reuse); } /** @@ -165,9 +165,7 @@ class Scanner { $size += $childSize; } } - if ($size !== -1) { - $this->cache->put($path, array('size' => $size)); - } + $this->cache->put($path, array('size' => $size)); } return $size; } diff --git a/tests/lib/files/cache/scanner.php b/tests/lib/files/cache/scanner.php index 3885c99e6d..3dacefa2b8 100644 --- a/tests/lib/files/cache/scanner.php +++ b/tests/lib/files/cache/scanner.php @@ -104,7 +104,7 @@ class Scanner extends \PHPUnit_Framework_TestCase { $this->assertNotEquals($cachedDataFolder['size'], -1); } - function testBackgroundScan(){ + function testBackgroundScan() { $this->fillTestFolders(); $this->storage->mkdir('folder2'); $this->storage->file_put_contents('folder2/bar.txt', 'foobar'); @@ -126,6 +126,24 @@ class Scanner extends \PHPUnit_Framework_TestCase { $this->assertFalse($this->cache->getIncomplete()); } + public function testReuseExisting() { + $this->fillTestFolders(); + + $this->scanner->scan(''); + $oldData = $this->cache->get(''); + $this->storage->unlink('folder/bar.txt'); + $this->scanner->scan('', \OC\Files\Cache\Scanner::SCAN_SHALLOW, \OC\Files\Cache\Scanner::REUSE_SIZE); + $newData = $this->cache->get(''); + $this->assertNotEquals($oldData['etag'], $newData['etag']); + $this->assertEquals($oldData['size'], $newData['size']); + + $oldData = $newData; + $this->scanner->scan('', \OC\Files\Cache\Scanner::SCAN_SHALLOW, \OC\Files\Cache\Scanner::REUSE_ETAG); + $newData = $this->cache->get(''); + $this->assertEquals($oldData['etag'], $newData['etag']); + $this->assertEquals(-1, $newData['size']); + } + function setUp() { $this->storage = new \OC\Files\Storage\Temporary(array()); $this->scanner = new \OC\Files\Cache\Scanner($this->storage); From 0b74e71de83801475a029951cdc85d2942307856 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 19 Jun 2013 00:26:47 +0200 Subject: [PATCH 5/5] fix updaters test cases --- tests/lib/files/cache/updater.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/lib/files/cache/updater.php b/tests/lib/files/cache/updater.php index dad3cd7e65..198d601b1b 100644 --- a/tests/lib/files/cache/updater.php +++ b/tests/lib/files/cache/updater.php @@ -69,6 +69,7 @@ class Updater extends \PHPUnit_Framework_TestCase { public function testWrite() { $textSize = strlen("dummy file data\n"); $imageSize = filesize(\OC::$SERVERROOT . '/core/img/logo.png'); + $this->cache->put('foo.txt', array('mtime' => 100)); $rootCachedData = $this->cache->get(''); $this->assertEquals(3 * $textSize + $imageSize, $rootCachedData['size']); @@ -77,11 +78,9 @@ class Updater extends \PHPUnit_Framework_TestCase { $cachedData = $this->cache->get('foo.txt'); $this->assertEquals(3, $cachedData['size']); $this->assertNotEquals($fooCachedData['etag'], $cachedData['etag']); - $mtime = $cachedData['mtime']; $cachedData = $this->cache->get(''); $this->assertEquals(2 * $textSize + $imageSize + 3, $cachedData['size']); $this->assertNotEquals($rootCachedData['etag'], $cachedData['etag']); - $this->assertGreaterThanOrEqual($rootCachedData['mtime'], $mtime); $rootCachedData = $cachedData; $this->assertFalse($this->cache->inCache('bar.txt'));