2012-09-16 18:52:32 +04:00
|
|
|
<?php
|
|
|
|
/**
 * Copyright (c) 2012 Robin Appelman <icewind@owncloud.com>
 * This file is licensed under the Affero General Public License version 3 or
 * later.
 * See the COPYING-README file.
 */
|
|
|
|
|
|
|
|
namespace OC\Files\Cache;
|
|
|
|
|
2013-06-14 17:30:41 +04:00
|
|
|
use OC\Files\Filesystem;
|
2013-07-19 18:32:43 +04:00
|
|
|
use OC\Hooks\BasicEmitter;
|
2013-06-14 17:30:41 +04:00
|
|
|
|
2013-07-19 18:32:43 +04:00
|
|
|
/**
 * Class Scanner
 *
 * Reads file and folder metadata from a storage and writes it into the cache
 * of that storage.
 *
 * Hooks available in scope \OC\Files\Cache\Scanner:
 *  - scanFile(string $path, string $storageId)
 *  - scanFolder(string $path, string $storageId)
 *  - postScanFile(string $path, string $storageId)
 *  - postScanFolder(string $path, string $storageId)
 *
 * @package OC\Files\Cache
 */
class Scanner extends BasicEmitter {
	/**
	 * @var \OC\Files\Storage\Storage $storage the storage that is being scanned
	 */
	private $storage;

	/**
	 * @var string $storageId id of $storage, passed along with every emitted hook
	 */
	private $storageId;

	/**
	 * @var \OC\Files\Cache\Cache $cache the cache of $storage
	 */
	private $cache;

	/**
	 * @var \OC\Files\Cache\Permissions $permissionsCache the permissions cache of $storage
	 */
	private $permissionsCache;

	const SCAN_RECURSIVE = true;
	const SCAN_SHALLOW = false;

	// bit flags for the $reuse / $reuseExisting parameters
	const REUSE_ETAG = 1;
	const REUSE_SIZE = 2;

	/**
	 * @param \OC\Files\Storage\Storage $storage the storage to scan
	 */
	public function __construct(\OC\Files\Storage\Storage $storage) {
		$this->storage = $storage;
		$this->storageId = $this->storage->getId();
		$this->cache = $storage->getCache();
		$this->permissionsCache = $storage->getPermissionsCache();
	}

	/**
	 * get all the metadata of a file or folder
	 *
	 * The result holds the keys 'mimetype', 'mtime', 'size', 'etag' and
	 * 'storage_mtime'. Folders get a size of -1 (unknown).
	 *
	 * @param string $path
	 * @return array|null an array of metadata of the file, null if the path is not readable
	 */
	public function getData($path) {
		if (!$this->storage->isReadable($path)) {
			//cant read, nothing we can do
			\OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not readable !!!", \OCP\Util::DEBUG);
			return null;
		}

		$mimeType = $this->storage->getMimeType($path);
		$mtime = $this->storage->filemtime($path);

		$data = array();
		$data['mimetype'] = $mimeType;
		$data['mtime'] = $mtime;
		if ($mimeType === 'httpd/unix-directory') {
			$data['size'] = -1; //unknown
		} else {
			$data['size'] = $this->storage->filesize($path);
		}
		$data['etag'] = $this->storage->getETag($path);
		$data['storage_mtime'] = $mtime;
		return $data;
	}

	/**
	 * scan a single file and store it in the cache
	 *
	 * Partial ('.part') and blacklisted files are skipped. A file for which no
	 * metadata can be read is removed from the cache.
	 *
	 * @param string $file
	 * @param int $reuseExisting bitmask of self::REUSE_ETAG and self::REUSE_SIZE, 0 to reuse nothing
	 * @param bool $parentExistsInCache true to skip the check (and scan) of the parent folder
	 * @return array|null an array of metadata of the scanned file, null if the file was skipped or unreadable
	 */
	public function scanFile($file, $reuseExisting = 0, $parentExistsInCache = false) {
		if (self::isPartialFile($file) || Filesystem::isFileBlacklisted($file)) {
			return null;
		}

		$this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
		\OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));

		$data = $this->getData($file);
		if (!$data) {
			$this->cache->remove($file);
			return $data;
		}

		// make sure the parent folder has a cache entry before the file is stored
		if ($file and !$parentExistsInCache) {
			$parent = $this->getParentPath($file);
			if (!$this->cache->inCache($parent)) {
				$this->scanFile($parent);
			}
		}

		$newData = $data;
		$cacheData = $this->cache->get($file);
		if ($cacheData) {
			if (isset($cacheData['fileid'])) {
				$this->permissionsCache->remove($cacheData['fileid']);
			}
			if ($reuseExisting) {
				// prevent empty etag
				$cachedETagMissing = empty($cacheData['etag']);
				$etag = $cachedETagMissing ? $data['etag'] : $cacheData['etag'];

				// only reuse data if the file hasn't explicitly changed
				$unchanged = isset($data['storage_mtime'])
					&& isset($cacheData['storage_mtime'])
					&& $data['storage_mtime'] === $cacheData['storage_mtime'];
				if ($unchanged) {
					if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
						$data['size'] = $cacheData['size'];
					}
					if ($reuseExisting & self::REUSE_ETAG) {
						$data['etag'] = $etag;
						if ($cachedETagMissing) {
							// the file receives an etag it did not have before
							$this->refreshParentETags($file);
						}
					}
				}
			}

			// Only update metadata that has changed
			$newData = array_diff_assoc($data, $cacheData);
			if (isset($newData['etag'])) {
				$cacheDataString = print_r($cacheData, true);
				$dataString = print_r($data, true);
				\OCP\Util::writeLog('OC\Files\Cache\Scanner',
					"!!! No reuse of etag for '$file' !!! \ncache: $cacheDataString \ndata: $dataString",
					\OCP\Util::DEBUG);
			}
		}

		if (!empty($newData)) {
			$data['fileid'] = $this->cache->put($file, $newData);
			$this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
		}
		return $data;
	}

	/**
	 * scan a folder and all its children
	 *
	 * @param string $path
	 * @param bool $recursive self::SCAN_RECURSIVE or self::SCAN_SHALLOW
	 * @param int $reuse bitmask of self::REUSE_ETAG and self::REUSE_SIZE, -1 to pick a default based on $recursive
	 * @return array|null an array of the meta data of the scanned file or folder, null if it could not be scanned
	 */
	public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) {
		$reuse = $this->resolveReuse($recursive, $reuse);
		$data = $this->scanFile($path, $reuse);
		// Only folders have children. Scanning the "children" of a plain file
		// would overwrite its real size with 0, and doing so for an entry that
		// could not be scanned would turn the null result into an array.
		if ($data and $data['mimetype'] === 'httpd/unix-directory') {
			$data['size'] = $this->scanChildren($path, $recursive, $reuse);
		}
		return $data;
	}

	/**
	 * scan all the files and folders in a folder
	 *
	 * Cache entries of children that are no longer listed by the storage are
	 * removed.
	 *
	 * @param string $path
	 * @param bool $recursive self::SCAN_RECURSIVE or self::SCAN_SHALLOW
	 * @param int $reuse bitmask of self::REUSE_ETAG and self::REUSE_SIZE, -1 to pick a default based on $recursive
	 * @return int the size of the scanned folder or -1 if the size is unknown at this stage
	 */
	public function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) {
		$reuse = $this->resolveReuse($recursive, $reuse);
		$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
		$size = 0;
		$childQueue = array();

		// names of the children the cache currently knows about
		$existingChildren = array();
		if ($this->cache->inCache($path)) {
			foreach ($this->cache->getFolderContents($path) as $cachedChild) {
				$existingChildren[] = $cachedChild['name'];
			}
		}

		// names of the children found on the storage
		$newChildren = array();
		if ($this->storage->is_dir($path) && ($dh = $this->storage->opendir($path))) {
			$exceptionOccurred = false;
			\OC_DB::beginTransaction();
			if (is_resource($dh)) {
				while (($file = readdir($dh)) !== false) {
					if (Filesystem::isIgnoredDir($file)) {
						continue;
					}
					$child = ($path) ? $path . '/' . $file : $file;
					$newChildren[] = $file;
					try {
						$data = $this->scanFile($child, $reuse, true);
						if ($data) {
							if ($data['size'] === -1) {
								if ($recursive === self::SCAN_RECURSIVE) {
									// folder of unknown size, scanned after the transaction
									$childQueue[] = $child;
								} else {
									$size = -1;
								}
							} else if ($size !== -1) {
								$size += $data['size'];
							}
						}
					} catch (\Doctrine\DBAL\DBALException $ex) {
						// might happen if inserting duplicate while a scanning
						// process is running in parallel
						// log and ignore
						\OC_Log::write('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OC_Log::DEBUG);
						$exceptionOccurred = true;
					}
				}
				// release the directory handle, it is not needed past this point
				closedir($dh);
			}

			$removedChildren = \array_diff($existingChildren, $newChildren);
			foreach ($removedChildren as $childName) {
				$child = ($path) ? $path . '/' . $childName : $childName;
				$this->cache->remove($child);
			}
			\OC_DB::commit();

			if ($exceptionOccurred) {
				// It might happen that the parallel scan process has already
				// inserted mimetypes but those weren't available yet inside the transaction
				// To make sure to have the updated mime types in such cases,
				// we reload them here
				$this->cache->loadMimetypes();
			}

			foreach ($childQueue as $child) {
				$childSize = $this->scanChildren($child, self::SCAN_RECURSIVE, $reuse);
				if ($childSize === -1) {
					$size = -1;
				} else if ($size !== -1) {
					// once the size is unknown it has to stay unknown
					$size += $childSize;
				}
			}
			$this->cache->put($path, array('size' => $size));
		}
		$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
		return $size;
	}

	/**
	 * check if the file should be ignored when scanning
	 * NOTE: files with a '.part' extension are ignored as well!
	 *       prevents unfinished put requests to be scanned
	 *
	 * @param string $file
	 * @return boolean true if the extension of $file is 'part'
	 */
	public static function isPartialFile($file) {
		return pathinfo($file, PATHINFO_EXTENSION) === 'part';
	}

	/**
	 * walk over any folders that are not fully scanned yet and scan them
	 *
	 * Stops as soon as the cache reports no incomplete folder anymore or
	 * reports the same folder twice in a row.
	 */
	public function backgroundScan() {
		$lastPath = null;
		while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) {
			$this->scan($path, self::SCAN_RECURSIVE, self::REUSE_ETAG);
			$this->cache->correctFolderSize($path);
			$lastPath = $path;
		}
	}

	/**
	 * pick the default reuse flags when the caller passed -1
	 *
	 * @param bool $recursive
	 * @param int $reuse
	 * @return int $reuse unchanged unless it is -1; then etag and size reuse for a shallow scan, 0 otherwise
	 */
	private function resolveReuse($recursive, $reuse) {
		if ($reuse !== -1) {
			return $reuse;
		}
		return ($recursive === self::SCAN_SHALLOW) ? (self::REUSE_ETAG | self::REUSE_SIZE) : 0;
	}

	/**
	 * get the path of the parent folder, using '' for the root
	 *
	 * @param string $path
	 * @return string dirname of $path, with '.' and '/' mapped to ''
	 */
	private function getParentPath($path) {
		$parent = dirname($path);
		if ($parent === '.' or $parent === '/') {
			return '';
		}
		return $parent;
	}

	/**
	 * store a fresh etag from the storage for every cached parent folder of a file, up to the root
	 *
	 * @param string $file
	 */
	private function refreshParentETags($file) {
		$parent = $file;
		while ($parent !== '') {
			$parent = $this->getParentPath($parent);
			$parentCacheData = $this->cache->get($parent);
			// a parent without a cache entry has nothing to update
			if (isset($parentCacheData['fileid'])) {
				$this->cache->update($parentCacheData['fileid'], array(
					'etag' => $this->storage->getETag($parent),
				));
			}
		}
	}
}
|