e5dce8942a785c1acad744c9b88299cf8c6eeab4
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Resource\ResourceStorage;
18 use TYPO3\CMS\Core\Resource\File;
19
20 /**
21 * The New FAL Indexer
22 */
23 class Indexer {
24
/**
 * Queue of files that have to be (re-)indexed, keyed by file identifier.
 * The value is the existing index record of the file, or NULL when the
 * file has no index record yet.
 *
 * @var array
 */
protected $filesToUpdate = array();

/**
 * Uids of the index records that could be matched to a file of the storage.
 *
 * @var int[]
 */
protected $identifiedFileUids = array();

/**
 * The storage this indexer operates on.
 *
 * @var ResourceStorage
 */
protected $storage;
39
/**
 * Binds the indexer to the storage whose files it is going to index.
 *
 * @param ResourceStorage $storage
 */
public function __construct(ResourceStorage $storage) {
    $this->storage = $storage;
}
46
/**
 * Creates the index record for a file of the storage and returns the
 * matching file object.
 *
 * The gathered file information is written to the file index, a file
 * object is built from the stored record and the required meta data
 * is extracted for it.
 *
 * @param string $identifier Identifier of the file inside the storage
 * @return File
 * @throws \InvalidArgumentException If the identifier is not a non-empty string
 */
public function createIndexEntry($identifier) {
    $isUsableIdentifier = is_string($identifier) && $identifier !== '';
    if (!$isUsableIdentifier) {
        throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
    }

    $gatheredProperties = $this->gatherFileInformationArray($identifier);
    $indexRecord = $this->getFileIndexRepository()->addRaw($gatheredProperties);
    $indexedFile = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
    $this->extractRequiredMetaData($indexedFile);

    return $indexedFile;
}
64
/**
 * Refreshes the index record of an already indexed file.
 *
 * The file information is gathered again from the storage, applied to
 * the file object and persisted; afterwards the required meta data is
 * extracted again.
 *
 * @param File $fileObject
 * @return void
 */
public function updateIndexEntry(File $fileObject) {
    $fileIdentifier = $fileObject->getIdentifier();
    $fileObject->updateProperties($this->gatherFileInformationArray($fileIdentifier));
    $this->getFileIndexRepository()->update($fileObject);
    $this->extractRequiredMetaData($fileObject);
}
77
/**
 * Synchronizes the file index with the current content of the storage.
 *
 * All file identifiers below the root level folder of the storage are
 * collected, new and changed files are queued and processed, and finally
 * index records without a matching file are flagged as missing.
 *
 * @return void
 */
public function processChangesInStorages() {
    // Every run starts with clean bookkeeping. Without the reset a reused
    // indexer would process the queue entries of the previous run again
    // and would treat uids identified earlier as still present.
    $this->filesToUpdate = array();
    $this->identifiedFileUids = array();

    // get all file-identifiers from the storage
    $rootFolderIdentifier = $this->storage->getRootLevelFolder()->getIdentifier();
    $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, TRUE, TRUE);

    $this->detectChangedFilesInStorage($availableFiles);
    $this->processChangedAndNewFiles();

    $this->detectMissingFiles();
}
89
/**
 * Runs the registered meta data extractors on files of the storage whose
 * indexing is still outstanding.
 *
 * For every file the results of all extractors that can process it are
 * collected per priority, merged in ascending priority order (so data of
 * a higher priority number overrides data of a lower one) and persisted.
 * Finally the indexing time of the file is updated.
 *
 * @param int $maximumFileCount Passed on to the repository query as the
 *                              maximum number of files to handle
 *                              (NOTE(review): -1 presumably means "no limit" - confirm)
 * @return void
 */
public function runMetaDataExtraction($maximumFileCount = -1) {
    $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);

    $extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
    foreach ($fileIndexRecords as $indexRecord) {
        $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

        // Slot 0 holds the meta data the file already has
        $newMetaData = array(
            0 => $fileObject->_getMetaData()
        );
        foreach ($extractionServices as $service) {
            if (!$service->canProcess($fileObject)) {
                continue;
            }
            $priority = $service->getPriority();
            $extractedData = $service->extractMetaData($fileObject, $newMetaData);
            if (!is_array($extractedData)) {
                // Nothing usable was returned, do not break the merge below
                continue;
            }
            // Several extractors may share one priority (and an extractor may
            // use priority 0): merge into the slot instead of replacing it,
            // otherwise earlier results or the existing meta data are lost.
            if (isset($newMetaData[$priority]) && is_array($newMetaData[$priority])) {
                $newMetaData[$priority] = array_merge($newMetaData[$priority], $extractedData);
            } else {
                $newMetaData[$priority] = $extractedData;
            }
        }

        // Merge by ascending priority: later (higher) slots win on equal keys
        ksort($newMetaData);
        $metaData = array();
        foreach ($newMetaData as $data) {
            $metaData = array_merge($metaData, $data);
        }

        $fileObject->_updateMetaDataProperties($metaData);
        $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }
}
119
/**
 * Flags index records whose file no longer exists in the storage.
 *
 * Since by now all files of the storage have been looked at, it is safe
 * to assume that files which are indexed but were not touched in this run
 * are missing. Each candidate is verified against the storage before it
 * is marked.
 *
 * @return void
 */
protected function detectMissingFiles() {
    $untouchedIndexRecords = $this->getFileIndexRepository()->findInStorageAndNotInUidList($this->storage, $this->identifiedFileUids);

    foreach ($untouchedIndexRecords as $indexRecord) {
        if ($this->storage->hasFile($indexRecord['identifier'])) {
            // the file is still there, nothing to flag
            continue;
        }
        $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
    }
}
136
/**
 * Adds new and updated files to the processing queue.
 *
 * Files inside the processing folder of the storage are skipped. A file
 * without index record is queued with NULL. A file with index record is
 * remembered as identified and queued together with its record when the
 * modification time in the storage is newer than the indexed one or when
 * the record is flagged as missing.
 *
 * @param array $fileIdentifierArray Identifiers of the files found in the storage
 * @return void
 */
protected function detectChangedFilesInStorage(array $fileIdentifierArray) {
    if (empty($fileIdentifierArray)) {
        return;
    }

    // Both values are the same for every file, so resolve them only once
    $processingFolderIdentifier = $this->storage->getProcessingFolder()->getIdentifier();
    $storageUid = $this->storage->getUid();

    foreach ($fileIdentifierArray as $fileIdentifier) {
        // skip processed files
        if (strpos($fileIdentifier, $processingFolderIdentifier) === 0) {
            continue;
        }

        $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($storageUid, $fileIdentifier);
        if ($indexRecord === FALSE) {
            // Not indexed yet: no modification time needed to decide that
            $this->filesToUpdate[$fileIdentifier] = NULL;
            continue;
        }

        $this->identifiedFileUids[] = $indexRecord['uid'];

        // Ask the storage for the modification time only for indexed files,
        // it is the only place where the value is compared
        $fileInfo = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
        if ($indexRecord['modification_date'] < $fileInfo['mtime'] || $indexRecord['missing']) {
            $this->filesToUpdate[$fileIdentifier] = $indexRecord;
        }
    }
}
165
/**
 * Processes the files which have been detected as "changed or new"
 * in the storage.
 *
 * Queue entries with an index record are updated. For entries without
 * one, an index record with the same content hash whose file no longer
 * exists is taken as the moved/renamed file and re-pointed to the new
 * identifier; otherwise a new index record is created.
 *
 * @return void
 */
protected function processChangedAndNewFiles() {
    // Take the queue over and empty it, so that entries handled here are
    // not processed a second time by a later run (which would create
    // duplicate index records for files that were new in this run).
    $queuedFiles = $this->filesToUpdate;
    $this->filesToUpdate = array();

    foreach ($queuedFiles as $identifier => $indexRecord) {
        if ($indexRecord !== NULL) {
            // update existing file
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            $this->updateIndexEntry($fileObject);
            continue;
        }

        // search for files with same content hash in indexed storage
        // NOTE(review): the hash lookup does not visibly restrict the result
        // to this storage while hasFile() below asks this storage - confirm
        // that records of other storages cannot be matched here.
        $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
        $fileObject = NULL;
        if (!empty($files)) {
            foreach ($files as $fileIndexEntry) {
                // check if file is missing then we assume it's moved/renamed
                if ($this->storage->hasFile($fileIndexEntry['identifier'])) {
                    continue;
                }
                $fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
                $fileObject->updateProperties(array(
                    'identifier' => $identifier
                ));
                $this->updateIndexEntry($fileObject);
                $this->identifiedFileUids[] = $fileObject->getUid();
                break;
            }
        }

        // create new index when no missing file with same content hash is found
        if ($fileObject === NULL) {
            $fileObject = $this->createIndexEntry($identifier);
            $this->identifiedFileUids[] = $fileObject->getUid();
        }
    }
}
205
/**
 * Writes the image dimensions of a file to its meta data.
 *
 * Since the core desperately needs image sizes in the meta data table they
 * are put there directly. This should be called after every "content"
 * update and "record" creation. Only image files of storages with the
 * "Local" driver are handled; remote storages must provide the data with
 * extractors.
 *
 * @param File $fileObject
 * @return void
 */
protected function extractRequiredMetaData(File $fileObject) {
    if ($fileObject->getType() != File::FILETYPE_IMAGE || $this->storage->getDriverType() !== 'Local') {
        return;
    }

    $rawFileLocation = $fileObject->getForLocalProcessing(FALSE);
    $imageSize = getimagesize($rawFileLocation);
    if ($imageSize === FALSE) {
        // The file could not be read as an image (e.g. a format getimagesize()
        // does not support). Keep the stored dimensions instead of
        // overwriting them with NULL values.
        return;
    }

    $metaData = array(
        'width' => $imageSize[0],
        'height' => $imageSize[1]
    );
    $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
    $fileObject->_updateMetaDataProperties($metaData);
}
223
224 /****************************
225 *
226 * UTILITY
227 *
228 ****************************/
229
/**
 * Collects the file information which gets cached in sys_file.
 *
 * The file info delivered by the storage is mapped to file property
 * names and enriched with the file type, the sha1 content hash and the
 * file extension; the file is flagged as not missing.
 *
 * @param string $identifier Identifier of the file inside the storage
 * @return array
 */
protected function gatherFileInformationArray($identifier) {
    $driverFileInfo = $this->storage->getFileInfoByIdentifier($identifier);
    $properties = $this->transformFromDriverFileInfoArrayToFileObjectFormat($driverFileInfo);

    $fileType = $this->getFileType($properties['mime_type']);
    $properties['type'] = $fileType;

    $contentHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
    $properties['sha1'] = $contentHash;

    $extension = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($properties['name'], PATHINFO_EXTENSION);
    $properties['extension'] = $extension;

    $properties['missing'] = 0;

    return $properties;
}
246
/**
 * Maps a mime type to the file type stored in the sys_file table.
 *
 * Only the part in front of the first "/" is evaluated, case-insensitively.
 * Everything that is not listed maps to File::FILETYPE_UNKNOWN.
 *
 * @param string $mimeType
 * @return string
 */
protected function getFileType($mimeType) {
    $fileTypeByMediaType = array(
        'text' => File::FILETYPE_TEXT,
        'image' => File::FILETYPE_IMAGE,
        'audio' => File::FILETYPE_AUDIO,
        'video' => File::FILETYPE_VIDEO,
        'application' => File::FILETYPE_APPLICATION,
        'software' => File::FILETYPE_APPLICATION
    );

    $mimeTypeParts = explode('/', $mimeType);
    $mediaType = strtolower($mimeTypeParts[0]);

    if (array_key_exists($mediaType, $fileTypeByMediaType)) {
        return $fileTypeByMediaType[$mediaType];
    }
    return File::FILETYPE_UNKNOWN;
}
277
/**
 * Renames the keys of a driver file info array to the property names
 * used by file objects.
 *
 * The properties of a file object which are persisted to the database
 * are named differently than the keys the driver returns, so a mapping
 * is required. Keys mapped to NULL are dropped, keys without a mapping
 * are passed through unchanged.
 *
 * @param array $fileInfo File info as delivered by the driver
 *
 * @return array
 */
protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo) {
    // key as delivered by the driver => name of the file property
    $propertyNameByDriverKey = array(
        'size' => 'size',
        'atime' => NULL,
        'mtime' => 'modification_date',
        'ctime' => 'creation_date',
        'mimetype' => 'mime_type'
    );

    $fileProperties = array();
    foreach ($fileInfo as $driverKey => $value) {
        if (!array_key_exists($driverKey, $propertyNameByDriverKey)) {
            // no mapping known, keep the key as it is
            $fileProperties[$driverKey] = $value;
            continue;
        }
        $propertyName = $propertyNameByDriverKey[$driverKey];
        if ($propertyName === NULL) {
            // value is not stored in the file properties
            continue;
        }
        $fileProperties[$propertyName] = $value;
    }

    return $fileProperties;
}
309
310
/**
 * Provides the FileIndexRepository instance used to read and write
 * index records.
 *
 * @return FileIndexRepository
 */
protected function getFileIndexRepository() {
    $fileIndexRepository = FileIndexRepository::getInstance();
    return $fileIndexRepository;
}
319
/**
 * Provides the MetaDataRepository instance used to persist file meta data.
 *
 * @return MetaDataRepository
 */
protected function getMetaDataRepository() {
    $metaDataRepository = MetaDataRepository::getInstance();
    return $metaDataRepository;
}
328
/**
 * Provides the ResourceFactory instance used to build file objects.
 *
 * @return \TYPO3\CMS\Core\Resource\ResourceFactory
 */
protected function getResourceFactory() {
    $resourceFactory = \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
    return $resourceFactory;
}
337
/**
 * Provides the ExtractorRegistry instance which knows the registered
 * meta data extractors.
 *
 * @return ExtractorRegistry
 */
protected function getExtractorRegistry() {
    $extractorRegistry = ExtractorRegistry::getInstance();
    return $extractorRegistry;
}
346
347 }