7b631f5cbae31e87d0b9dbce6139043d6c924899
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Resource\File;
18 use TYPO3\CMS\Core\Resource\ResourceFactory;
19 use TYPO3\CMS\Core\Resource\ResourceStorage;
20 use TYPO3\CMS\Core\Type\File\ImageInfo;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22 use TYPO3\CMS\Core\Utility\PathUtility;
23
24 /**
25 * The New FAL Indexer
26 */
27 class Indexer
28 {
/**
 * Files queued for (re-)indexing, keyed by file identifier.
 * The value is the existing index record, or null if the file has no index record yet.
 *
 * @var array
 */
protected $filesToUpdate = [];

/**
 * Uids of the index records that were matched to a file while processing the storage.
 *
 * @var int[]
 */
protected $identifiedFileUids = [];

/**
 * The storage this indexer works on; assigned in the constructor.
 *
 * @var ResourceStorage
 */
protected $storage;

/**
 * Extractors for the driver type of the storage; stays null until they are first requested.
 *
 * @var ExtractorInterface[]|null
 */
protected $extractionServices;
48
/**
 * Binds the indexer to the storage whose files it indexes.
 *
 * @param ResourceStorage $storage Storage all subsequent operations of this instance refer to
 */
public function __construct(ResourceStorage $storage)
{
    $this->storage = $storage;
}
56
/**
 * Creates the index record for the file behind the given identifier and
 * returns the file object built from that record.
 *
 * @param string $identifier Non-empty identifier of the file within the storage
 * @return File
 * @throws \InvalidArgumentException If the identifier is not a non-empty string
 */
public function createIndexEntry($identifier)
{
    $isUsableIdentifier = is_string($identifier) && $identifier !== '';
    if (!$isUsableIdentifier) {
        throw new \InvalidArgumentException(
            'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
            1401732565
        );
    }

    $properties = $this->gatherFileInformationArray($identifier);
    $indexRecord = $this->getFileIndexRepository()->addRaw($properties);

    $file = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
    // The mandatory metadata (image dimensions) is written right after the record exists.
    $this->extractRequiredMetaData($file);

    return $file;
}
75
/**
 * Refreshes the index record of an already indexed file.
 *
 * The file information is gathered again from the storage, applied to the
 * file object, persisted, and the mandatory metadata is extracted afterwards.
 *
 * @param File $fileObject File whose index record gets refreshed
 */
public function updateIndexEntry(File $fileObject)
{
    $currentIdentifier = $fileObject->getIdentifier();
    $freshProperties = $this->gatherFileInformationArray($currentIdentifier);
    $fileObject->updateProperties($freshProperties);

    $indexRepository = $this->getFileIndexRepository();
    $indexRepository->update($fileObject);

    $this->extractRequiredMetaData($fileObject);
}
88
/**
 * Brings the index in line with the files currently found in the storage.
 *
 * Steps, in this order:
 *  1. fetch the file identifiers the storage reports for its root level folder,
 *  2. queue the files that are new or changed,
 *  3. create or update the index records of the queued files,
 *  4. mark index records whose file was not found as missing.
 */
public function processChangesInStorages()
{
    $rootFolderIdentifier = $this->storage->getRootLevelFolder(false)->getIdentifier();
    $identifiersInStorage = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, true, true);

    $this->detectChangedFilesInStorage($identifiersInStorage);
    $this->processChangedAndNewFiles();
    $this->detectMissingFiles();
}
100
/**
 * Runs the metadata extraction for the index records of this storage
 * that the repository reports as still outstanding.
 *
 * @param int $maximumFileCount Limit passed on to the repository lookup (default: -1)
 */
public function runMetaDataExtraction($maximumFileCount = -1)
{
    $pendingRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding(
        $this->storage,
        $maximumFileCount
    );

    foreach ($pendingRecords as $pendingRecord) {
        $file = $this->getResourceFactory()->getFileObject($pendingRecord['uid'], $pendingRecord);
        $this->extractMetaData($file);
    }
}
112
/**
 * Extract metadata for given fileObject
 *
 * Asks every applicable extractor for metadata, merges the results with the
 * metadata the file already has, stores the merged set on the file object and
 * in the metadata repository, and finally updates the indexing time.
 *
 * Results are merged in ascending priority order, so for equal keys the value
 * coming from the higher priority wins. Extractors sharing the same priority
 * are merged with each other (for equal keys the extractor that ran later
 * wins) instead of replacing one another.
 *
 * @param File $fileObject
 */
public function extractMetaData(File $fileObject)
{
    // Slot 0 carries the metadata the file already has; every other slot is
    // keyed by the priority of the extractor(s) that produced its content.
    $newMetaData = [
        0 => $fileObject->_getMetaData()
    ];

    // Loop through available extractors and fetch metadata for the given file.
    foreach ($this->getExtractionServices() as $service) {
        if (!$this->isFileTypeSupportedByExtractor($fileObject, $service) || !$service->canProcess($fileObject)) {
            continue;
        }

        $priority = $service->getPriority();
        $extractedData = $service->extractMetaData($fileObject, $newMetaData);
        if (!is_array($extractedData)) {
            // Nothing that could be merged; keep what has been collected so far.
            continue;
        }

        // A plain assignment would silently drop the result of an earlier extractor
        // with the same priority (or the existing metadata for priority 0).
        if (isset($newMetaData[$priority]) && is_array($newMetaData[$priority])) {
            $newMetaData[$priority] = array_merge($newMetaData[$priority], $extractedData);
        } else {
            $newMetaData[$priority] = $extractedData;
        }
    }

    // Sort metadata by priority so that merging happens in order of precedence.
    ksort($newMetaData);

    // Merge the collected metadata.
    $metaData = [];
    foreach ($newMetaData as $data) {
        if (is_array($data)) {
            $metaData = array_merge($metaData, $data);
        }
    }

    $fileObject->_updateMetaDataProperties($metaData);
    $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
    $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
}
143
/**
 * Returns the extractors that support the driver type of the storage.
 *
 * The list is requested from the extractor registry on the first call and
 * kept in $this->extractionServices for later calls.
 *
 * @return ExtractorInterface[]
 */
protected function getExtractionServices()
{
    if ($this->extractionServices !== null) {
        return $this->extractionServices;
    }

    $registry = $this->getExtractorRegistry();
    $this->extractionServices = $registry->getExtractorsWithDriverSupport($this->storage->getDriverType());

    return $this->extractionServices;
}
156
/**
 * Marks index records as missing whose file is gone.
 *
 * By the time this runs, every file of the storage has been looked at, so it is
 * safe to assume that records which are indexed but were not touched in this run
 * belong to missing files. Each candidate is still checked against the storage
 * before it is marked.
 */
protected function detectMissingFiles()
{
    $untouchedRecords = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
        $this->storage,
        $this->identifiedFileUids
    );

    foreach ($untouchedRecords as $candidate) {
        if ($this->storage->hasFile($candidate['identifier'])) {
            // The file is still there, so the record must not be flagged.
            continue;
        }
        $this->getFileIndexRepository()->markFileAsMissing($candidate['uid']);
    }
}
174
/**
 * Tells whether the extractor accepts the type of the given file.
 *
 * An extractor without file type restrictions accepts every file; otherwise
 * the file type has to be contained in the restriction list.
 *
 * @param File $file
 * @param ExtractorInterface $extractor
 * @return bool
 */
protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
{
    $allowedFileTypes = $extractor->getFileTypeRestrictions();
    $isRuledOut = !empty($allowedFileTypes) && !in_array($file->getType(), $allowedFileTypes);

    return !$isRuledOut;
}
191
/**
 * Adds updated files to the processing queue
 *
 * For every given identifier the index record of this storage is looked up:
 *  - no record: the file is queued as new (queue value null),
 *  - record found: its uid is remembered as identified, and the file is queued
 *    if its modification time differs from the indexed one or if the record
 *    is flagged as missing.
 *
 * @param array $fileIdentifierArray Identifiers of the files found in the storage
 */
protected function detectChangedFilesInStorage(array $fileIdentifierArray)
{
    foreach ($fileIdentifierArray as $fileIdentifier) {
        // Skip files that live inside a processing folder of the storage.
        if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
            continue;
        }

        // Get the modification time for the file identifier from the storage.
        $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
        $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier(
            $this->storage->getUid(),
            $fileIdentifier
        );

        if ($indexRecord === false) {
            // Not indexed yet: queue it without an index record.
            $this->filesToUpdate[$fileIdentifier] = null;
            continue;
        }

        $this->identifiedFileUids[] = $indexRecord['uid'];

        // Both sides are normalised to int before the strict comparison; a driver
        // delivering the timestamp as a numeric string must not make every file
        // look modified. A missing mtime keeps counting as "changed".
        $indexedModificationTime = (int)$indexRecord['modification_date'];
        $currentModificationTime = isset($modificationTime['mtime']) ? (int)$modificationTime['mtime'] : null;

        if ($indexedModificationTime !== $currentModificationTime || $indexRecord['missing']) {
            $this->filesToUpdate[$fileIdentifier] = $indexRecord;
        }
    }
}
220
/**
 * Works off the queue of files that were detected as changed or new.
 *
 * Queued files that already have an index record get that record updated.
 * For files without a record, index records with the same content hash are
 * searched first: if the file of such a record is not present in the storage,
 * the file is assumed to have been moved or renamed and the record is re-used
 * with the new identifier. Otherwise a new index record is created.
 */
protected function processChangedAndNewFiles()
{
    foreach ($this->filesToUpdate as $identifier => $knownRecord) {
        if ($knownRecord != null) {
            // Already indexed: refresh the existing record.
            $indexedFile = $this->getResourceFactory()->getFileObject($knownRecord['uid'], $knownRecord);
            $this->updateIndexEntry($indexedFile);
            continue;
        }

        // Look for index records that carry the same content hash.
        $contentHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $sameContentRecords = $this->getFileIndexRepository()->findByContentHash($contentHash);

        $resolvedFile = null;
        if (!empty($sameContentRecords)) {
            // NOTE(review): the lookup receives the hash only; whether the candidates
            // are limited to this storage is not visible here - confirm.
            foreach ($sameContentRecords as $candidate) {
                if ($this->storage->hasFile($candidate['identifier'])) {
                    continue;
                }
                // The file of this record is gone, so treat it as moved/renamed.
                $resolvedFile = $this->getResourceFactory()->getFileObject($candidate['uid'], $candidate);
                $resolvedFile->updateProperties([
                    'identifier' => $identifier
                ]);
                $this->updateIndexEntry($resolvedFile);
                break;
            }
        }

        // No missing file with the same content hash: create a fresh index record.
        if ($resolvedFile === null) {
            $resolvedFile = $this->createIndexEntry($identifier);
        }
        $this->identifiedFileUids[] = $resolvedFile->getUid();
    }
}
259
/**
 * Writes the image dimensions into the metadata of the file.
 *
 * The core relies on width and height being present in the metadata table,
 * so this is called after every content update and record creation.
 * Nothing happens for non-image files or for storages whose driver type is
 * not 'Local'; remote storages have to deliver the data through extractors.
 *
 * @param File $fileObject
 */
protected function extractRequiredMetaData(File $fileObject)
{
    if ($fileObject->getType() != File::FILETYPE_IMAGE || $this->storage->getDriverType() !== 'Local') {
        return;
    }

    $localFilePath = $fileObject->getForLocalProcessing(false);
    $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $localFilePath);

    $dimensions = [
        'width' => $imageInfo->getWidth(),
        'height' => $imageInfo->getHeight(),
    ];

    $this->getMetaDataRepository()->update($fileObject->getUid(), $dimensions);
    $fileObject->_updateMetaDataProperties($dimensions);
}
281
282 /****************************
283 *
284 * UTILITY
285 *
286 ****************************/
287
/**
 * Builds the property array that is stored in sys_file for a file.
 *
 * The file information reported by the storage is mapped to the property
 * names of a file object and then completed with the file type, the sha1
 * content hash, the file extension and a reset "missing" flag.
 *
 * @param string $identifier Identifier of the file within the storage
 * @return array
 */
protected function gatherFileInformationArray($identifier)
{
    $driverFileInfo = $this->storage->getFileInfoByIdentifier($identifier);
    $properties = $this->transformFromDriverFileInfoArrayToFileObjectFormat($driverFileInfo);

    $properties['type'] = $this->getFileType($properties['mime_type']);
    $properties['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
    $properties['extension'] = PathUtility::pathinfo($properties['name'], PATHINFO_EXTENSION);
    // A file whose information could just be gathered is not missing.
    $properties['missing'] = 0;

    return $properties;
}
305
/**
 * Derives the sys_file type from a mime type.
 *
 * Only the part in front of the first slash is looked at, case-insensitively.
 * Anything that is not listed below results in File::FILETYPE_UNKNOWN.
 *
 * @param string $mimeType
 * @return int|string Value of the matching File::FILETYPE_* constant
 */
protected function getFileType($mimeType)
{
    $typesByMainType = [
        'text' => File::FILETYPE_TEXT,
        'image' => File::FILETYPE_IMAGE,
        'audio' => File::FILETYPE_AUDIO,
        'video' => File::FILETYPE_VIDEO,
        'application' => File::FILETYPE_APPLICATION,
        'software' => File::FILETYPE_APPLICATION,
    ];

    $mimeTypeParts = explode('/', $mimeType);
    $mainType = strtolower($mimeTypeParts[0]);

    return array_key_exists($mainType, $typesByMainType)
        ? $typesByMainType[$mainType]
        : File::FILETYPE_UNKNOWN;
}
337
/**
 * Translates the keys of a driver file info array into the property names
 * a file object is persisted with.
 *
 * Keys with a mapping are renamed, keys mapped to null are dropped, and all
 * other keys are passed through unchanged.
 *
 * @param array $fileInfo File information as returned by the driver
 *
 * @return array
 */
protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
{
    // driver key => file property; null means the value is not taken over
    $propertyNameByDriverKey = [
        'size' => 'size',
        'atime' => null,
        'mtime' => 'modification_date',
        'ctime' => 'creation_date',
        'mimetype' => 'mime_type'
    ];

    $fileProperties = [];
    foreach ($fileInfo as $driverKey => $value) {
        if (!array_key_exists($driverKey, $propertyNameByDriverKey)) {
            $fileProperties[$driverKey] = $value;
            continue;
        }

        $propertyName = $propertyNameByDriverKey[$driverKey];
        if ($propertyName === null) {
            continue;
        }
        $fileProperties[$propertyName] = $value;
    }

    return $fileProperties;
}
370
/**
 * Provides the FileIndexRepository instance used to read and write index records.
 *
 * @return FileIndexRepository
 */
protected function getFileIndexRepository()
{
    return FileIndexRepository::getInstance();
}
380
/**
 * Provides the MetaDataRepository instance used to persist file metadata.
 *
 * @return MetaDataRepository
 */
protected function getMetaDataRepository()
{
    return MetaDataRepository::getInstance();
}
390
/**
 * Provides the ResourceFactory instance used to build file objects from index records.
 *
 * @return ResourceFactory
 */
protected function getResourceFactory()
{
    return ResourceFactory::getInstance();
}
400
/**
 * Provides the ExtractorRegistry instance the metadata extractors are obtained from.
 *
 * @return ExtractorRegistry
 */
protected function getExtractorRegistry()
{
    return ExtractorRegistry::getInstance();
}
410 }