[!!!][TASK] Remove deprecated code in EXT:core
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException;
18 use TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException;
19 use TYPO3\CMS\Core\Resource\File;
20 use TYPO3\CMS\Core\Resource\ResourceFactory;
21 use TYPO3\CMS\Core\Resource\ResourceStorage;
22 use TYPO3\CMS\Core\Type\File\ImageInfo;
23 use TYPO3\CMS\Core\Utility\GeneralUtility;
24
/**
 * The FAL indexer.
 *
 * Keeps the file index of a single storage in sync with the files the storage
 * actually contains: it creates and updates index records, flags indexed files
 * that disappeared as missing, and runs the registered metadata extractors.
 */
class Indexer
{
    /**
     * Queue of files that need (re-)indexing, keyed by file identifier.
     * The value is the existing index record for changed files, or null for
     * files that have no index record yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * Uids of the index records that were matched to an existing file.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * The storage this indexer works on.
     *
     * @var ResourceStorage
     */
    protected $storage;

    /**
     * Lazily loaded extractors for the storage's driver type (null until first use).
     *
     * @var ExtractorInterface[]
     */
    protected $extractionServices;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates the index record for a file that is not indexed yet.
     *
     * Image dimensions are always extracted; the registered extractors only run
     * when the storage has automatic metadata extraction enabled.
     *
     * @param string $identifier Non-empty file identifier within the storage
     * @return File
     * @throws \InvalidArgumentException If the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier)
    {
        // is_string() already rejects null, so no separate isset() check is needed.
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException(
                'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
                1401732565
            );
        }

        $record = $this->getFileIndexRepository()->addRaw(
            $this->gatherFileInformationArray($identifier)
        );
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $this->extractRequiredMetaData($fileObject);

        if ($this->storage->autoExtractMetadataEnabled()) {
            $this->extractMetaData($fileObject);
        }

        return $fileObject;
    }

    /**
     * Refreshes the index record of an already indexed file with the current
     * file information and re-extracts the required metadata.
     *
     * @param File $fileObject
     */
    public function updateIndexEntry(File $fileObject)
    {
        $fileObject->updateProperties(
            $this->gatherFileInformationArray($fileObject->getIdentifier())
        );
        $this->getFileIndexRepository()->update($fileObject);
        $this->extractRequiredMetaData($fileObject);
    }

    /**
     * Synchronizes the index with the storage.
     *
     * Collects the file identifiers below the storage's root level folder, queues
     * new and changed files, processes that queue and finally flags indexed files
     * that were not encountered and no longer exist as missing.
     */
    public function processChangesInStorages()
    {
        $rootFolderIdentifier = $this->storage->getRootLevelFolder(false)->getIdentifier();
        // NOTE(review): the two boolean flags are presumably "use filters" and "recursive" - confirm against ResourceStorage.
        $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, true, true);

        $this->detectChangedFilesInStorage($availableFiles);
        $this->processChangedAndNewFiles();

        $this->detectMissingFiles();
    }

    /**
     * Runs the metadata extractors for files of this storage whose indexing is
     * outstanding. Records whose file no longer exists are flagged as missing.
     *
     * @param int $maximumFileCount Passed on to the repository query as limit (default -1)
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $outstandingRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding(
            $this->storage,
            $maximumFileCount
        );

        foreach ($outstandingRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

            if (!$fileObject->exists()) {
                // Nothing to extract from: mark the file as missing and go on with the next record.
                $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
                continue;
            }

            try {
                $this->extractMetaData($fileObject);
            } catch (InsufficientFileAccessPermissionsException $e) {
                // Deliberately skipped: the file is not accessible.
            } catch (IllegalFileExtensionException $e) {
                // Deliberately skipped: the file extension is not allowed.
            }
        }
    }

    /**
     * Extracts metadata for the given file with all applicable extractors,
     * merges the results and persists them.
     *
     * Slices are merged in ascending priority order, so values of a higher
     * priority win. The file's current metadata acts as the priority 0 base.
     * Extractors sharing a priority are all merged, in the order the extractors
     * are returned by getExtractionServices(); none of them is dropped.
     *
     * @param File $fileObject
     */
    public function extractMetaData(File $fileObject)
    {
        $existingMetaData = $fileObject->_getMetaData();

        // Priority-keyed view handed to every extractor as "previously extracted data".
        // Its shape (index 0 = current metadata, other keys = extractor priority) is left
        // unchanged so that existing extractors keep receiving what they expect.
        $newMetaData = [
            0 => $existingMetaData,
        ];
        // All slices grouped by priority. Unlike $newMetaData a priority may hold several
        // slices here, which prevents extractors with equal priority from overwriting each other.
        $slicesByPriority = [
            0 => [$existingMetaData],
        ];

        foreach ($this->getExtractionServices() as $service) {
            if (!$this->isFileTypeSupportedByExtractor($fileObject, $service) || !$service->canProcess($fileObject)) {
                continue;
            }
            $priority = $service->getPriority();
            $extractedMetaData = $service->extractMetaData($fileObject, $newMetaData);
            $newMetaData[$priority] = $extractedMetaData;
            $slicesByPriority[$priority][] = $extractedMetaData;
        }

        // Sort by priority so that merging happens in order of precedence.
        ksort($slicesByPriority);

        $metaData = [];
        foreach ($slicesByPriority as $slices) {
            foreach ($slices as $slice) {
                $metaData = array_merge($metaData, $slice);
            }
        }

        $fileUid = $fileObject->getUid();
        $fileObject->_updateMetaDataProperties($metaData);
        $this->getMetaDataRepository()->update($fileUid, $metaData);
        $this->getFileIndexRepository()->updateIndexingTime($fileUid);
    }

    /**
     * Returns the extractors registered for the storage's driver type.
     * The registry is queried once; the result is cached on this instance.
     *
     * @return ExtractorInterface[]
     */
    protected function getExtractionServices()
    {
        if ($this->extractionServices === null) {
            $this->extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport(
                $this->storage->getDriverType()
            );
        }
        return $this->extractionServices;
    }

    /**
     * Flags indexed files as missing.
     *
     * By now all files of the storage have been looked at, so index records that
     * were not touched in this run are candidates; each one is flagged as missing
     * only if the storage confirms that the file does not exist.
     */
    protected function detectMissingFiles()
    {
        $candidates = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($candidates as $record) {
            if ($this->storage->hasFile($record['identifier'])) {
                continue;
            }
            $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
        }
    }

    /**
     * Checks whether the extractor accepts the file according to its file type
     * restrictions. An extractor without restrictions accepts every file type.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();

        // The comparison stays non-strict on purpose: the value types used in the
        // restriction lists of third party extractors are not known here.
        return empty($fileTypeRestrictions) || in_array($file->getType(), $fileTypeRestrictions);
    }

    /**
     * Queues new and changed files for processing.
     *
     * A file is queued when it has no index record, when its modification time
     * differs from the indexed one, or when its index record is flagged as missing.
     * Files within the processing folder are ignored.
     *
     * @param array $fileIdentifierArray File identifiers found in the storage
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // Skip processed files.
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }

            // Modification time as reported by the storage ...
            $fileInfo = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            // ... and the index record holding the modification time known to the database.
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier(
                $this->storage->getUid(),
                $fileIdentifier
            );

            if ($indexRecord === false) {
                // Not indexed yet: queue for creation.
                $this->filesToUpdate[$fileIdentifier] = null;
                continue;
            }

            $this->identifiedFileUids[] = $indexRecord['uid'];

            // Both sides are cast to int: a driver reporting the mtime as numeric string
            // would otherwise never compare as identical and trigger a re-index on every run.
            $isModified = (int)$indexRecord['modification_date'] !== (int)$fileInfo['mtime'];
            if ($isModified || $indexRecord['missing']) {
                $this->filesToUpdate[$fileIdentifier] = $indexRecord;
            }
        }
    }

    /**
     * Processes the files that have been detected as changed or new.
     *
     * A new file whose content hash matches an index record of a file that no
     * longer exists is treated as moved/renamed: that record is re-used instead
     * of creating a new one. The queue is emptied once it has been processed.
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $indexRecord) {
            if ($indexRecord !== null) {
                // Existing index record: refresh it.
                $this->updateIndexEntry(
                    $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord)
                );
                continue;
            }

            // Search for index records with the same content hash.
            $contentHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
            $sameContentRecords = $this->getFileIndexRepository()->findByContentHash($contentHash);
            $fileObject = null;

            if (!empty($sameContentRecords)) {
                foreach ($sameContentRecords as $candidate) {
                    if ($this->storage->hasFile($candidate['identifier'])) {
                        continue;
                    }
                    // The indexed file is gone, so assume it has been moved/renamed to $identifier.
                    $fileObject = $this->getResourceFactory()->getFileObject($candidate['uid'], $candidate);
                    $fileObject->updateProperties([
                        'identifier' => $identifier
                    ]);
                    $this->updateIndexEntry($fileObject);
                    $this->identifiedFileUids[] = $fileObject->getUid();
                    break;
                }
            }

            // Create a new index record when no missing file with the same content hash was found.
            if ($fileObject === null) {
                $fileObject = $this->createIndexEntry($identifier);
                $this->identifiedFileUids[] = $fileObject->getUid();
            }
        }

        // Everything queued has been handled. Draining the queue prevents a later run on the
        // same instance from processing stale entries (e.g. hashing a file that is gone by then).
        $this->filesToUpdate = [];
    }

    /**
     * Stores the image dimensions in the metadata of image files.
     *
     * The core needs image sizes in the metadata table, so this is done after
     * every content update and record creation. Only storages using the "Local"
     * driver are handled; remote storages must provide the data with extractors.
     *
     * @param File $fileObject
     */
    protected function extractRequiredMetaData(File $fileObject)
    {
        $isLocalImage = $fileObject->getType() == File::FILETYPE_IMAGE
            && $this->storage->getDriverType() === 'Local';
        if (!$isLocalImage) {
            return;
        }

        $rawFileLocation = $fileObject->getForLocalProcessing(false);
        $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
        $metaData = [
            'width' => $imageInfo->getWidth(),
            'height' => $imageInfo->getHeight(),
        ];
        $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
        $fileObject->_updateMetaDataProperties($metaData);
    }

    /****************************
     *
     * UTILITY
     *
     ****************************/

    /**
     * Collects the file information to be cached in sys_file: the file info
     * reported by the storage mapped to file property names, plus the derived
     * file type, the sha1 content hash and a cleared "missing" flag.
     *
     * @param string $identifier
     * @return array
     */
    protected function gatherFileInformationArray($identifier)
    {
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat(
            $this->storage->getFileInfoByIdentifier($identifier)
        );
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps a mime type to a sys_file type, based on the part in front of the
     * slash (compared case-insensitively).
     *
     * @param string $mimeType
     * @return int|string One of the File::FILETYPE_* constants (defined outside of this file)
     */
    protected function getFileType($mimeType)
    {
        $typesByMainType = [
            'text' => File::FILETYPE_TEXT,
            'image' => File::FILETYPE_IMAGE,
            'audio' => File::FILETYPE_AUDIO,
            'video' => File::FILETYPE_VIDEO,
            'application' => File::FILETYPE_APPLICATION,
            'software' => File::FILETYPE_APPLICATION,
        ];

        $mainTypeParts = explode('/', $mimeType);
        $mainType = strtolower($mainTypeParts[0]);

        return array_key_exists($mainType, $typesByMainType)
            ? $typesByMainType[$mainType]
            : File::FILETYPE_UNKNOWN;
    }

    /**
     * Maps the keys of a driver file info array to the property names that are
     * persisted for a file object.
     *
     * Keys mapped to null are dropped, keys without a mapping are passed through
     * unchanged.
     *
     * @param array $fileInfo File information as returned by the driver
     *
     * @return array
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        // 'driverKey' => 'fileProperty': the key comes from the driver, the value is the
        // property name of the file; null means the value is not persisted.
        $propertyByDriverKey = [
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        ];

        $mappedFileInfo = [];
        foreach ($fileInfo as $driverKey => $value) {
            if (!array_key_exists($driverKey, $propertyByDriverKey)) {
                $mappedFileInfo[$driverKey] = $value;
                continue;
            }
            $property = $propertyByDriverKey[$driverKey];
            if ($property !== null) {
                $mappedFileInfo[$property] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * Returns an instance of the FileIndexRepository
     *
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return FileIndexRepository::getInstance();
    }

    /**
     * Returns an instance of the MetaDataRepository
     *
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return MetaDataRepository::getInstance();
    }

    /**
     * Returns the ResourceFactory
     *
     * @return ResourceFactory
     */
    protected function getResourceFactory()
    {
        return ResourceFactory::getInstance();
    }

    /**
     * Returns an instance of the ExtractorRegistry
     *
     * @return ExtractorRegistry
     */
    protected function getExtractorRegistry()
    {
        return ExtractorRegistry::getInstance();
    }
}