[TASK] Removes extra empty lines
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Resource\File;
18 use TYPO3\CMS\Core\Resource\ResourceStorage;
19 use TYPO3\CMS\Core\Type\File\ImageInfo;
20 use TYPO3\CMS\Core\Utility\GeneralUtility;
21
/**
 * The FAL indexer.
 *
 * Keeps the file index of one storage in sync with the files that actually
 * exist in that storage: it creates and updates index records, detects
 * changed, moved and missing files, and runs the registered metadata
 * extractors on indexed files.
 */
class Indexer
{
    /**
     * Files queued for (re-)indexing, keyed by file identifier. The value is
     * the existing index record, or null if the file has no index record yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * Uids of all index records that were matched to an existing file
     * during the current run.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * The storage this indexer works on.
     *
     * @var ResourceStorage
     */
    protected $storage = null;

    /**
     * Lazily loaded extractors, see getExtractionServices().
     *
     * @var ExtractorInterface[]
     */
    protected $extractionServices = null;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates an index record for the file with the given identifier and
     * returns the matching file object.
     *
     * @param string $identifier Non-empty identifier of the file within the storage
     * @return File
     * @throws \InvalidArgumentException if the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier)
    {
        // is_string() already rejects null, so no separate isset() check is needed.
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
        }
        $fileProperties = $this->gatherFileInformationArray($identifier);
        $record = $this->getFileIndexRepository()->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $this->extractRequiredMetaData($fileObject);
        return $fileObject;
    }

    /**
     * Refreshes the index record of an already indexed file with the
     * current file information from the storage.
     *
     * @param File $fileObject
     * @return void
     */
    public function updateIndexEntry(File $fileObject)
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);
        $this->getFileIndexRepository()->update($fileObject);
        $this->extractRequiredMetaData($fileObject);
    }

    /**
     * Synchronizes the index with the storage: detects changed and new
     * files, (re-)indexes them and finally marks vanished files as missing.
     *
     * @return void
     */
    public function processChangesInStorages()
    {
        // Get all file identifiers from the storage, starting at its root level folder.
        $rootFolderIdentifier = $this->storage->getRootLevelFolder()->getIdentifier();
        $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, true, true);

        $this->detectChangedFilesInStorage($availableFiles);
        $this->processChangedAndNewFiles();

        // Must run last: it relies on the uids collected by the two steps above.
        $this->detectMissingFiles();
    }

    /**
     * Runs the metadata extraction for files of this storage whose
     * indexing is still outstanding.
     *
     * @param int $maximumFileCount Passed through to the repository as the record limit
     * @return void
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            $this->extractMetaData($fileObject);
        }
    }

    /**
     * Extracts metadata for the given file object with all applicable
     * extractors, merges the results and persists them.
     *
     * @param File $fileObject
     */
    public function extractMetaData(File $fileObject)
    {
        // Slot 0 holds the metadata the file already has.
        $newMetaData = [
            0 => $fileObject->_getMetaData()
        ];

        // Loop through available extractors and fetch metadata for the given file.
        // NOTE(review): results are keyed by extractor priority, so two extractors
        // with the same priority overwrite each other, and an extractor with
        // priority 0 replaces the existing metadata in slot 0.
        foreach ($this->getExtractionServices() as $service) {
            if ($this->isFileTypeSupportedByExtractor($fileObject, $service) && $service->canProcess($fileObject)) {
                $newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
            }
        }

        // Sort by priority so that, when merging, values from a numerically
        // higher priority overwrite those from a lower one.
        ksort($newMetaData);

        // Merge the collected metadata.
        $metaData = [];
        foreach ($newMetaData as $data) {
            $metaData = array_merge($metaData, $data);
        }

        $fileObject->_updateMetaDataProperties($metaData);
        $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Returns the extractors that support the driver type of the storage.
     * The list is fetched from the registry once and then reused.
     *
     * @return ExtractorInterface[]
     */
    protected function getExtractionServices()
    {
        if ($this->extractionServices === null) {
            $this->extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
        }
        return $this->extractionServices;
    }

    /**
     * Marks index records as missing when their file no longer exists.
     *
     * Since by now all files in the storage have been looked at, it is safe
     * to assume that files which are indexed but were not touched in this
     * run are missing. Each candidate is still verified against the storage
     * before it is marked.
     *
     * @return void
     */
    protected function detectMissingFiles()
    {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Checks whether the extractor supports this file according to its
     * file type restrictions. An extractor without restrictions supports
     * every file type.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();
        if (empty($fileTypeRestrictions)) {
            return true;
        }
        return in_array($file->getType(), $fileTypeRestrictions);
    }

    /**
     * Adds new and changed files to the processing queue and records the
     * uids of all index records that still have a file in the storage.
     *
     * @param array $fileIdentifierArray Identifiers of the files found in the storage
     * @return void
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // Skip processed files.
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for the file identifier from the storage.
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

            if ($indexRecord === false) {
                // Not indexed yet: queue it for index creation.
                $this->filesToUpdate[$fileIdentifier] = null;
                continue;
            }

            $this->identifiedFileUids[] = $indexRecord['uid'];

            // Compare as integers: a strict comparison of a numeric string from
            // the index record with an integer from the driver would never match
            // and would queue every file for an update on every run.
            $isModified = (int)$indexRecord['modification_date'] !== (int)$modificationTime['mtime'];
            if ($isModified || $indexRecord['missing']) {
                $this->filesToUpdate[$fileIdentifier] = $indexRecord;
            }
        }
    }

    /**
     * Processes the files which have been detected as "changed or new"
     * in the storage.
     *
     * @return void
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            // null is the explicit "no index record yet" marker, so compare strictly.
            if ($data === null) {
                // Search for files with the same content hash in the index.
                $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                $fileObject = null;
                if (!empty($files)) {
                    foreach ($files as $fileIndexEntry) {
                        // If the indexed file is gone, we assume it was moved/renamed
                        // to the new identifier and reuse its index record.
                        if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                            $fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
                            $fileObject->updateProperties([
                                'identifier' => $identifier
                            ]);
                            $this->updateIndexEntry($fileObject);
                            $this->identifiedFileUids[] = $fileObject->getUid();
                            break;
                        }
                    }
                }
                // Create a new index record when no missing file with the same
                // content hash was found.
                if ($fileObject === null) {
                    $fileObject = $this->createIndexEntry($identifier);
                    $this->identifiedFileUids[] = $fileObject->getUid();
                }
            } else {
                // Update the existing index record.
                $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                $this->updateIndexEntry($fileObject);
            }
        }
    }

    /**
     * Writes the image dimensions into the metadata of image files.
     *
     * Since the core desperately needs image sizes in the metadata table,
     * this should be called after every "content" update and "record" creation.
     *
     * @param File $fileObject
     * @return void
     */
    protected function extractRequiredMetaData(File $fileObject)
    {
        // Only done for the 'Local' driver; remote storages must provide
        // this data with extractors.
        if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = [
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            ];
            $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
            $fileObject->_updateMetaDataProperties($metaData);
        }
    }

    /****************************
     *
     *         UTILITY
     *
     ****************************/

    /**
     * Collects the information to be cached in sys_file.
     *
     * @param string $identifier
     * @return array File properties in the format used by file objects
     */
    protected function gatherFileInformationArray($identifier)
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
        // The file information was just read from the storage, so the file exists.
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps the mime type to a sys_file table type.
     *
     * Only the part before the first slash is evaluated, case-insensitively.
     *
     * @param string $mimeType
     * @return string One of the File::FILETYPE_* constants
     */
    protected function getFileType($mimeType)
    {
        list($fileType) = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                return File::FILETYPE_TEXT;
            case 'image':
                return File::FILETYPE_IMAGE;
            case 'audio':
                return File::FILETYPE_AUDIO;
            case 'video':
                return File::FILETYPE_VIDEO;
            case 'application':
            case 'software':
                return File::FILETYPE_APPLICATION;
            default:
                return File::FILETYPE_UNKNOWN;
        }
    }

    /**
     * However it happened, the properties of a file object which
     * are persisted to the database are named differently than the
     * properties the driver returns in getFileInfo.
     * Therefore a mapping must happen.
     *
     * Keys without a mapping are passed through unchanged; keys mapped
     * to null are dropped.
     *
     * @param array $fileInfo File information as returned by the driver
     *
     * @return array File information keyed by file object property names
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = [
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        ];
        $mappedFileInfo = [];
        foreach ($fileInfo as $key => $value) {
            if (!array_key_exists($key, $mappingInfo)) {
                $mappedFileInfo[$key] = $value;
                continue;
            }
            if ($mappingInfo[$key] !== null) {
                $mappedFileInfo[$mappingInfo[$key]] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * Returns an instance of the FileIndexRepository
     *
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return FileIndexRepository::getInstance();
    }

    /**
     * Returns an instance of the MetaDataRepository
     *
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return MetaDataRepository::getInstance();
    }

    /**
     * Returns the ResourceFactory
     *
     * @return \TYPO3\CMS\Core\Resource\ResourceFactory
     */
    protected function getResourceFactory()
    {
        return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
    }

    /**
     * Returns an instance of the ExtractorRegistry
     *
     * @return ExtractorRegistry
     */
    protected function getExtractorRegistry()
    {
        return ExtractorRegistry::getInstance();
    }
}