b52b5346008f5bdb2753a16c3bcec80e6a40cbf0
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Resource\ResourceStorage;
18 use TYPO3\CMS\Core\Resource\File;
19 use TYPO3\CMS\Core\Type\File\ImageInfo;
20 use TYPO3\CMS\Core\Utility\GeneralUtility;
21
/**
 * The FAL indexer.
 *
 * Keeps the file index records of a single storage in sync with the files
 * the storage actually reports, and triggers metadata extraction for
 * indexed files.
 */
class Indexer {

    /**
     * Files queued for (re-)indexing, keyed by file identifier.
     * The value is the existing index record, or NULL if the file has no
     * index record yet.
     *
     * @var array
     */
    protected $filesToUpdate = array();

    /**
     * UIDs of all index records that were matched to a file during the
     * current run; everything else in the storage is a candidate for "missing".
     *
     * @var int[]
     */
    protected $identifiedFileUids = array();

    /**
     * The storage this indexer works on
     *
     * @var ResourceStorage
     */
    protected $storage = NULL;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage) {
        $this->storage = $storage;
    }

    /**
     * Creates a new index entry for the given file identifier and extracts
     * the metadata the core requires (see extractRequiredMetaData()).
     *
     * @param string $identifier Non-empty identifier of the file within the storage
     * @return File The file object built from the freshly created index record
     * @throws \InvalidArgumentException If the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier) {
        // is_string() already rejects NULL, so no separate isset() check is needed
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
        }
        $fileProperties = $this->gatherFileInformationArray($identifier);
        $record = $this->getFileIndexRepository()->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $this->extractRequiredMetaData($fileObject);
        return $fileObject;
    }

    /**
     * Refreshes the index entry of an already indexed file with the
     * information currently reported by the storage.
     *
     * @param File $fileObject The file whose index record is updated
     * @return void
     */
    public function updateIndexEntry(File $fileObject) {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);
        $this->getFileIndexRepository()->update($fileObject);
        $this->extractRequiredMetaData($fileObject);
    }

    /**
     * Runs a full synchronisation of the storage: detects new and changed
     * files, (re-)indexes them and finally marks vanished files as missing.
     *
     * @return void
     */
    public function processChangesInStorages() {
        // Start every run with a clean state. Without this, a second call on
        // the same instance would process the queue of the previous run again
        // (creating duplicate index records for files that were new back then).
        $this->filesToUpdate = array();
        $this->identifiedFileUids = array();

        // get all file-identifiers from the storage
        $rootFolderIdentifier = $this->storage->getRootLevelFolder()->getIdentifier();
        $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, TRUE, TRUE);

        $this->detectChangedFilesInStorage($availableFiles);
        $this->processChangedAndNewFiles();

        $this->detectMissingFiles();
    }

    /**
     * Runs all registered metadata extractors that support the driver of the
     * storage on the files whose indexing is still outstanding.
     *
     * @param int $maximumFileCount Passed on to the repository as limit for the number of records to fetch
     * @return void
     */
    public function runMetaDataExtraction($maximumFileCount = -1) {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        $extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());

        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

            // Key 0 holds the metadata already known; each extractor result is
            // stored under the priority of the extractor.
            // NOTE(review): extractors sharing the same priority overwrite each
            // other here, and priority 0 replaces the existing metadata.
            $newMetaData = array(
                0 => $fileObject->_getMetaData()
            );
            foreach ($extractionServices as $service) {
                if ($service->canProcess($fileObject)) {
                    $newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
                }
            }

            // Merge in ascending key order, so results stored under a higher
            // priority override those stored under a lower one.
            ksort($newMetaData);
            $metaData = array();
            foreach ($newMetaData as $data) {
                // Ignore anything that is not an array; array_merge() would
                // otherwise fail and wipe the metadata collected so far.
                if (is_array($data)) {
                    $metaData = array_merge($metaData, $data);
                }
            }

            $fileObject->_updateMetaDataProperties($metaData);
            $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
            $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
        }
    }

    /**
     * Since by now all files in the storage have been looked at, it is safe to
     * assume that files which are indexed but were not touched in this run
     * are missing.
     *
     * @return void
     */
    protected function detectMissingFiles() {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            // double check with the storage before flagging the record
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Adds new and updated files to the processing queue and remembers the
     * UIDs of all index records that still have a file in the storage.
     *
     * @param array $fileIdentifierArray Identifiers of the files found in the storage
     * @return void
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray) {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
            // Look if the modification time in FS differs from the one in database (key needed on timestamps)
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

            if ($indexRecord === FALSE) {
                // not indexed yet, NULL marks the file as new
                $this->filesToUpdate[$fileIdentifier] = NULL;
                continue;
            }

            $this->identifiedFileUids[] = $indexRecord['uid'];

            // Compare as integers: the index record may deliver the timestamp
            // as a numeric string, in which case a strict comparison against
            // an integer mtime would flag every file as changed on every run.
            $isModified = (int)$indexRecord['modification_date'] !== (int)$modificationTime['mtime'];
            if ($isModified || $indexRecord['missing']) {
                $this->filesToUpdate[$fileIdentifier] = $indexRecord;
            }
        }
    }

    /**
     * Processes the files which have been detected as "changed or new"
     * in the storage.
     *
     * A new file whose content hash matches an index record pointing to a
     * no longer existing file is treated as moved/renamed and re-uses that
     * record; otherwise a new index record is created.
     *
     * @return void
     */
    protected function processChangedAndNewFiles() {
        foreach ($this->filesToUpdate as $identifier => $data) {
            // NULL is the explicit marker for "no index record yet"
            if ($data === NULL) {
                // search for files with same content hash in indexed storage
                // NOTE(review): findByContentHash() is called without the
                // storage - confirm it cannot return records of other storages.
                $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                $fileObject = NULL;
                if (!empty($files)) {
                    foreach ($files as $fileIndexEntry) {
                        // check if file is missing then we assume it's moved/renamed
                        if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                            $fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
                            $fileObject->updateProperties(array(
                                'identifier' => $identifier
                            ));
                            $this->updateIndexEntry($fileObject);
                            $this->identifiedFileUids[] = $fileObject->getUid();
                            break;
                        }
                    }
                }
                // create new index when no missing file with same content hash is found
                if ($fileObject === NULL) {
                    $fileObject = $this->createIndexEntry($identifier);
                    $this->identifiedFileUids[] = $fileObject->getUid();
                }
            } else {
                // update existing file
                $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                $this->updateIndexEntry($fileObject);
            }
        }
    }

    /**
     * Since the core desperately needs image sizes in the metadata table, put them there.
     * This should be called after every "content" update and "record" creation.
     *
     * @param File $fileObject The file to extract width and height for
     * @return void
     */
    protected function extractRequiredMetaData(File $fileObject) {
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(FALSE);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = array(
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            );
            $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
            $fileObject->_updateMetaDataProperties($metaData);
        }
    }

    /****************************
     *
     *         UTILITY
     *
     ****************************/

    /**
     * Collects the information to be cached in sys_file
     *
     * The driver information is mapped to file property names and enriched
     * with the file type, the SHA1 content hash and the file extension. The
     * "missing" flag is reset, as the information was just read from the storage.
     *
     * @param string $identifier Identifier of the file within the storage
     * @return array File properties keyed by property name
     */
    protected function gatherFileInformationArray($identifier) {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps the mimetype to a sys_file table type
     *
     * Only the part before the first slash is evaluated, case-insensitively.
     *
     * @param string $mimeType Mime type such as "image/png"
     * @return string One of the File::FILETYPE_* constants, FILETYPE_UNKNOWN for unmapped types
     */
    protected function getFileType($mimeType) {
        list($fileType) = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * However it happened, the properties of a file object which
     * are persisted to the database are named differently than the
     * properties the driver returns in getFileInfo.
     * Therefore a mapping must happen.
     *
     * Keys mapped to NULL are dropped, keys without a mapping are passed
     * through unchanged.
     *
     * @param array $fileInfo File information as returned by the driver
     *
     * @return array File information keyed by file property names
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo) {
        $mappingInfo = array(
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => NULL,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        );
        $mappedFileInfo = array();
        foreach ($fileInfo as $key => $value) {
            // array_key_exists() instead of isset(): a NULL mapping must be
            // recognised as "known key, but not to be persisted"
            if (!array_key_exists($key, $mappingInfo)) {
                $mappedFileInfo[$key] = $value;
            } elseif ($mappingInfo[$key] !== NULL) {
                $mappedFileInfo[$mappingInfo[$key]] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * Returns an instance of the FileIndexRepository
     *
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository() {
        return FileIndexRepository::getInstance();
    }

    /**
     * Returns an instance of the MetaDataRepository
     *
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository() {
        return MetaDataRepository::getInstance();
    }

    /**
     * Returns the ResourceFactory
     *
     * @return \TYPO3\CMS\Core\Resource\ResourceFactory
     */
    protected function getResourceFactory() {
        return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
    }

    /**
     * Returns an instance of the ExtractorRegistry
     *
     * @return ExtractorRegistry
     */
    protected function getExtractorRegistry() {
        return ExtractorRegistry::getInstance();
    }

}