[BUGFIX] Guard FAL API when called with invalid identifiers
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /***************************************************************
5 * Copyright notice
6 *
7 * (c) 2013 Steffen Ritter <steffen.ritter@typo3.org>
8 * All rights reserved
9 *
10 * This script is part of the TYPO3 project. The TYPO3 project is
11 * free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * The GNU General Public License can be found at
17 * http://www.gnu.org/copyleft/gpl.html.
18 * A copy is found in the text file GPL.txt and important notices to the license
19 * from the author is found in LICENSE.txt distributed with these scripts.
20 *
21 *
22 * This script is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * This copyright notice MUST APPEAR in all copies of the script!
28 ***************************************************************/
29
30 use TYPO3\CMS\Core\Resource\ResourceStorage;
31 use TYPO3\CMS\Core\Resource\File;
32
/**
 * The FAL indexer.
 *
 * Keeps the file index records of a single storage in sync with the files
 * that storage reports, and runs the registered metadata extractors on
 * indexed files.
 */
class Indexer {

	/**
	 * Files detected as new or changed during the current run, keyed by file
	 * identifier. The value is the existing index record (array) or NULL if
	 * the file has no index record yet.
	 *
	 * @var array
	 */
	protected $filesToUpdate = array();

	/**
	 * Uids of all index records that could be matched to a file of the
	 * storage during the current run.
	 *
	 * @var integer[]
	 */
	protected $identifiedFileUids = array();

	/**
	 * The storage this indexer works on.
	 *
	 * @var ResourceStorage
	 */
	protected $storage = NULL;

	/**
	 * @param ResourceStorage $storage The storage whose files are indexed
	 */
	public function __construct(ResourceStorage $storage) {
		$this->storage = $storage;
	}

	/**
	 * Creates the index entry for a file that is not indexed yet and returns
	 * the resulting file object.
	 *
	 * @param string $identifier Identifier of the file within the storage
	 * @return File
	 * @throws \InvalidArgumentException If the identifier is not a non-empty string
	 */
	public function createIndexEntry($identifier) {
		// is_string() already rejects NULL, so no separate isset() check is needed.
		if (!is_string($identifier) || $identifier === '') {
			throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
		}
		$fileProperties = $this->gatherFileInformationArray($identifier);
		$record = $this->getFileIndexRepository()->addRaw($fileProperties);
		$fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
		$this->extractRequiredMetaData($fileObject);
		return $fileObject;
	}

	/**
	 * Refreshes the index entry of an already indexed file with the
	 * information currently reported by the storage.
	 *
	 * @param File $fileObject The file whose index record is updated
	 * @return void
	 */
	public function updateIndexEntry(File $fileObject) {
		$updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
		$fileObject->updateProperties($updatedInformation);
		$this->getFileIndexRepository()->update($fileObject);
		$this->extractRequiredMetaData($fileObject);
	}

	/**
	 * Synchronizes the index with the storage: detects new and changed files,
	 * (re-)indexes them and finally flags index records without a file as
	 * missing.
	 *
	 * @return void
	 */
	public function processChangesInStorages() {
		// Get all file identifiers from the storage, starting at its root level folder
		$rootFolderIdentifier = $this->storage->getRootLevelFolder()->getIdentifier();
		$availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, TRUE, TRUE);
		$this->detectChangedFilesInStorage($availableFiles);
		$this->processChangedAndNewFiles();

		// Must run last: it relies on the uids collected by the two steps above
		$this->detectMissingFiles();
	}

	/**
	 * Runs all extractors registered for the driver type of the storage on
	 * the index records the repository reports as outstanding, and persists
	 * the merged metadata.
	 *
	 * @param integer $maximumFileCount Passed on to the index repository as record limit (default -1; presumably "no limit" - confirm against FileIndexRepository)
	 * @return void
	 */
	public function runMetaDataExtraction($maximumFileCount = -1) {
		$fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);

		$extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
		foreach ($fileIndexRecords as $indexRecord) {
			$fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

			// Slot 0 holds the metadata already stored for the file; every
			// extractor result is stored under the extractor's priority.
			// NOTE(review): extractors sharing a priority (or using priority 0)
			// replace each other's slot - confirm this is intended.
			$newMetaData = array(
				0 => $fileObject->_getMetaData()
			);
			foreach ($extractionServices as $service) {
				if ($service->canProcess($fileObject)) {
					$newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
				}
			}
			// Merge in ascending priority order, so values of higher priorities
			// overwrite those of lower ones.
			ksort($newMetaData);
			$metaData = array();
			foreach ($newMetaData as $data) {
				$metaData = array_merge($metaData, $data);
			}
			$fileObject->_updateMetaDataProperties($metaData);
			$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
			$this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
		}
	}

	/**
	 * Since by now all files in the storage have been looked at, it is safe to
	 * assume that files which are indexed but were not touched in this run are
	 * missing.
	 *
	 * Nothing is marked as missing when no index record was identified at all
	 * during this run.
	 *
	 * @return void
	 */
	protected function detectMissingFiles() {
		if (count($this->identifiedFileUids) > 0) {
			$indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList($this->storage, $this->identifiedFileUids);

			foreach ($indexedNotExistentFiles as $record) {
				// Double check against the storage before flagging the record
				if (!$this->storage->hasFile($record['identifier'])) {
					$this->getFileIndexRepository()->markFileAsMissing($record['uid']);
				}
			}
		}
	}

	/**
	 * Adds new and updated files to the processing queue and remembers the
	 * uids of all index records that belong to an existing file.
	 *
	 * @param array $fileIdentifierArray Identifiers of the files found in the storage
	 * @return void
	 */
	protected function detectChangedFilesInStorage(array $fileIdentifierArray) {
		if (count($fileIdentifierArray) === 0) {
			// Nothing to inspect; also keeps the storage untouched, exactly
			// like an empty loop would.
			return;
		}
		// The processing folder does not change while iterating, so resolve
		// its identifier only once.
		$processingFolderIdentifier = $this->storage->getProcessingFolder()->getIdentifier();

		foreach ($fileIdentifierArray as $fileIdentifier) {
			// Skip processed files
			if (strpos($fileIdentifier, $processingFolderIdentifier) === 0) {
				continue;
			}
			// Get the modification time for the file identifier from the storage
			$modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
			// Check whether the modification time in the storage is higher than the one in the database
			$indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

			if ($indexRecord !== FALSE) {
				$this->identifiedFileUids[] = $indexRecord['uid'];

				// Re-index files changed since the last run and files that were
				// flagged as missing but are available again
				if ($indexRecord['modification_date'] < $modificationTime['mtime'] || $indexRecord['missing']) {
					$this->filesToUpdate[$fileIdentifier] = $indexRecord;
				}
			} else {
				// NULL marks a file without any index record
				$this->filesToUpdate[$fileIdentifier] = NULL;
			}
		}
	}

	/**
	 * Processes the files which have been detected as "changed or new"
	 * in the storage.
	 *
	 * A file without index record is first compared by content hash with the
	 * indexed files: if a record flagged as missing has the same hash, the
	 * file is treated as that record's file under a new identifier and the
	 * record is re-used. In every other case a new index entry is created.
	 *
	 * @return void
	 */
	protected function processChangedAndNewFiles() {
		foreach ($this->filesToUpdate as $identifier => $data) {
			if ($data === NULL) {
				// Search for indexed files with the same content hash
				$fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
				$files = $this->getFileIndexRepository()->findByContentHash($fileHash);
				$fileObject = NULL;
				foreach ($files as $fileIndexEntry) {
					if ($fileIndexEntry['missing']) {
						// Re-attach the missing record to the new identifier
						$fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
						$fileObject->updateProperties(array(
							'identifier' => $identifier
						));
						$this->updateIndexEntry($fileObject);
						break;
					}
				}
				if ($fileObject === NULL) {
					// No missing file with the same content was found - this also
					// covers plain copies of files that are still present - so
					// index the file as a new one.
					$fileObject = $this->createIndexEntry($identifier);
				}
				$this->identifiedFileUids[] = $fileObject->getUid();
			} else {
				// Update existing file
				$fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
				$this->updateIndexEntry($fileObject);
			}
		}
	}

	/**
	 * Since the core desperately needs image sizes in the metadata table, put
	 * them there. This should be called after every "content" update and
	 * "record" creation.
	 *
	 * Only images in storages with the "Local" driver are handled; remote
	 * storages must provide the data with extractors.
	 *
	 * @param File $fileObject
	 * @return void
	 */
	protected function extractRequiredMetaData(File $fileObject) {
		if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
			$rawFileLocation = $fileObject->getForLocalProcessing(FALSE);
			$metaData = array();
			// NOTE(review): getimagesize() returns FALSE for files it cannot
			// parse; list() then assigns NULL to both dimensions.
			list($metaData['width'], $metaData['height']) = getimagesize($rawFileLocation);
			$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
			$fileObject->_updateMetaDataProperties($metaData);
		}
	}

	/****************************
	 *
	 *         UTILITY
	 *
	 ****************************/

	/**
	 * Collects the information to be cached in sys_file.
	 *
	 * @param string $identifier Identifier of the file within the storage
	 * @return array File information using the property names of the file object
	 */
	protected function gatherFileInformationArray($identifier) {
		$fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
		$fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
		$fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
		$fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
		$fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
		// The information was just read from the storage, so the file exists
		$fileInfo['missing'] = 0;

		return $fileInfo;
	}

	/**
	 * Maps the mime type to a sys_file table type.
	 *
	 * Only the part before the first "/" of the mime type is evaluated,
	 * case-insensitively.
	 *
	 * @param string $mimeType
	 * @return string One of the File::FILETYPE_* constants
	 */
	protected function getFileType($mimeType) {
		list($fileType) = explode('/', $mimeType);
		switch (strtolower($fileType)) {
			case 'text':
				$type = File::FILETYPE_TEXT;
				break;
			case 'image':
				$type = File::FILETYPE_IMAGE;
				break;
			case 'audio':
				$type = File::FILETYPE_AUDIO;
				break;
			case 'video':
				$type = File::FILETYPE_VIDEO;
				break;
			case 'application':
			case 'software':
				$type = File::FILETYPE_APPLICATION;
				break;
			default:
				$type = File::FILETYPE_UNKNOWN;
		}
		return $type;
	}

	/**
	 * However it happened, the properties of a file object which
	 * are persisted to the database are named differently than the
	 * properties the driver returns in getFileInfo.
	 * Therefore a mapping must happen.
	 *
	 * Keys without a mapping are passed through unchanged, keys mapped to
	 * NULL are dropped.
	 *
	 * @param array $fileInfo File information as returned by the driver
	 * @return array File information using the property names of the file object
	 */
	protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo) {
		$mappingInfo = array(
			// 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
			'size' => 'size',
			'atime' => NULL,
			'mtime' => 'modification_date',
			'ctime' => 'creation_date',
			'mimetype' => 'mime_type'
		);
		$mappedFileInfo = array();
		foreach ($fileInfo as $key => $value) {
			if (array_key_exists($key, $mappingInfo)) {
				if ($mappingInfo[$key] !== NULL) {
					$mappedFileInfo[$mappingInfo[$key]] = $value;
				}
			} else {
				$mappedFileInfo[$key] = $value;
			}
		}
		return $mappedFileInfo;
	}

	/**
	 * Returns an instance of the FileIndexRepository
	 *
	 * @return FileIndexRepository
	 */
	protected function getFileIndexRepository() {
		return FileIndexRepository::getInstance();
	}

	/**
	 * Returns an instance of the MetaDataRepository
	 *
	 * @return MetaDataRepository
	 */
	protected function getMetaDataRepository() {
		return MetaDataRepository::getInstance();
	}

	/**
	 * Returns the ResourceFactory
	 *
	 * @return \TYPO3\CMS\Core\Resource\ResourceFactory
	 */
	protected function getResourceFactory() {
		return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
	}

	/**
	 * Returns an instance of the ExtractorRegistry
	 *
	 * @return ExtractorRegistry
	 */
	protected function getExtractorRegistry() {
		return ExtractorRegistry::getInstance();
	}
}