898879fee658bae8a4a1c5bf453d7df20b0f786f
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /***************************************************************
5 * Copyright notice
6 *
7 * (c) 2013 Steffen Ritter <steffen.ritter@typo3.org>
8 * All rights reserved
9 *
10 * This script is part of the TYPO3 project. The TYPO3 project is
11 * free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * The GNU General Public License can be found at
17 * http://www.gnu.org/copyleft/gpl.html.
18 * A copy is found in the text file GPL.txt and important notices to the license
19 * from the author is found in LICENSE.txt distributed with these scripts.
20 *
21 *
22 * This script is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * This copyright notice MUST APPEAR in all copies of the script!
28 ***************************************************************/
29
30 use TYPO3\CMS\Core\Resource\ResourceStorage;
31 use TYPO3\CMS\Core\Resource\File;
32
/**
 * The New FAL Indexer
 *
 * Synchronises the file index of a single storage with the files the
 * storage driver actually reports: new files get an index record, changed
 * files are re-read, moved files are re-linked through their content hash
 * and files which vanished are flagged as missing.
 */
class Indexer {

	/**
	 * Files which have to be (re-)indexed, keyed by file identifier.
	 * The value is the existing index record, or NULL when the file has
	 * no index record yet.
	 *
	 * @var array
	 */
	protected $filesToUpdate = array();

	/**
	 * Uids of all index records which could be matched to a file during
	 * the current run.
	 *
	 * @var integer[]
	 */
	protected $identifiedFileUids = array();

	/**
	 * The storage this indexer works on
	 *
	 * @var ResourceStorage
	 */
	protected $storage = NULL;

	/**
	 * @param ResourceStorage $storage The storage whose files are indexed
	 */
	public function __construct(ResourceStorage $storage) {
		$this->storage = $storage;
	}

	/**
	 * Create index entry
	 *
	 * Gathers the file information from the storage, adds it as a new record
	 * to the file index and extracts the metadata the core requires.
	 *
	 * @param string $identifier Identifier of the file within the storage
	 * @return File The file object built from the new index record
	 */
	public function createIndexEntry($identifier) {
		$fileProperties = $this->gatherFileInformationArray($identifier);
		$record = $this->getFileIndexRepository()->addRaw($fileProperties);
		$fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
		$this->extractRequiredMetaData($fileObject);
		return $fileObject;
	}

	/**
	 * Update index entry
	 *
	 * Re-reads the file information from the storage (using the identifier
	 * the file object currently carries), writes it to the index record and
	 * refreshes the metadata the core requires.
	 *
	 * @param File $fileObject
	 * @return void
	 */
	public function updateIndexEntry(File $fileObject) {
		$updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
		$fileObject->updateProperties($updatedInformation);
		$this->getFileIndexRepository()->update($fileObject);
		$this->extractRequiredMetaData($fileObject);
	}

	/**
	 * Brings the index of the storage up to date: detects new and changed
	 * files, (re-)indexes them and finally flags indexed files which were
	 * not found anymore as missing.
	 *
	 * @return void
	 */
	public function processChangesInStorages() {
		// get all file-identifiers from the storage
		$availableFiles = $this->storage->getFileIdentifiersInFolder($this->storage->getRootLevelFolder()->getIdentifier(), TRUE, TRUE);
		$this->detectChangedFilesInStorage($availableFiles);
		$this->processChangedAndNewFiles();

		$this->detectMissingFiles();
	}

	/**
	 * Runs the registered metadata extractors on the files of the storage
	 * whose indexing is outstanding and stores the merged result.
	 *
	 * @param integer $maximumFileCount Handed to the repository query as is; the default of -1 presumably means "no limit" - TODO confirm in FileIndexRepository
	 * @return void
	 */
	public function runMetaDataExtraction($maximumFileCount = -1) {
		$fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);

		$extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
		foreach ($fileIndexRecords as $indexRecord) {
			$fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

			// Slot 0 holds the metadata already known, every extractor adds its
			// result in the slot of its priority.
			// NOTE(review): extractors sharing a priority (or using priority 0)
			// overwrite each other's slot - confirm whether this is intended.
			$newMetaData = array(
				0 => $fileObject->_getMetaData()
			);
			foreach ($extractionServices as $service) {
				if ($service->canProcess($fileObject)) {
					$newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
				}
			}
			// Merge in ascending priority order, so values of a higher
			// priority replace those of a lower one
			ksort($newMetaData);
			$metaData = array();
			foreach ($newMetaData as $data) {
				$metaData = array_merge($metaData, $data);
			}
			$fileObject->_updateMetaDataProperties($metaData);
			$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
			$this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
		}
	}

	/**
	 * Since by now all files in the file system have been looked at, it is
	 * safe to assume that files which are indexed but were not touched in
	 * this run are missing.
	 *
	 * Nothing is flagged when no index record was identified in this run.
	 * NOTE(review): this also means that nothing is marked as missing if the
	 * storage lost all of its files - dropping the guard needs a repository
	 * that copes with an empty uid list, which cannot be verified from here.
	 *
	 * @return void
	 */
	protected function detectMissingFiles() {
		if (count($this->identifiedFileUids) > 0) {
			$indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList($this->storage, $this->identifiedFileUids);

			foreach ($indexedNotExistentFiles as $record) {
				// ask the storage once more before flagging the record
				if (!$this->storage->hasFile($record['identifier'])) {
					$this->getFileIndexRepository()->markFileAsMissing($record['uid']);
				}
			}
		}
	}

	/**
	 * Adds updated files to the processing queue
	 *
	 * Files without an index record are queued with NULL, files whose
	 * modification time is newer than the indexed one (or which are flagged
	 * as missing) are queued with their index record.
	 *
	 * @param array $fileIdentifierArray File identifiers found in the storage
	 * @return void
	 */
	protected function detectChangedFilesInStorage(array $fileIdentifierArray) {
		// The processing folder does not change while looping, so it is
		// resolved only once - and only if there is something to look at
		$processingFolderIdentifier = NULL;
		foreach ($fileIdentifierArray as $fileIdentifier) {
			if ($processingFolderIdentifier === NULL) {
				$processingFolderIdentifier = $this->storage->getProcessingFolder()->getIdentifier();
			}
			// skip processed files
			if (strpos($fileIdentifier, $processingFolderIdentifier) === 0) {
				continue;
			}
			$indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

			if ($indexRecord === FALSE) {
				// not indexed yet, the modification time is of no interest
				$this->filesToUpdate[$fileIdentifier] = NULL;
				continue;
			}
			$this->identifiedFileUids[] = $indexRecord['uid'];

			// Get the modification time for file-identifier from the storage
			$modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
			// Look if the modification time in FS is higher than the one in database (key needed on timestamps)
			if ($indexRecord['modification_date'] < $modificationTime['mtime'] || $indexRecord['missing']) {
				$this->filesToUpdate[$fileIdentifier] = $indexRecord;
			}
		}
	}

	/**
	 * Processes the Files which have been detected as "changed or new"
	 * in the storage
	 *
	 * A file without index record is first matched against records which
	 * are flagged as missing and have the same content (the file was moved
	 * or renamed). If no such record exists, the file gets a new index
	 * record - also if its content equals that of another, still existing
	 * file.
	 *
	 * @return void
	 */
	protected function processChangedAndNewFiles() {
		foreach ($this->filesToUpdate as $identifier => $data) {
			if (empty($data)) {
				$fileObject = $this->reuseMissingIndexEntry($identifier);
				if ($fileObject === NULL) {
					// index new file
					$fileObject = $this->createIndexEntry($identifier);
				}
				$this->identifiedFileUids[] = $fileObject->getUid();
			} else {
				// update existing file
				$fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
				$this->updateIndexEntry($fileObject);
			}
		}
	}

	/**
	 * Looks for an index record which is flagged as missing and has the same
	 * content hash as the given file. The first record found is pointed to
	 * the given identifier and updated.
	 *
	 * @param string $identifier Identifier of the not yet indexed file
	 * @return File|NULL The re-linked file, NULL if no missing record matched
	 */
	protected function reuseMissingIndexEntry($identifier) {
		$fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
		$candidates = $this->getFileIndexRepository()->findByContentHash($fileHash);
		if (empty($candidates)) {
			return NULL;
		}
		foreach ($candidates as $fileIndexEntry) {
			if ($fileIndexEntry['missing']) {
				$fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
				$fileObject->updateProperties(array(
					'identifier' => $identifier
				));
				$this->updateIndexEntry($fileObject);
				return $fileObject;
			}
		}
		return NULL;
	}

	/**
	 * Since the core desperately needs image sizes in metadata table put them there
	 * This should be called after every "content" update and "record" creation
	 *
	 * Only images on storages using the "Local" driver are handled, remote
	 * storages must provide the data with extractors. If the dimensions
	 * cannot be determined the stored metadata is left untouched.
	 *
	 * @param File $fileObject
	 * @return void
	 */
	protected function extractRequiredMetaData(File $fileObject) {
		if ($fileObject->getType() != File::FILETYPE_IMAGE || $this->storage->getDriverType() !== 'Local') {
			return;
		}
		$rawFileLocation = $fileObject->getForLocalProcessing(FALSE);
		$imageSize = getimagesize($rawFileLocation);
		if (!is_array($imageSize)) {
			// getimagesize() returns FALSE for unsupported or corrupt image
			// data; do not overwrite the stored dimensions with NULL then
			return;
		}
		$metaData = array(
			'width' => $imageSize[0],
			'height' => $imageSize[1]
		);
		$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
		$fileObject->_updateMetaDataProperties($metaData);
	}

	/****************************
	 *
	 *         UTILITY
	 *
	 ****************************/

	/**
	 * Collects the information to be cached in sys_file
	 *
	 * The information of the driver is mapped to the property names of the
	 * file object and enriched by file type, SHA1 content hash and file
	 * extension. The "missing" flag is reset, as the file obviously exists.
	 *
	 * @param string $identifier Identifier of the file within the storage
	 * @return array
	 */
	protected function gatherFileInformationArray($identifier) {
		$fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
		$fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
		$fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
		$fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
		$fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
		$fileInfo['missing'] = 0;

		return $fileInfo;
	}

	/**
	 * Maps the mimetype to a sys_file table type
	 *
	 * Only the part in front of the slash is evaluated, case-insensitively.
	 *
	 * @param string $mimeType
	 * @return mixed One of the File::FILETYPE_* constants, File::FILETYPE_UNKNOWN for unrecognised types
	 */
	protected function getFileType($mimeType) {
		list($fileType) = explode('/', (string)$mimeType);
		switch (strtolower($fileType)) {
			case 'text':
				$type = File::FILETYPE_TEXT;
				break;
			case 'image':
				$type = File::FILETYPE_IMAGE;
				break;
			case 'audio':
				$type = File::FILETYPE_AUDIO;
				break;
			case 'video':
				$type = File::FILETYPE_VIDEO;
				break;
			case 'application':
			case 'software':
				$type = File::FILETYPE_APPLICATION;
				break;
			default:
				$type = File::FILETYPE_UNKNOWN;
		}
		return $type;
	}

	/**
	 * However it happened, the properties of a file object which
	 * are persisted to the database are named different than the
	 * properties the driver returns in getFileInfo.
	 * Therefore a mapping must happen.
	 *
	 * Keys mapped to NULL are dropped, keys unknown to the mapping are
	 * passed through unchanged.
	 *
	 * @param array $fileInfo
	 *
	 * @return array
	 */
	protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo) {
		$mappingInfo = array(
			// 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
			'size' => 'size',
			'atime' => NULL,
			'mtime' => 'modification_date',
			'ctime' => 'creation_date',
			'mimetype' => 'mime_type'
		);
		$mappedFileInfo = array();
		foreach ($fileInfo as $key => $value) {
			if (array_key_exists($key, $mappingInfo)) {
				if ($mappingInfo[$key] !== NULL) {
					$mappedFileInfo[$mappingInfo[$key]] = $value;
				}
			} else {
				$mappedFileInfo[$key] = $value;
			}
		}
		return $mappedFileInfo;
	}


	/**
	 * Returns an instance of the FileIndexRepository
	 *
	 * @return FileIndexRepository
	 */
	protected function getFileIndexRepository() {
		return FileIndexRepository::getInstance();
	}

	/**
	 * Returns an instance of the MetaDataRepository
	 *
	 * @return MetaDataRepository
	 */
	protected function getMetaDataRepository() {
		return MetaDataRepository::getInstance();
	}

	/**
	 * Returns the ResourceFactory
	 *
	 * @return \TYPO3\CMS\Core\Resource\ResourceFactory
	 */
	protected function getResourceFactory() {
		return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
	}

	/**
	 * Returns an instance of the ExtractorRegistry
	 *
	 * @return ExtractorRegistry
	 */
	protected function getExtractorRegistry() {
		return ExtractorRegistry::getInstance();
	}
}