fb7a937dcc26c0ed1b7d4309508bf2d5d1eed02a
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Resource / Index / Indexer.php
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3
4 /**
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Resource\ResourceStorage;
18 use TYPO3\CMS\Core\Resource\File;
19
/**
 * The New FAL Indexer
 *
 * Keeps the file index of one ResourceStorage in sync with the files the
 * storage reports: creates index records for new files, refreshes records of
 * changed files, re-attaches records flagged as missing to files with the
 * same content hash, flags records whose file is gone and runs the registered
 * metadata extractors.
 */
class Indexer {

	/**
	 * Processing queue filled by detectChangedFilesInStorage() and consumed by
	 * processChangedAndNewFiles(). Keys are file identifiers, values are the
	 * index record of a changed file or NULL for a file without index record.
	 *
	 * @var array
	 */
	protected $filesToUpdate = array();

	/**
	 * Uids of all index records that were matched to a file during the
	 * current run; everything else in the storage is a candidate for "missing".
	 *
	 * @var integer[]
	 */
	protected $identifiedFileUids = array();

	/**
	 * The storage this indexer works on
	 *
	 * @var ResourceStorage
	 */
	protected $storage = NULL;

	/**
	 * @param ResourceStorage $storage The storage whose files are indexed
	 */
	public function __construct(ResourceStorage $storage) {
		$this->storage = $storage;
	}

	/**
	 * Create index entry
	 *
	 * Gathers the file information, stores it as a new index record and
	 * extracts the required metadata for the resulting file object.
	 *
	 * @param string $identifier Non-empty identifier of the file inside the storage
	 * @return File The file object built from the new index record
	 * @throws \InvalidArgumentException If $identifier is not a non-empty string
	 */
	public function createIndexEntry($identifier) {
		// is_string() already rejects NULL, so no separate isset() check is needed
		if (!is_string($identifier) || $identifier === '') {
			throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
		}
		$fileProperties = $this->gatherFileInformationArray($identifier);
		$record = $this->getFileIndexRepository()->addRaw($fileProperties);
		$fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
		$this->extractRequiredMetaData($fileObject);
		return $fileObject;
	}

	/**
	 * Update index entry
	 *
	 * Re-reads the file information for the identifier of the given file,
	 * writes it to the file object and the index and refreshes the required
	 * metadata.
	 *
	 * @param File $fileObject
	 * @return void
	 */
	public function updateIndexEntry(File $fileObject) {
		$updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
		$fileObject->updateProperties($updatedInformation);
		$this->getFileIndexRepository()->update($fileObject);
		$this->extractRequiredMetaData($fileObject);
	}

	/**
	 * Synchronizes the index with the files found below the root level folder
	 * of the storage: changed and new files are (re-)indexed, afterwards
	 * index records without a file are flagged as missing.
	 *
	 * @return void
	 */
	public function processChangesInStorages() {
		// Start every run with clean state. Leftovers of a previous run on the
		// same instance would re-process already handled queue entries (and
		// create duplicate index records for files that were new back then)
		// and would hide files that went missing in the meantime.
		$this->filesToUpdate = array();
		$this->identifiedFileUids = array();

		// get all file-identifiers from the storage
		$rootFolderIdentifier = $this->storage->getRootLevelFolder()->getIdentifier();
		$availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, TRUE, TRUE);
		$this->detectChangedFilesInStorage($availableFiles);
		$this->processChangedAndNewFiles();

		$this->detectMissingFiles();
	}

	/**
	 * Runs the metadata extractors that support the driver type of the
	 * storage on the index records returned by
	 * FileIndexRepository::findInStorageWithIndexOutstanding() and stores the
	 * merged result.
	 *
	 * @param integer $maximumFileCount Passed through to the repository; the default of -1 presumably means "no limit" - confirm against FileIndexRepository
	 * @return void
	 */
	public function runMetaDataExtraction($maximumFileCount = -1) {
		$fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);

		$extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
		foreach ($fileIndexRecords as $indexRecord) {
			$fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

			// Slot 0 holds the metadata the file already has, every extractor
			// result is stored in the slot of its priority.
			// NOTE(review): extractors reporting the same priority share a slot,
			// so the later result replaces the earlier one, and priority 0 would
			// replace the existing metadata - confirm this is intended.
			$newMetaData = array(
				0 => $fileObject->_getMetaData()
			);
			foreach ($extractionServices as $service) {
				if ($service->canProcess($fileObject)) {
					$newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
				}
			}
			// Merge in ascending slot order: fields from a higher slot override
			// equally named fields from lower slots.
			ksort($newMetaData);
			$metaData = array();
			foreach ($newMetaData as $data) {
				$metaData = array_merge($metaData, $data);
			}
			$fileObject->_updateMetaDataProperties($metaData);
			$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
			$this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
		}
	}

	/**
	 * Since by now all files in filesystem have been looked at it is safe to assume,
	 * that files that are indexed but not touched in this run are missing
	 *
	 * @return void
	 */
	protected function detectMissingFiles() {
		// NOTE(review): when not a single indexed file was identified nothing
		// is flagged as missing. Presumably this avoids querying with an empty
		// uid list - confirm against FileIndexRepository::findInStorageAndNotInUidList().
		if (count($this->identifiedFileUids) === 0) {
			return;
		}
		$indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList($this->storage, $this->identifiedFileUids);

		foreach ($indexedNotExistentFiles as $record) {
			// double check with the storage before flagging the record
			if (!$this->storage->hasFile($record['identifier'])) {
				$this->getFileIndexRepository()->markFileAsMissing($record['uid']);
			}
		}
	}

	/**
	 * Adds updated files to the processing queue
	 *
	 * Files below the processing folder are skipped. A file with index
	 * record is queued when the storage reports a newer modification time
	 * than the index or when the record is flagged as missing; a file
	 * without index record is queued with NULL.
	 *
	 * @param array $fileIdentifierArray Identifiers of the files found in the storage
	 * @return void
	 */
	protected function detectChangedFilesInStorage(array $fileIdentifierArray) {
		// Resolved on first use only: the value does not change inside the loop
		// and an empty identifier list must not touch the processing folder at all.
		$processingFolderIdentifier = NULL;
		foreach ($fileIdentifierArray as $fileIdentifier) {
			if ($processingFolderIdentifier === NULL) {
				$processingFolderIdentifier = $this->storage->getProcessingFolder()->getIdentifier();
			}
			// skip processed files
			if (strpos($fileIdentifier, $processingFolderIdentifier) === 0) {
				continue;
			}
			$indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

			if ($indexRecord === FALSE) {
				// no index record yet, queue as new file
				$this->filesToUpdate[$fileIdentifier] = NULL;
				continue;
			}

			$this->identifiedFileUids[] = $indexRecord['uid'];

			// Get the modification time for file-identifier from the storage,
			// it is only needed for files that already have an index record
			$modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
			// Look if the modification time in FS is higher than the one in database (key needed on timestamps)
			if ($indexRecord['modification_date'] < $modificationTime['mtime'] || $indexRecord['missing']) {
				$this->filesToUpdate[$fileIdentifier] = $indexRecord;
			}
		}
	}

	/**
	 * Processes the Files which have been detected as "changed or new"
	 * in the storage
	 *
	 * @return void
	 */
	protected function processChangedAndNewFiles() {
		foreach ($this->filesToUpdate as $identifier => $data) {
			if ($data === NULL) {
				// File without index record: either a missing file that showed
				// up again under a new identifier or a really new file. A file
				// whose content merely equals that of other, still existing
				// files is a new file as well and must get its own record.
				$fileObject = $this->reattachMissingFileWithSameContent($identifier);
				if ($fileObject === NULL) {
					// index new file
					$fileObject = $this->createIndexEntry($identifier);
				}
				$this->identifiedFileUids[] = $fileObject->getUid();
			} else {
				// update existing file
				$fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
				$this->updateIndexEntry($fileObject);
			}
		}
	}

	/**
	 * Looks for an index record that is flagged as missing and has the same
	 * sha1 content hash as the given file. The first match is pointed to the
	 * given identifier and its index entry is updated.
	 *
	 * @param string $identifier Identifier of the not yet indexed file
	 * @return File|NULL The re-attached file object, NULL if no missing record matched
	 */
	protected function reattachMissingFileWithSameContent($identifier) {
		$fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
		$candidates = $this->getFileIndexRepository()->findByContentHash($fileHash);
		if (empty($candidates)) {
			return NULL;
		}
		foreach ($candidates as $fileIndexEntry) {
			if ($fileIndexEntry['missing']) {
				$fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
				$fileObject->updateProperties(array(
					'identifier' => $identifier
				));
				$this->updateIndexEntry($fileObject);
				return $fileObject;
			}
		}
		return NULL;
	}

	/**
	 * Since the core desperately needs image sizes in metadata table put them there
	 * This should be called after every "content" update and "record" creation
	 *
	 * @param File $fileObject
	 * @return void
	 */
	protected function extractRequiredMetaData(File $fileObject) {
		// prevent doing this for remote storages, remote storages must provide the data with extractors
		// (loose comparison on purpose: the stored type may not have the type of the constant)
		if ($fileObject->getType() != File::FILETYPE_IMAGE || $this->storage->getDriverType() !== 'Local') {
			return;
		}
		$rawFileLocation = $fileObject->getForLocalProcessing(FALSE);
		$imageSize = getimagesize($rawFileLocation);
		if ($imageSize === FALSE) {
			// Unreadable or invalid image: keep the metadata as it is instead
			// of overwriting width and height with NULL
			return;
		}
		$metaData = array(
			'width' => $imageSize[0],
			'height' => $imageSize[1]
		);
		$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
		$fileObject->_updateMetaDataProperties($metaData);
	}

	/****************************
	 *
	 *         UTILITY
	 *
	 ****************************/

	/**
	 * Collects the information to be cached in sys_file
	 *
	 * Takes the file info reported by the storage, renames its keys to the
	 * file property names and adds type, sha1 hash and extension. The
	 * "missing" flag is always reset to 0.
	 *
	 * @param string $identifier
	 * @return array
	 */
	protected function gatherFileInformationArray($identifier) {
		$fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
		$fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
		$fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
		$fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
		$fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
		$fileInfo['missing'] = 0;

		return $fileInfo;
	}

	/**
	 * Maps the mimetype to a sys_file table type
	 *
	 * Only the part in front of the first slash is looked at, case-insensitively.
	 *
	 * @param string $mimeType
	 * @return mixed One of the File::FILETYPE_* constants, File::FILETYPE_UNKNOWN for anything not listed
	 */
	protected function getFileType($mimeType) {
		list($fileType) = explode('/', $mimeType);
		switch (strtolower($fileType)) {
			case 'text':
				return File::FILETYPE_TEXT;
			case 'image':
				return File::FILETYPE_IMAGE;
			case 'audio':
				return File::FILETYPE_AUDIO;
			case 'video':
				return File::FILETYPE_VIDEO;
			case 'application':
			case 'software':
				return File::FILETYPE_APPLICATION;
			default:
				return File::FILETYPE_UNKNOWN;
		}
	}

	/**
	 * However it happened, the properties of a file object which
	 * are persisted to the database are named different than the
	 * properties the driver returns in getFileInfo.
	 * Therefore a mapping must happen.
	 *
	 * Keys without mapping are copied unchanged, keys mapped to NULL are dropped.
	 *
	 * @param array $fileInfo
	 *
	 * @return array
	 */
	protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo) {
		$mappingInfo = array(
			// 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
			'size' => 'size',
			'atime' => NULL,
			'mtime' => 'modification_date',
			'ctime' => 'creation_date',
			'mimetype' => 'mime_type'
		);
		$mappedFileInfo = array();
		foreach ($fileInfo as $key => $value) {
			if (!array_key_exists($key, $mappingInfo)) {
				$mappedFileInfo[$key] = $value;
				continue;
			}
			$propertyName = $mappingInfo[$key];
			if ($propertyName !== NULL) {
				$mappedFileInfo[$propertyName] = $value;
			}
		}
		return $mappedFileInfo;
	}


	/**
	 * Returns an instance of the FileIndexRepository
	 *
	 * @return FileIndexRepository
	 */
	protected function getFileIndexRepository() {
		return FileIndexRepository::getInstance();
	}

	/**
	 * Returns an instance of the MetaDataRepository
	 *
	 * @return MetaDataRepository
	 */
	protected function getMetaDataRepository() {
		return MetaDataRepository::getInstance();
	}

	/**
	 * Returns the ResourceFactory
	 *
	 * @return \TYPO3\CMS\Core\Resource\ResourceFactory
	 */
	protected function getResourceFactory() {
		return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
	}

	/**
	 * Returns an instance of the ExtractorRegistry
	 *
	 * @return ExtractorRegistry
	 */
	protected function getExtractorRegistry() {
		return ExtractorRegistry::getInstance();
	}
}