[!!!][TASK] Streamline directory structure of ext:indexed_search
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Domain / Repository / AdministrationRepository.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Domain\Repository;
3
4 /**
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\FrontendBackendUserAuthentication;
18 use TYPO3\CMS\Backend\Tree\View\PageTreeView;
19 use TYPO3\CMS\Backend\Utility\IconUtility;
20 use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
21 use TYPO3\CMS\Core\Cache\CacheManager;
22 use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface;
23 use TYPO3\CMS\Core\DataHandling\DataHandler;
24 use TYPO3\CMS\Core\Utility\GeneralUtility;
25 use TYPO3\CMS\Backend\Utility\BackendUtility;
26 use TYPO3\CMS\Dbal\Database\DatabaseConnection;
27 use TYPO3\CMS\IndexedSearch\FileContentParser;
28
29 /**
30 * Administration repository
31 */
32 class AdministrationRepository {
33
/**
 * File content parsers, looked up by item type in makeItemTypeIcon()
 * to resolve the icon of an indexed external document.
 *
 * @var FileContentParser[]
 */
public $external_parsers = array();

/**
 * phash values collected by getTree() while it walks the indexed rows.
 * removeIndexedPhashRow() reads and resets this list when called with 'ALL'.
 *
 * @var array
 */
protected $allPhashListed = array();

/**
 * Icon file names already resolved by makeItemTypeIcon(), keyed by item type.
 *
 * @var array
 */
protected $iconFileNameCache = array();
50
/**
 * Fetch every index_grlist row that belongs to the given page hash.
 *
 * Each returned row is extended with a 'pcount' key holding the total
 * number of rows found for that hash.
 *
 * @param int $phash
 * @return array
 */
public function getGrlistRecord($phash) {
    $connection = $this->getDatabaseConnection();
    $resource = $connection->exec_SELECTquery('index_grlist.*', 'index_grlist', 'phash=' . (int)$phash);
    $records = array();
    $total = $connection->sql_num_rows($resource);
    while ($record = $connection->sql_fetch_assoc($resource)) {
        $record['pcount'] = $total;
        $records[] = $record;
    }
    $connection->sql_free_result($resource);

    return $records;
}
69
/**
 * Count the index_fulltext rows stored for a page hash.
 *
 * @param int $phash
 * @return int|bool
 */
public function getNumberOfFulltextRecords($phash) {
    $connection = $this->getDatabaseConnection();
    $where = 'phash=' . (int)$phash;

    return $connection->exec_SELECTcountRows('phash', 'index_fulltext', $where);
}
79
/**
 * Count the index_rel rows (word relations) stored for a page hash.
 *
 * @param int $phash
 * @return int|bool
 */
public function getNumberOfWords($phash) {
    $connection = $this->getDatabaseConnection();
    $where = 'phash=' . (int)$phash;

    return $connection->exec_SELECTcountRows('*', 'index_rel', $where);
}
89
/**
 * Collect statistics for all indexed entries whose item_type is not '0'.
 *
 * Every index_phash row is enriched via addAdditionalInformation(). When a
 * grouped row reports a 'pcount' above 1, the other rows sharing its
 * phash_grouping are appended right after it.
 *
 * @return array
 */
public function getExternalDocumentsStatistic() {
    $connection = $this->getDatabaseConnection();
    $groupBy = 'phash_grouping,phash,cHashParams,data_filename,data_page_id,data_page_reg1,data_page_type,data_page_mp,gr_list,item_type,item_title,item_description,item_mtime,tstamp,item_size,contentHash,crdate,parsetime,sys_language_uid,item_crdate,externalUrl,recordUid,freeIndexUid,freeIndexSetId';
    $documents = array();

    $groupedResource = $connection->exec_SELECTquery(
        'count(*) AS pcount,index_phash.*',
        'index_phash',
        'item_type<>\'0\'',
        $groupBy,
        'item_type'
    );
    while ($document = $connection->sql_fetch_assoc($groupedResource)) {
        $this->addAdditionalInformation($document);
        $documents[] = $document;

        // Only grouped rows with more than one member need their siblings listed
        if (!($document['pcount'] > 1)) {
            continue;
        }

        $siblingWhere = 'phash_grouping=' . (int)$document['phash_grouping'] . ' AND phash<>' . (int)$document['phash'];
        $siblingResource = $connection->exec_SELECTquery('index_phash.*', 'index_phash', $siblingWhere);
        while ($sibling = $connection->sql_fetch_assoc($siblingResource)) {
            $this->addAdditionalInformation($sibling);
            $documents[] = $sibling;
        }
        $connection->sql_free_result($siblingResource);
    }
    $connection->sql_free_result($groupedResource);

    return $documents;
}
128
/**
 * Count the rows of each table used by indexed_search.
 *
 * @return array Row counts keyed by table name
 */
public function getRecordsNumbers() {
    $connection = $this->getDatabaseConnection();
    $counts = array();
    foreach (array('index_phash', 'index_words', 'index_rel', 'index_grlist', 'index_section', 'index_fulltext') as $table) {
        $counts[$table] = $connection->exec_SELECTcountRows('*', $table);
    }

    return $counts;
}
149
/**
 * Get the number of index_phash rows per item type.
 *
 * Each entry of the returned list holds:
 * - 'count': number of index_phash rows with that item type
 * - 'name': label from the internal lookup table, or NULL when the
 *   item type has no entry there
 * - 'type': the raw item_type value
 * - 'uniqueCount': result of countUniqueTypes() for that item type
 *
 * @return array
 */
public function getPageHashTypes() {
    $counts = array();
    $types = array(
        'html' => 1,
        'htm' => 1,
        'pdf' => 2,
        'doc' => 3,
        'txt' => 4
    );
    $revTypes = array_flip($types);
    $revTypes[0] = 'TYPO3 page';
    $db = $this->getDatabaseConnection();
    $res = $db->exec_SELECTquery('count(*),item_type', 'index_phash', '', 'item_type', 'item_type');
    while ($row = $db->sql_fetch_row($res)) {
        $itemType = $row[1];
        $counts[] = array(
            'count' => $row[0],
            // Item types missing from the lookup table must not raise an
            // "undefined index" notice; they keep the NULL name they had before.
            'name' => isset($revTypes[$itemType]) ? $revTypes[$itemType] : NULL,
            'type' => $itemType,
            'uniqueCount' => $this->countUniqueTypes($itemType),
        );
    }
    $db->sql_free_result($res);

    return $counts;
}
181
/**
 * Count how many distinct phash_grouping values exist for an item type.
 *
 * The query groups index_phash by phash_grouping, so the number of
 * result rows is the number of groups.
 *
 * @param string $itemType
 * @return int
 */
protected function countUniqueTypes($itemType) {
    $connection = $this->getDatabaseConnection();
    $where = 'item_type=' . $connection->fullQuoteStr($itemType, 'index_phash');
    $resource = $connection->exec_SELECTquery('count(*)', 'index_phash', $where, 'phash_grouping');

    $groupCount = 0;
    while ($connection->sql_fetch_row($resource)) {
        $groupCount++;
    }
    $connection->sql_free_result($resource);

    return $groupCount;
}
204
/**
 * Count the index_section rows stored for a page hash.
 *
 * @param int $pageHash
 * @return int
 */
public function getNumberOfSections($pageHash) {
    $connection = $this->getDatabaseConnection();
    $where = 'phash=' . (int)$pageHash;

    return $connection->exec_SELECTcountRows('phash', 'index_section', $where);
}
214
/**
 * Collect statistics for all index_phash rows that belong to a page
 * (data_page_id is not 0), ordered by data_page_id.
 *
 * Every row is enriched via addAdditionalInformation(). When a grouped row
 * reports a 'pcount' above 1, the other rows sharing its phash_grouping
 * are appended right after it.
 *
 * @return array
 */
public function getPageStatistic() {
    $connection = $this->getDatabaseConnection();
    $groupBy = 'phash_grouping,phash,cHashParams,data_filename,data_page_id,data_page_reg1,data_page_type,data_page_mp,gr_list,item_type,item_title,item_description,item_mtime,tstamp,item_size,contentHash,crdate,parsetime,sys_language_uid,item_crdate,externalUrl,recordUid,freeIndexUid,freeIndexSetId';
    $pages = array();

    $groupedResource = $connection->exec_SELECTquery(
        'count(*) AS pcount,index_phash.*',
        'index_phash',
        'data_page_id<>0',
        $groupBy,
        'data_page_id'
    );
    while ($page = $connection->sql_fetch_assoc($groupedResource)) {
        $this->addAdditionalInformation($page);
        $pages[] = $page;

        // Only grouped rows with more than one member need their siblings listed
        if (!($page['pcount'] > 1)) {
            continue;
        }

        $siblingWhere = 'phash_grouping=' . (int)$page['phash_grouping'] . ' AND phash<>' . (int)$page['phash'];
        $siblingResource = $connection->exec_SELECTquery('index_phash.*', 'index_phash', $siblingWhere);
        while ($sibling = $connection->sql_fetch_assoc($siblingResource)) {
            $this->addAdditionalInformation($sibling);
            $pages[] = $sibling;
        }
        $connection->sql_free_result($siblingResource);
    }
    $connection->sql_free_result($groupedResource);

    return $pages;
}
251
/**
 * Get the search word statistic (word and its number of occurrences)
 * from index_stat_word.
 *
 * The statistic is first looked up for the given page only. If that yields
 * no rows, the query is widened to all pages returned by extGetTreeList()
 * for $pageUid.
 *
 * @param string $additionalWhere SQL fragment appended to the WHERE clause; inserted verbatim
 * @param int $pageUid
 * @param int $max Maximum number of rows to return
 * @return array|NULL
 */
public function getGeneralSearchStatistic($additionalWhere, $pageUid, $max = 50) {
    $queryParts = array(
        'SELECT' => 'word, COUNT(*) AS c',
        'FROM' => 'index_stat_word',
        // Plain concatenation: $additionalWhere must never become part of a
        // sprintf() format string, since a literal "%" in it (e.g. a LIKE
        // pattern) would be read as a conversion specifier.
        'WHERE' => 'pageid= ' . (int)$pageUid . ' ' . $additionalWhere,
        'GROUPBY' => 'word',
        'ORDERBY' => '',
        'LIMIT' => (int)$max
    );
    $db = $this->getDatabaseConnection();
    $res = $db->exec_SELECTquery(
        $queryParts['SELECT'],
        $queryParts['FROM'],
        $queryParts['WHERE'],
        $queryParts['GROUPBY'],
        $queryParts['ORDERBY'],
        $queryParts['LIMIT']
    );

    $count = 0;
    if ($res) {
        $count = $db->sql_num_rows($res);
        // Only free a result that actually exists
        $db->sql_free_result($res);
    }

    // exist several statistics for this page?
    if ($count == 0) {
        // Limit access to pages of the current site
        $secureAddWhere = ' AND pageid IN (' . $this->extGetTreeList((int)$pageUid, 100, 0, '1=1') . ') ';
        $queryParts['WHERE'] = '1=1 ' . $additionalWhere . $secureAddWhere;
    }

    return $db->exec_SELECTgetRows(
        $queryParts['SELECT'],
        $queryParts['FROM'],
        $queryParts['WHERE'],
        $queryParts['GROUPBY'],
        $queryParts['ORDERBY'],
        $queryParts['LIMIT']
    );
}
302
/**
 * Enrich an index_phash row (passed by reference) with related data:
 * the number of words, sections and fulltext records, the index_grlist
 * rows, and the unserialized cHashParams (an empty string when the
 * unserialized value is empty).
 *
 * @param array $row
 * @return void
 */
protected function addAdditionalInformation(array &$row) {
    $pageHash = $row['phash'];
    $groupList = $this->getGrlistRecord($pageHash);
    $parameters = unserialize($row['cHashParams']);

    $row['numberOfWords'] = $this->getNumberOfWords($pageHash);
    $row['numberOfSections'] = $this->getNumberOfSections($pageHash);
    $row['numberOfFulltext'] = $this->getNumberOfFulltextRecords($pageHash);
    if (empty($parameters)) {
        $row['cHashParams'] = '';
    } else {
        $row['cHashParams'] = $parameters;
    }
    $row['grList'] = $groupList;
}
319
/**
 * Get the page tree by using \TYPO3\CMS\Backend\Tree\View\PageTreeView
 * and attach the indexed documents to every page of it.
 *
 * The tree consists of the page itself and, when $depth is above 0, of
 * the entries added by PageTreeView::getTree(). For each entry the indexed
 * rows (index_phash joined with index_section) are collected in its
 * 'lines' key, each with an 'icon' and a 'wordCount'. In mode 'content'
 * the rows additionally get 'fulltextData' and the sorted list 'allWords'.
 *
 * As a side effect every phash found is appended to $this->allPhashListed.
 *
 * @param int $pageId
 * @param int $depth
 * @param string $mode Pass 'content' to include fulltext data and word lists
 * @return array Empty if the page record cannot be loaded
 */
public function getTree($pageId, $depth = 4, $mode) {
    $allLines = array();
    $pageRecord = BackendUtility::getRecord('pages', (int)$pageId);
    if (!$pageRecord) {
        return $allLines;
    }
    /** @var PageTreeView $tree */
    $tree = GeneralUtility::makeInstance(PageTreeView::class);
    $perms_clause = $this->getBackendUserAuthentication()->getPagePermsClause(1);
    $tree->init('AND ' . $perms_clause);
    $HTML = IconUtility::getSpriteIconForRecord('pages', $pageRecord, array('title' => $pageRecord['title']));
    $tree->tree[] = array(
        'row' => $pageRecord,
        'HTML' => $HTML
    );

    if ($depth > 0) {
        $tree->getTree((int)$pageId, $depth, '');
    }
    $db = $this->getDatabaseConnection();
    foreach ($tree->tree as $singleLine) {
        $res = $db->exec_SELECTquery(
            'ISEC.phash_t3, ISEC.rl0, ISEC.rl1, ISEC.rl2, ISEC.page_id, ISEC.uniqid, ' .
            'IP.phash, IP.phash_grouping, IP.cHashParams, IP.data_filename, IP.data_page_id, ' .
            'IP.data_page_reg1, IP.data_page_type, IP.data_page_mp, IP.gr_list, IP.item_type, ' .
            'IP.item_title, IP.item_description, IP.item_mtime, IP.tstamp, IP.item_size, ' .
            'IP.contentHash, IP.crdate, IP.parsetime, IP.sys_language_uid, IP.item_crdate, ' .
            'IP.externalUrl, IP.recordUid, IP.freeIndexUid, IP.freeIndexSetId, count(*) AS count_val',
            'index_phash IP, index_section ISEC',
            'IP.phash = ISEC.phash AND ISEC.page_id = ' . (int)$singleLine['row']['uid'],
            'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId',
            'IP.item_type, IP.tstamp',
            10 + 1
        );
        $lines = array();
        // Collecting phash values (to remove local indexing for)
        // Traverse the result set of phash rows selected:
        while ($row = $db->sql_fetch_assoc($res)) {
            $phash = (int)$row['phash'];
            $this->allPhashListed[] = $row['phash'];
            // Adds a display row:
            $row['icon'] = $this->makeItemTypeIcon($row['item_type']);

            // Fetched once per row: the same result serves both the word
            // count and, in 'content' mode, the word list.
            $wordRecords = $db->exec_SELECTgetRows(
                'index_words.baseword, index_rel.*',
                'index_rel, index_words',
                'index_rel.phash = ' . $phash . ' AND index_words.wid = index_rel.wid',
                '',
                '',
                '',
                'baseword'
            );
            $row['wordCount'] = is_array($wordRecords) ? count($wordRecords) : 0;

            if ($mode === 'content') {
                $row['fulltextData'] = $db->exec_SELECTgetSingleRow(
                    '*',
                    'index_fulltext',
                    'phash = ' . $phash
                );
                if (is_array($wordRecords)) {
                    // Rows are keyed by baseword, so the keys are the indexed words
                    $indexed_words = array_keys($wordRecords);
                    sort($indexed_words);
                    $row['allWords'] = $indexed_words;
                }
            }

            $lines[] = $row;
        }
        // Release the result of this page before querying the next one
        $db->sql_free_result($res);

        $singleLine['lines'] = $lines;
        $allLines[] = $singleLine;
    }

    return $allLines;
}
405
/**
 * Build a comma-separated list of page uids for the tree below $id.
 *
 * The tree list itself comes from
 * FrontendBackendUserAuthentication::extGetTreeList(); $id is appended to
 * its end. If that list is empty, $id alone is returned.
 *
 * @param int $id page id
 * @param int $depth to traverse down the page tree.
 * @param int $begin level in the tree at which collecting uids starts, passed through unchanged
 * @param string $perms_clause
 * @return string The uid list ending with $id itself
 */
protected function extGetTreeList($id, $depth, $begin = 0, $perms_clause) {
    $frontendBackendUser = GeneralUtility::makeInstance(FrontendBackendUserAuthentication::class);
    $subPageList = $frontendBackendUser->extGetTreeList($id, $depth, $begin, $perms_clause);

    return empty($subPageList) ? $id : (rtrim($subPageList, ',') . ',' . $id);
}
427
/**
 * Remove the index entries of one or more page hashes.
 *
 * With $phashList set to 'ALL', the hashes are gathered by running
 * getTree() for $pageId and $depth; otherwise $phashList is read as a
 * comma-separated list. For every positive hash, the page cache of all
 * pages referenced in index_section is flushed by tag and the rows of
 * that hash are deleted from the index tables.
 *
 * @param string $phashList
 * @param int $pageId
 * @param int $depth
 * @return void
 */
public function removeIndexedPhashRow($phashList, $pageId, $depth = 4) {
    if ($phashList !== 'ALL') {
        $hashes = GeneralUtility::trimExplode(',', $phashList, TRUE);
    } else {
        $this->getTree($pageId, $depth, '');
        $hashes = $this->allPhashListed;
        $this->allPhashListed = array();
    }

    $connection = $this->getDatabaseConnection();
    foreach ($hashes as $hash) {
        $hash = (int)$hash;
        if ($hash <= 0) {
            continue;
        }

        // Find the pages whose cache has to be invalidated
        $affectedPageIds = array();
        $resource = $connection->exec_SELECTquery('page_id', 'index_section', 'phash=' . $hash);
        while ($section = $connection->sql_fetch_assoc($resource)) {
            $affectedPageIds[] = (int)$section['page_id'];
        }
        $connection->sql_free_result($resource);

        if (!empty($affectedPageIds)) {
            /** @var FrontendInterface $pageCache */
            $pageCache = GeneralUtility::makeInstance(CacheManager::class)->getCache('cache_pages');
            foreach ($affectedPageIds as $affectedPageId) {
                $pageCache->flushByTag('pageId_' . $affectedPageId);
            }
        }

        // Removing old registrations for all tables.
        foreach (array('index_phash', 'index_rel', 'index_section', 'index_grlist', 'index_fulltext', 'index_debug') as $indexTable) {
            $connection->exec_DELETEquery($indexTable, 'phash=' . $hash);
        }
    }
}
472
/**
 * Store the stop word flag for the given words.
 *
 * @param array $words Flag values keyed by word id (wid); each value is cast to int
 * @return void
 */
public function saveStopWords(array $words) {
    $connection = $this->getDatabaseConnection();
    foreach ($words as $wordId => $isStopWord) {
        $connection->exec_UPDATEquery(
            'index_words',
            'wid=' . (int)$wordId,
            array('is_stopword' => (int)$isStopWord)
        );
    }
}
487
/**
 * Update the keywords field of a page.
 *
 * The current keywords of the page are merged with $words: a word with a
 * truthy value is added, a word with a falsy value is removed. The result
 * is written back through the DataHandler. Nothing happens if the page
 * record cannot be loaded.
 *
 * @param array $words State per keyword, keyed by the keyword itself
 * @param int $pageId page id
 * @return void
 */
public function saveKeywords(array $words, $pageId) {
    $page = BackendUtility::getRecord('pages', $pageId);
    if (!is_array($page)) {
        return;
    }

    // Keywords as array keys make adding and removing single words trivial
    $keywordMap = array_flip(GeneralUtility::trimExplode(',', $page['keywords'], TRUE));
    foreach ($words as $word => $enabled) {
        if (!$enabled) {
            unset($keywordMap[$word]);
            continue;
        }
        $keywordMap[$word] = 1;
    }

    $dataMap = array(
        'pages' => array(
            $pageId => array(
                'keywords' => implode(', ', array_keys($keywordMap))
            )
        )
    );
    $dataHandler = GeneralUtility::makeInstance(DataHandler::class);
    $dataHandler->stripslashes_values = 0;
    $dataHandler->start($dataMap, array());
    $dataHandler->process_datamap();
}
518
/**
 * Resolve the icon for an item type.
 *
 * Item type '0' maps to the pages icon. Any other type is delegated to the
 * file content parser registered for it in $this->external_parsers. Types
 * without a registered parser get an empty string. Results are cached per
 * item type in $this->iconFileNameCache.
 *
 * @param string $itemType
 * @return string
 */
protected function makeItemTypeIcon($itemType) {
    if (!isset($this->iconFileNameCache[$itemType])) {
        $icon = '';
        if ($itemType === '0') {
            $icon = 'EXT:indexed_search/Resources/Public/Icons/FileTypes/pages.gif';
        } elseif (!empty($this->external_parsers[$itemType])) {
            // !empty() instead of a bare array read: an item type without a
            // registered parser must not raise an "undefined index" notice.
            $icon = $this->external_parsers[$itemType]->getIcon($itemType);
        }
        $this->iconFileNameCache[$itemType] = $icon;
    }
    return $this->iconFileNameCache[$itemType];
}
537
/**
 * Return the database connection stored in $GLOBALS['TYPO3_DB'].
 *
 * @return DatabaseConnection
 */
protected function getDatabaseConnection() {
    $databaseConnection = $GLOBALS['TYPO3_DB'];

    return $databaseConnection;
}
544
/**
 * Return the backend user object stored in $GLOBALS['BE_USER'].
 *
 * @return BackendUserAuthentication
 */
protected function getBackendUserAuthentication() {
    $backendUser = $GLOBALS['BE_USER'];

    return $backendUser;
}
551
552 }