[TASK] Doctrine: Migrate indexed_search part 1
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Domain / Repository / AdministrationRepository.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Domain\Repository;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16 use TYPO3\CMS\Backend\FrontendBackendUserAuthentication;
17 use TYPO3\CMS\Backend\Tree\View\PageTreeView;
18 use TYPO3\CMS\Backend\Utility\BackendUtility;
19 use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
20 use TYPO3\CMS\Core\Cache\CacheManager;
21 use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface;
22 use TYPO3\CMS\Core\Database\ConnectionPool;
23 use TYPO3\CMS\Core\DataHandling\DataHandler;
24 use TYPO3\CMS\Core\Imaging\Icon;
25 use TYPO3\CMS\Core\Imaging\IconFactory;
26 use TYPO3\CMS\Core\Utility\GeneralUtility;
27 use TYPO3\CMS\Dbal\Database\DatabaseConnection;
28 use TYPO3\CMS\IndexedSearch\FileContentParser;
29
30 /**
31 * Administration repository
32 */
33 class AdministrationRepository
34 {
35 /**
36 * List of fileContentParsers
37 *
38 * @var FileContentParser[]
39 */
40 public $external_parsers = array();
41
42 /**
43 * @var array
44 */
45 protected $allPhashListed = array();
46
47 /**
48 * @var array
49 */
50 protected $iconFileNameCache = array();
51
52 /**
53 * Get group list information
54 *
55 * @param int $phash
56 * @return array
57 */
58 public function getGrlistRecord($phash)
59 {
60 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_grlist');
61 $res = $queryBuilder
62 ->select('*')
63 ->from('index_grlist')
64 ->where(
65 $queryBuilder->expr()->eq('phash', (int)$phash)
66 )
67 ->execute();
68 $numberOfRows = $res->rowCount();
69 $allRows = [];
70 while ($row = $res->fetch()) {
71 $row['pcount'] = $numberOfRows;
72 $allRows[] = $row;
73 }
74 return $allRows;
75 }
76
77 /**
78 * Get number of fulltext records
79 *
80 * @param int $phash
81 * @return int|bool
82 */
83 public function getNumberOfFulltextRecords($phash)
84 {
85 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_fulltext');
86 return $queryBuilder
87 ->count('phash')
88 ->from('index_fulltext')
89 ->where($queryBuilder->expr()->eq('phash', (int)$phash))
90 ->execute()
91 ->fetchColumn(0);
92 }
93
94 /**
95 * Get number of words
96 *
97 * @param int $phash
98 * @return int|bool
99 */
100 public function getNumberOfWords($phash)
101 {
102 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_rel');
103 return $queryBuilder
104 ->count('*')
105 ->from('index_rel')
106 ->where($queryBuilder->expr()->eq('phash', (int)$phash))
107 ->execute()
108 ->fetchColumn(0);
109 }
110
111 /**
112 * Get statistic of external documents
113 *
114 * @return array
115 */
116 public function getExternalDocumentsStatistic()
117 {
118 $result = array();
119
120 $db = $this->getDatabaseConnection();
121 $res = $db->exec_SELECTquery(
122 'count(*) AS pcount,index_phash.*',
123 'index_phash',
124 'item_type<>\'0\'',
125 'phash_grouping,phash,cHashParams,data_filename,data_page_id,data_page_reg1,data_page_type,data_page_mp,gr_list,item_type,item_title,item_description,item_mtime,tstamp,item_size,contentHash,crdate,parsetime,sys_language_uid,item_crdate,externalUrl,recordUid,freeIndexUid,freeIndexSetId',
126 'item_type'
127 );
128 while ($row = $db->sql_fetch_assoc($res)) {
129 $this->addAdditionalInformation($row);
130
131 $result[] = $row;
132
133 if ($row['pcount'] > 1) {
134 $res2 = $db->exec_SELECTquery(
135 'index_phash.*',
136 'index_phash',
137 'phash_grouping=' . (int)$row['phash_grouping'] . ' AND phash<>' . (int)$row['phash']
138 );
139 while ($row2 = $db->sql_fetch_assoc($res2)) {
140 $this->addAdditionalInformation($row2);
141 $result[] = $row2;
142 }
143 $db->sql_free_result($res2);
144 }
145 }
146 $db->sql_free_result($res);
147
148 return $result;
149 }
150
151 /**
152 * Get count of the tables used for indexed_search
153 *
154 * @return array
155 */
156 public function getRecordsNumbers()
157 {
158 $tables = array(
159 'index_phash',
160 'index_words',
161 'index_rel',
162 'index_grlist',
163 'index_section',
164 'index_fulltext',
165 );
166 $recordList = array();
167 foreach ($tables as $tableName) {
168 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($tableName);
169 $recordList[$tableName] = $queryBuilder
170 ->count('*')
171 ->from($tableName)
172 ->execute()
173 ->fetchColumn(0);
174 }
175 return $recordList;
176 }
177
178 /**
179 * Get hash types
180 *
181 * @return array
182 */
183 public function getPageHashTypes()
184 {
185 $counts = array();
186 $types = array(
187 'html' => 1,
188 'htm' => 1,
189 'pdf' => 2,
190 'doc' => 3,
191 'txt' => 4
192 );
193 $revTypes = array_flip($types);
194 $revTypes[0] = 'TYPO3 page';
195 $db = $this->getDatabaseConnection();
196 $res = $db->exec_SELECTquery('count(*),item_type', 'index_phash', '', 'item_type', 'item_type');
197 while ($row = $db->sql_fetch_row($res)) {
198 $itemType = $row[1];
199 $counts[] = array(
200 'count' => $row[0],
201 'name' => $revTypes[$itemType],
202 'type' => $itemType,
203 'uniqueCount' => $this->countUniqueTypes($itemType),
204 );
205 }
206 $db->sql_free_result($res);
207
208 return $counts;
209 }
210
211 /**
212 * Count unique types
213 *
214 * @param string $itemType
215 * @return int
216 */
217 protected function countUniqueTypes($itemType)
218 {
219 $items = array();
220 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_phash');
221 $res = $queryBuilder
222 ->count('*')
223 ->from('index_phash')
224 ->where($queryBuilder->expr()->eq('item_type', $queryBuilder->createNamedParameter($itemType)))
225 ->groupBy('phash_grouping')
226 ->execute();
227 while ($row = $res->fetch()) {
228 $items[] = $row;
229 }
230 return count($items);
231 }
232
233 /**
234 * Get number of section records
235 *
236 * @param int $pageHash
237 * @return int
238 */
239 public function getNumberOfSections($pageHash)
240 {
241 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_section');
242 return $queryBuilder
243 ->count('phash')
244 ->from('index_section')
245 ->where($queryBuilder->expr()->eq('phash', (int)$pageHash))
246 ->execute()
247 ->fetchColumn(0);
248 }
249
250 /**
251 * Get page statistic
252 *
253 * @return array
254 */
255 public function getPageStatistic()
256 {
257 $result = array();
258 $db = $this->getDatabaseConnection();
259 $res = $db->exec_SELECTquery(
260 'count(*) AS pcount,index_phash.*',
261 'index_phash',
262 'data_page_id<>0',
263 'phash_grouping,phash,cHashParams,data_filename,data_page_id,data_page_reg1,data_page_type,data_page_mp,gr_list,item_type,item_title,item_description,item_mtime,tstamp,item_size,contentHash,crdate,parsetime,sys_language_uid,item_crdate,externalUrl,recordUid,freeIndexUid,freeIndexSetId',
264 'data_page_id'
265 );
266 while ($row = $db->sql_fetch_assoc($res)) {
267 $this->addAdditionalInformation($row);
268 $result[] = $row;
269
270 if ($row['pcount'] > 1) {
271 $res2 = $db->exec_SELECTquery(
272 'index_phash.*',
273 'index_phash',
274 'phash_grouping=' . (int)$row['phash_grouping'] . ' AND phash<>' . (int)$row['phash']
275 );
276 while ($row2 = $db->sql_fetch_assoc($res2)) {
277 $this->addAdditionalInformation($row2);
278 $result[] = $row2;
279 }
280 $db->sql_free_result($res2);
281 }
282 }
283 $db->sql_free_result($res);
284
285 return $result;
286 }
287
288 /**
289 * Get general statistic
290 *
291 * @param string $additionalWhere
292 * @param int $pageUid
293 * @param int $max
294 * @return array|NULL
295 */
296 public function getGeneralSearchStatistic($additionalWhere, $pageUid, $max = 50)
297 {
298 $queryParts = array(
299 'SELECT' => 'word, COUNT(*) AS c',
300 'FROM' => 'index_stat_word',
301 'WHERE' => sprintf('pageid= %d ' . $additionalWhere, $pageUid),
302 'GROUPBY' => 'word',
303 'ORDERBY' => '',
304 'LIMIT' => (int)$max
305 );
306 $db = $this->getDatabaseConnection();
307 $res = $db->exec_SELECTquery(
308 $queryParts['SELECT'],
309 $queryParts['FROM'],
310 $queryParts['WHERE'],
311 $queryParts['GROUPBY'],
312 $queryParts['ORDERBY'],
313 $queryParts['LIMIT']
314 );
315
316 $count = 0;
317 if ($res) {
318 $count = $db->sql_num_rows($res);
319 }
320
321 $db->sql_free_result($res);
322
323 // exist several statistics for this page?
324 if ($count == 0) {
325 // Limit access to pages of the current site
326 $secureAddWhere = ' AND pageid IN (' . $this->extGetTreeList((int)$pageUid, 100, 0, '1=1') . ') ';
327 $queryParts['WHERE'] = '1=1 ' . $additionalWhere . $secureAddWhere;
328 }
329
330 return $db->exec_SELECTgetRows(
331 $queryParts['SELECT'],
332 $queryParts['FROM'],
333 $queryParts['WHERE'],
334 $queryParts['GROUPBY'],
335 $queryParts['ORDERBY'],
336 $queryParts['LIMIT']
337 );
338 }
339
340 /**
341 * Add additional information to the result row
342 *
343 * @param array $row
344 * @return void
345 */
346 protected function addAdditionalInformation(array &$row)
347 {
348 $grListRec = $this->getGrlistRecord($row['phash']);
349 $unserializedCHashParams = unserialize($row['cHashParams']);
350
351 $row['numberOfWords'] = $this->getNumberOfWords($row['phash']);
352 $row['numberOfSections'] = $this->getNumberOfSections($row['phash']);
353 $row['numberOfFulltext'] = $this->getNumberOfFulltextRecords($row['phash']);
354 $row['cHashParams'] = !empty($unserializedCHashParams) ? $unserializedCHashParams : '';
355 $row['grList'] = $grListRec;
356 }
357
358 /**
359 * Get the page tree by using \TYPO3\CMS\Backend\Tree\View\PageTreeView
360 *
361 * @param int $pageId
362 * @param int $depth
363 * @param string $mode
364 * @return array
365 */
366 public function getTree($pageId, $depth = 4, $mode)
367 {
368 $allLines = array();
369 $pageRecord = BackendUtility::getRecord('pages', (int)$pageId);
370 if (!$pageRecord) {
371 return $allLines;
372 }
373 /** @var PageTreeView $tree */
374 $tree = GeneralUtility::makeInstance(PageTreeView::class);
375 $perms_clause = $this->getBackendUserAuthentication()->getPagePermsClause(1);
376 $tree->init('AND ' . $perms_clause);
377 $iconFactory = GeneralUtility::makeInstance(IconFactory::class);
378 $HTML = '<span title="' . htmlspecialchars($pageRecord['title']) . '">' . $iconFactory->getIconForRecord('pages', $pageRecord, Icon::SIZE_SMALL)->render() . '</span>';
379 $tree->tree[] = array(
380 'row' => $pageRecord,
381 'HTML' => $HTML
382 );
383
384 if ($depth > 0) {
385 $tree->getTree((int)$pageId, $depth, '');
386 }
387 $db = $this->getDatabaseConnection();
388 foreach ($tree->tree as $singleLine) {
389 $res = $db->exec_SELECTquery(
390 'ISEC.phash_t3, ISEC.rl0, ISEC.rl1, ISEC.rl2, ISEC.page_id, ISEC.uniqid, ' .
391 'IP.phash, IP.phash_grouping, IP.cHashParams, IP.data_filename, IP.data_page_id, ' .
392 'IP.data_page_reg1, IP.data_page_type, IP.data_page_mp, IP.gr_list, IP.item_type, ' .
393 'IP.item_title, IP.item_description, IP.item_mtime, IP.tstamp, IP.item_size, ' .
394 'IP.contentHash, IP.crdate, IP.parsetime, IP.sys_language_uid, IP.item_crdate, ' .
395 'IP.externalUrl, IP.recordUid, IP.freeIndexUid, IP.freeIndexSetId, count(*) AS count_val',
396 'index_phash IP, index_section ISEC',
397 'IP.phash = ISEC.phash AND ISEC.page_id = ' . (int)$singleLine['row']['uid'],
398 'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId',
399 'IP.item_type, IP.tstamp',
400 10 + 1
401 );
402 $lines = array();
403 // Collecting phash values (to remove local indexing for)
404 // Traverse the result set of phash rows selected:
405 while ($row = $db->sql_fetch_assoc($res)) {
406 $this->allPhashListed[] = $row['phash'];
407 // Adds a display row:
408 $row['icon'] = $this->makeItemTypeIcon($row['item_type']);
409 $row['wordCount'] = count($db->exec_SELECTgetRows(
410 'index_words.baseword, index_rel.*',
411 'index_rel, index_words',
412 'index_rel.phash = ' . (int)$row['phash'] . ' AND index_words.wid = index_rel.wid',
413 '',
414 '',
415 '',
416 'baseword'
417 ));
418
419 if ($mode === 'content') {
420 $row['fulltextData'] = $db->exec_SELECTgetSingleRow(
421 '*',
422 'index_fulltext',
423 'phash = ' . $row['phash']);
424 $wordRecords = $db->exec_SELECTgetRows(
425 'index_words.baseword, index_rel.*',
426 'index_rel, index_words',
427 'index_rel.phash = ' . (int)$row['phash'] . ' AND index_words.wid = index_rel.wid',
428 '', '', '', 'baseword');
429 if (is_array($wordRecords)) {
430 $indexed_words = array_keys($wordRecords);
431 sort($indexed_words);
432 $row['allWords'] = $indexed_words;
433 }
434 }
435
436 $lines[] = $row;
437 }
438
439 $singleLine['lines'] = $lines;
440 $allLines[] = $singleLine;
441 }
442
443 return $allLines;
444 }
445
446 /**
447 * Generates a list of Page-uid's from $id.
448 * The only pages excluded from the list are deleted pages.
449 *
450 * @param int $id page id
451 * @param int $depth to traverse down the page tree.
452 * @param int $begin is an optional integer that determines at which level in the tree to start collecting uid's. Zero means 'start right away', 1 = 'next level and out'
453 * @param string $perms_clause
454 * @return string Returns the list with a comma in the end + id itself
455 */
456 protected function extGetTreeList($id, $depth, $begin = 0, $perms_clause)
457 {
458 $list = GeneralUtility::makeInstance(FrontendBackendUserAuthentication::class)->extGetTreeList($id, $depth, $begin, $perms_clause);
459
460 if (empty($list)) {
461 $list = $id;
462 } else {
463 $list = rtrim($list, ',') . ',' . $id;
464 }
465
466 return $list;
467 }
468
469 /**
470 * Remove indexed phash row
471 *
472 * @param string $phashList
473 * @param int $pageId
474 * @param int $depth
475 * @return void
476 */
477 public function removeIndexedPhashRow($phashList, $pageId, $depth = 4)
478 {
479 if ($phashList === 'ALL') {
480 $this->getTree($pageId, $depth, '');
481 $phashRows = $this->allPhashListed;
482 $this->allPhashListed = array();
483 } else {
484 $phashRows = GeneralUtility::trimExplode(',', $phashList, true);
485 }
486
487 foreach ($phashRows as $phash) {
488 $phash = (int)$phash;
489 if ($phash > 0) {
490 $idList = array();
491 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_section');
492 $res = $queryBuilder
493 ->select('page_id')
494 ->from('index_section')
495 ->where(
496 $queryBuilder->expr()->eq('phash', (int)$phash)
497 )
498 ->execute();
499 while ($row = $res->fetch()) {
500 $idList[] = (int)$row['page_id'];
501 }
502
503 if (!empty($idList)) {
504 /** @var FrontendInterface $pageCache */
505 $pageCache = GeneralUtility::makeInstance(CacheManager::class)->getCache('cache_pages');
506 foreach ($idList as $pageId) {
507 $pageCache->flushByTag('pageId_' . $pageId);
508 }
509 }
510
511 // Removing old registrations for all tables.
512 $tableArr = array('index_phash', 'index_rel', 'index_section', 'index_grlist', 'index_fulltext', 'index_debug');
513 foreach ($tableArr as $table) {
514 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($table);
515 $queryBuilder
516 ->delete($table)
517 ->where(
518 $queryBuilder->expr()->eq('phash', (int)$phash)
519 )
520 ->execute();
521 }
522 }
523 }
524 }
525
526 /**
527 * Save stop words
528 *
529 * @param array $words stop words
530 * @return void
531 */
532 public function saveStopWords(array $words)
533 {
534 foreach ($words as $wid => $state) {
535 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
536 $queryBuilder
537 ->update('index_words')
538 ->set('is_stopword', (int)$state)
539 ->where($queryBuilder->expr()->eq('wid', (int)$wid))
540 ->execute();
541 }
542 }
543
544 /**
545 * Save keywords
546 *
547 * @param array $words keywords
548 * @param int $pageId page id
549 * @return void
550 */
551 public function saveKeywords(array $words, $pageId)
552 {
553 // Get pages current keywords
554 $pageRec = BackendUtility::getRecord('pages', $pageId);
555 if (!is_array($pageRec)) {
556 return;
557 }
558 $keywords = array_flip(GeneralUtility::trimExplode(',', $pageRec['keywords'], true));
559 // Merge keywords:
560 foreach ($words as $key => $v) {
561 if ($v) {
562 $keywords[$key] = 1;
563 } else {
564 unset($keywords[$key]);
565 }
566 }
567 // Compile new list:
568 $data = array();
569 $data['pages'][$pageId]['keywords'] = implode(', ', array_keys($keywords));
570 $dataHandler = GeneralUtility::makeInstance(DataHandler::class);
571 $dataHandler->start($data, array());
572 $dataHandler->process_datamap();
573 }
574
575 /**
576 * Collect the type icons
577 *
578 * @param string $itemType
579 * @return string
580 */
581 protected function makeItemTypeIcon($itemType)
582 {
583 if (!isset($this->iconFileNameCache[$itemType])) {
584 $icon = '';
585 if ($itemType === '0') {
586 $icon = 'EXT:indexed_search/Resources/Public/Icons/FileTypes/pages.gif';
587 } elseif ($this->external_parsers[$itemType]) {
588 $icon = $this->external_parsers[$itemType]->getIcon($itemType);
589 }
590 $this->iconFileNameCache[$itemType] = $icon;
591 }
592 return $this->iconFileNameCache[$itemType];
593 }
594
595 /**
596 * @return DatabaseConnection
597 */
598 protected function getDatabaseConnection()
599 {
600 return $GLOBALS['TYPO3_DB'];
601 }
602
603 /**
604 * @return BackendUserAuthentication
605 */
606 protected function getBackendUserAuthentication()
607 {
608 return $GLOBALS['BE_USER'];
609 }
610 }