[TASK] Doctrine: Migrate indexed_search part 2
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Domain / Repository / IndexSearchRepository.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Domain\Repository;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Doctrine\DBAL\Driver\Statement;
18 use TYPO3\CMS\Core\Database\ConnectionPool;
19 use TYPO3\CMS\Core\Database\Query\QueryHelper;
20 use TYPO3\CMS\Core\TimeTracker\TimeTracker;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22 use TYPO3\CMS\Core\Utility\MathUtility;
23 use TYPO3\CMS\IndexedSearch\Indexer;
24 use TYPO3\CMS\IndexedSearch\Utility;
25
26 /**
27 * Index search abstraction to search through the index
28 */
29 class IndexSearchRepository
30 {
/**
 * Indexer instance, created in initialize()
 *
 * @var Indexer
 */
protected $indexerObj;

/**
 * External parsers handed in through initialize()
 *
 * @var array
 */
protected $externalParsers = [];

/**
 * Group list of the current frontend user
 *
 * @var string
 */
protected $frontendUserGroupList = '';

/**
 * Section restriction
 * formerly known as $this->piVars['sections']
 *
 * @var string|null
 */
protected $sections;

/**
 * Search type
 * formerly known as $this->piVars['type']
 *
 * @var string|null
 */
protected $searchType;

/**
 * Language uid
 * formerly known as $this->piVars['lang']
 *
 * @var int|null
 */
protected $languageUid;

/**
 * Media type
 * formerly known as $this->piVars['media']
 *
 * @var int|null
 */
protected $mediaType;

/**
 * Sort order
 * formerly known as $this->piVars['sort_order']
 *
 * @var string|null
 */
protected $sortOrder;

/**
 * Descending sort order flag
 * formerly known as $this->piVars['desc']
 *
 * @var bool|null
 */
protected $descendingSortOrderFlag;

/**
 * Pointer to the result page currently shown
 * formerly known as $this->piVars['pointer']
 *
 * @var int
 */
protected $resultpagePointer = 0;

/**
 * Number of results per result page
 * formerly known as $this->piVars['result']
 *
 * @var int
 */
protected $numberOfResults = 10;

/**
 * Comma separated list of all root pages that will be used.
 * If this value is set to less than zero (eg. -1) searching will happen
 * in ALL of the page tree with no regard to branches at all.
 *
 * @var string
 */
protected $searchRootPageIdList;

/**
 * formerly known as $conf['search.']['searchSkipExtendToSubpagesChecking']
 * enabled through settings.searchSkipExtendToSubpagesChecking
 *
 * @var bool
 */
protected $joinPagesForQuery = false;

/**
 * Select clauses for the individual words, filled during the search
 *
 * @var array
 */
protected $wSelClauses = [];

/**
 * Flag for exact search count
 * formerly known as $conf['search.']['exactCount']
 *
 * Continue counting and checking of results even if we are sure
 * they are not displayed in this request. This will slow down your
 * page rendering, but it allows precise search result counters.
 * enabled through settings.exactCount
 *
 * @var bool
 */
protected $useExactCount = false;

/**
 * Display forbidden records
 * formerly known as $this->conf['show.']['forbiddenRecords']
 *
 * enabled through settings.displayForbiddenRecords
 *
 * @var bool
 */
protected $displayForbiddenRecords = false;
162
/**
 * Initialize all options that are necessary for the search.
 *
 * Optional keys of $settings and $searchData are read defensively, so a
 * missing key leaves the corresponding default in place instead of raising
 * an "undefined index" notice.
 *
 * @param array $settings the extbase plugin settings
 * @param array $searchData the search data
 * @param array $externalParsers
 * @param string $searchRootPageIdList
 * @return void
 */
public function initialize($settings, $searchData, $externalParsers, $searchRootPageIdList)
{
    // Initialize the indexer-class - just to use a few function (for making hashes)
    $this->indexerObj = GeneralUtility::makeInstance(Indexer::class);
    $this->externalParsers = $externalParsers;
    $this->searchRootPageIdList = $searchRootPageIdList;
    $this->frontendUserGroupList = $this->getTypoScriptFrontendController()->gr_list;

    // Should we use joinPagesForQuery instead of long lists of uids?
    if (!empty($settings['searchSkipExtendToSubpagesChecking'])) {
        $this->joinPagesForQuery = true;
    }
    if (!empty($settings['exactCount'])) {
        $this->useExactCount = true;
    }
    if (!empty($settings['displayForbiddenRecords'])) {
        $this->displayForbiddenRecords = true;
    }

    $this->sections = isset($searchData['sections']) ? $searchData['sections'] : null;
    $this->searchType = isset($searchData['searchType']) ? $searchData['searchType'] : null;
    $this->languageUid = isset($searchData['languageUid']) ? $searchData['languageUid'] : null;
    // FALSE marks "no media type submitted"
    $this->mediaType = isset($searchData['mediaType']) ? $searchData['mediaType'] : false;
    $this->sortOrder = isset($searchData['sortOrder']) ? $searchData['sortOrder'] : null;
    $this->descendingSortOrderFlag = isset($searchData['desc']) ? $searchData['desc'] : null;
    $this->resultpagePointer = isset($searchData['pointer']) ? $searchData['pointer'] : 0;

    // Only accept a positive page size: doSearch() divides by this value,
    // so zero would trigger a division by zero there.
    if (isset($searchData['numberOfResults'])
        && is_numeric($searchData['numberOfResults'])
        && (int)$searchData['numberOfResults'] > 0
    ) {
        $this->numberOfResults = (int)$searchData['numberOfResults'];
    }
}
200
/**
 * Get search result rows / data from database. Returned as data in array.
 *
 * The rows are fetched through getResultRows_SQLpointer() (or through a hook
 * object registered for 'getResultRows_SQLpointer'), duplicates are filtered
 * by phash_grouping and by contentHash + data_page_id, and only the rows of
 * the currently requested result page are collected.
 *
 * @param array $searchWords Search word array
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return bool|array FALSE if no result, otherwise an array with the keys 'resultRows', 'firstRow' and 'count'
 */
public function doSearch($searchWords, $freeIndexUid = -1)
{
    // Getting SQL result pointer:
    $this->getTimeTracker()->push('Searching result');
    // Plain assignment (no reference): the return value of a method call is not a variable
    if ($hookObj = $this->hookRequest('getResultRows_SQLpointer')) {
        $result = $hookObj->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    } else {
        $result = $this->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    }
    $this->getTimeTracker()->pull();

    if (!$result) {
        // No results found
        return false;
    }

    // Total search-result count; reduced below for every row that is skipped
    $count = $result->rowCount();
    // The pointer is set to the result page that is currently being viewed
    $pointer = MathUtility::forceIntegerInRange(
        $this->resultpagePointer,
        0,
        floor($count / $this->numberOfResults)
    );
    // Position window (1-based, inclusive upper bound) of the rows shown on the current page
    $windowStart = $pointer * $this->numberOfResults;
    $windowEnd = $pointer * $this->numberOfResults + $this->numberOfResults;

    // Result pointer: Counts up the position in the current search-result
    $c = 0;
    // Used to filter out duplicates.
    $grouping_phashes = [];
    // Used to filter out duplicates BASED ON cHash.
    $grouping_chashes = [];
    // Will hold the first row in result - used to calculate relative hit-ratings.
    $firstRow = [];
    // Will hold the results rows for display.
    $resultRows = [];

    // Now, traverse result and put the rows to be displayed into an array
    // Each row should contain the fields from 'ISEC.*, IP.*' combined
    // + artificial fields "show_resume" (bool) and "result_number" (counter)
    while ($row = $result->fetch()) {
        // Set first row
        if (!$c) {
            $firstRow = $row;
        }
        // Tells whether we can link directly to a document
        // or not (depends on possible right problems)
        $row['show_resume'] = $this->checkResume($row);

        $contentKey = $row['contentHash'] . '.' . $row['data_page_id'];
        $isDuplicate = in_array($row['phash_grouping'], $grouping_phashes)
            || in_array($contentKey, $grouping_chashes);
        $isHidden = !$row['show_resume'] && !$this->displayForbiddenRecords;
        if ($isDuplicate || $isHidden) {
            // Grouped duplicates and rows the user may not view are not displayed,
            // so the search-result count is reduced to match the rows displayed.
            $count--;
            continue;
        }

        // Only documents which are not multiple pages documents are grouped by phash
        if (!$this->multiplePagesType($row['item_type'])) {
            $grouping_phashes[] = $row['phash_grouping'];
        }
        $grouping_chashes[] = $contentKey;
        // Increase the result pointer
        $c++;

        // All rows for display is put into resultRows[]
        if ($c > $windowStart && $c <= $windowEnd) {
            $row['result_number'] = $c;
            $resultRows[] = $row;
            // This may lead to a problem: If the result check is not stopped here, the search will take longer.
            // However the result counter will not filter out grouped cHashes/pHashes that were not processed yet.
            // You can change this behavior using the "search.exactCount" property (see above).
            if (!$this->useExactCount && $c + 1 > ($pointer + 1) * $this->numberOfResults) {
                break;
            }
        }
    }

    $result->closeCursor();

    return [
        'resultRows' => $resultRows,
        'firstRow' => $firstRow,
        'count' => $count
    ];
}
294
/**
 * Gets a SQL result pointer to traverse for the search records.
 *
 * @param array $searchWords Search words
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return Statement|bool The executed statement, or FALSE if no phash matched the search words
 */
protected function getResultRows_SQLpointer($searchWords, $freeIndexUid = -1)
{
    // Search for the words in $searchWords; this yields the COMPLETE
    // comma separated list of phash-integers of the matches.
    $phashList = $this->getPhashList($searchWords);
    if (!$phashList) {
        return false;
    }

    // Perform SQL Search / collection of result rows:
    $timeTracker = $this->getTimeTracker();
    $timeTracker->push('execFinalQuery');
    $statement = $this->execFinalQuery($phashList, $freeIndexUid);
    $this->getTimeTracker()->pull();

    return $statement;
}
318
319 /***********************************
320 *
321 * Helper functions on searching (SQL)
322 *
323 ***********************************/
/**
 * Returns a COMPLETE list of phash-integers matching the search-result composed of the search-words in the $searchWords array.
 * The list of phash integers are unsorted and should be used for subsequent selection of index_phash records for display of the result.
 *
 * As a side effect $this->wSelClauses is reset and refilled by the word
 * search methods, and a sentence search switches $this->sortOrder to 'mtime'.
 *
 * @param array $searchWords Search word array
 * @return string List of integers
 */
protected function getPhashList($searchWords)
{
    $isFirstWord = true;
    // This array accumulates the phash-values
    $totalHashList = [];
    $this->wSelClauses = [];

    // Traverse searchwords; for each, select all phash integers and
    // merge/diff/intersect them with previous word (based on operator)
    foreach ($searchWords as $wordData) {
        $sWord = $wordData['sword'];
        // Search type; a search word containing spaces is always searched as a sentence (type 20)
        $theType = strpos($sWord, ' ') !== false ? 20 : (string)$this->searchType;

        $this->getTimeTracker()->push('SearchWord "' . $sWord . '" - $theType=' . $theType);

        // Perform search for word:
        switch ($theType) {
            case '1':
                // Part of word
                $res = $this->searchWord($sWord, Utility\LikeWildcard::BOTH);
                break;
            case '2':
                // First part of word
                $res = $this->searchWord($sWord, Utility\LikeWildcard::RIGHT);
                break;
            case '3':
                // Last part of word
                $res = $this->searchWord($sWord, Utility\LikeWildcard::LEFT);
                break;
            case '10':
                // Sounds like: perform metaphone search with a fresh indexer instance
                /** @var Indexer $metaphoneIndexer */
                $metaphoneIndexer = GeneralUtility::makeInstance(Indexer::class);
                $storeMetaphoneInfoAsWords = !$this->isTableUsed('index_words');
                $res = $this->searchMetaphone(
                    $metaphoneIndexer->metaphone($sWord, $storeMetaphoneInfoAsWords)
                );
                break;
            case '20':
                // Sentence
                $res = $this->searchSentence($sWord);
                // If there is a fulltext search for a sentence there is
                // a likeliness that sorting cannot be done by the rankings
                // from the rel-table (because no relations will exist for the
                // sentence in the word-table). So therefore mtime is used instead.
                // It is not required, but otherwise some hits may be left out.
                $this->sortOrder = 'mtime';
                break;
            default:
                // Distinct word
                $res = $this->searchDistinct($sWord);
        }

        // If there was a query to do, then select all phash-integers which resulted from this.
        if ($res) {
            $wordHashList = [];
            while ($hit = $res->fetch()) {
                $wordHashList[] = $hit['phash'];
            }

            if ($isFirstWord) {
                // First search
                $totalHashList = $wordHashList;
            } else {
                // Merge with the existing result based on whether we are
                // dealing with OR, NOT or AND operations.
                switch ($wordData['oper']) {
                    case 'OR':
                        $totalHashList = array_unique(array_merge($wordHashList, $totalHashList));
                        break;
                    case 'AND NOT':
                        $totalHashList = array_diff($totalHashList, $wordHashList);
                        break;
                    default:
                        // AND...
                        $totalHashList = array_intersect($totalHashList, $wordHashList);
                }
            }
        }

        $this->getTimeTracker()->pull();
        $isFirstWord = false;
    }

    return implode(',', $totalHashList);
}
419
/**
 * Executes a query which selects the phash values of the search-word from the word/rel tables.
 *
 * The query joins index_words (IW), index_rel (IR) and index_section (ISEC),
 * applies the section restriction of sectionTableWhere() and groups by IR.phash.
 *
 * @param string $wordSel WHERE clause selecting the word from phash
 * @param string $additionalWhereClause Additional AND clause in the end of the query.
 * @return Statement
 */
protected function execPHashListQuery($wordSel, $additionalWhereClause = '')
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_words');
    $expr = $queryBuilder->expr();

    $queryBuilder->select('IR.phash');
    $queryBuilder->from('index_words', 'IW');
    $queryBuilder->from('index_rel', 'IR');
    $queryBuilder->from('index_section', 'ISEC');

    // Leading "AND"/"OR" of the raw SQL fragments is stripped before they are used as constraints
    $wordConstraint = QueryHelper::stripLogicalOperatorPrefix($wordSel);
    $wordJoin = $expr->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid'));
    $sectionJoin = $expr->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash'));
    $sectionConstraint = QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere());
    $additionalConstraint = QueryHelper::stripLogicalOperatorPrefix($additionalWhereClause);

    $queryBuilder->where(
        $wordConstraint,
        $wordJoin,
        $sectionJoin,
        $sectionConstraint,
        $additionalConstraint
    );
    $queryBuilder->groupBy('IR.phash');

    return $queryBuilder->execute();
}
445
/**
 * Search for a word using a LIKE comparison on IW.baseword
 *
 * The generated clause is also remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @param int $wildcard Bit-field of Utility\LikeWildcard
 * @return Statement
 */
protected function searchWord($sWord, $wildcard)
{
    $wordClause = Utility\LikeWildcard::cast($wildcard)
        ->getLikeQueryPart('index_words', 'IW.baseword', $sWord);
    $this->wSelClauses[] = $wordClause;

    return $this->execPHashListQuery($wordClause, ' AND is_stopword=0');
}
464
/**
 * Search for one distinct word by comparing IW.wid with the integer hash of the word
 *
 * The generated clause is also remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @return Statement
 */
protected function searchDistinct($sWord)
{
    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
    $expr = $connectionPool->getQueryBuilderForTable('index_words')->expr();

    $wordClause = $expr->eq('IW.wid', $this->md5inthash($sWord));
    $this->wSelClauses[] = $wordClause;

    $noStopWord = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($wordClause, $noStopWord);
}
480
/**
 * Search for a sentence with a LIKE comparison on the fulltext table
 *
 * A neutral '1=1' clause is added to $this->wSelClauses for the sentence.
 *
 * @param string $sWord the search word
 * @return Statement
 */
protected function searchSentence($sWord)
{
    $this->wSelClauses[] = '1=1';

    $fulltextClause = Utility\LikeWildcard::cast(Utility\LikeWildcard::BOTH)
        ->getLikeQueryPart('index_fulltext', 'IFT.fulltextdata', $sWord);

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_section');
    $queryBuilder->select('ISEC.phash');
    $queryBuilder->from('index_section', 'ISEC');
    $queryBuilder->from('index_fulltext', 'IFT');
    $queryBuilder->where(
        QueryHelper::stripLogicalOperatorPrefix($fulltextClause),
        $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IFT.phash')),
        QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere())
    );
    $queryBuilder->groupBy('ISEC.phash');

    return $queryBuilder->execute();
}
509
/**
 * Search for a metaphone word by comparing IW.metaphone with the given value
 *
 * The generated clause is also remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @return Statement
 */
protected function searchMetaphone($sWord)
{
    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
    $expr = $connectionPool->getQueryBuilderForTable('index_words')->expr();

    $metaphoneClause = $expr->eq('IW.metaphone', $expr->literal($sWord));
    $this->wSelClauses[] = $metaphoneClause;

    $noStopWord = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($metaphoneClause, $noStopWord);
}
525
/**
 * Returns AND statement for selection of section in database. (rootlevel 0-2 + page_id)
 *
 * $this->sections is either "<field>_<comma separated ids>" (rl1, rl2 or a
 * field configured in addRootLineFields) or one of the static values
 * '-1', '-2', '-3'.
 *
 * @return string AND clause for selection of section in database, empty string if there is no restriction.
 */
public function sectionTableWhere()
{
    $expr = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_section')
        ->expr();
    $constraints = $expr->andX();

    // A root page list below zero means: search the whole page tree
    if (!($this->searchRootPageIdList < 0)) {
        $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList, true);
        $constraints->add($expr->in('ISEC.rl0', $rootPageIds));
    }

    // Find the rootline field (if any) that prefixes the section value
    $sectionField = null;
    $sectionValue = (string)$this->sections;
    if (strpos($sectionValue, 'rl1_') === 0) {
        $sectionField = 'rl1';
    } elseif (strpos($sectionValue, 'rl2_') === 0) {
        $sectionField = 'rl2';
    } elseif (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) {
        // Traversing user configured fields to see if any of those are used to limit search to a section:
        $configuredFields = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'];
        foreach ($configuredFields as $fieldName => $rootLineLevel) {
            if (strpos($sectionValue, $fieldName . '_') === 0) {
                $sectionField = $fieldName;
                break;
            }
        }
    }

    if ($sectionField !== null) {
        // Everything after "<field>_" is the id list
        $idList = substr($this->sections, strlen($sectionField) + 1);
        $constraints->add(
            $expr->in('ISEC.' . $sectionField, GeneralUtility::intExplode(',', $idList))
        );
    } else {
        // If no match above, test the static types:
        switch ((string)$this->sections) {
            case '-1':
                $constraints->add(
                    $expr->eq('ISEC.page_id', (int)$this->getTypoScriptFrontendController()->id)
                );
                break;
            case '-2':
                $constraints->add($expr->eq('ISEC.rl2', 0));
                break;
            case '-3':
                $constraints->add($expr->gt('ISEC.rl2', 0));
                break;
        }
    }

    if (!$constraints->count()) {
        return '';
    }

    return ' AND ' . $constraints;
}
588
/**
 * Returns AND statement for selection of media type
 *
 * Media type values handled here:
 *  - FALSE (no media type submitted) or '-1': all content, no restriction
 *  - '0': only TYPO3 pages
 *  - '-2': all external documents
 *  - anything else: only items of exactly that item type
 *
 * @return string AND statement for selection of media type, empty string if there is no restriction
 */
public function mediaTypeWhere()
{
    // FALSE must be tested strictly BEFORE the switch: switch compares loosely
    // and FALSE == '0' is TRUE, so a missing media type would otherwise be
    // treated as "only TYPO3 pages" instead of "all content".
    if ($this->mediaType === false) {
        return '';
    }

    $expressionBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash')
        ->expr();
    switch ($this->mediaType) {
        case '0':
            // '0' => 'only TYPO3 pages',
            $whereClause = $expressionBuilder->eq('IP.item_type', $expressionBuilder->literal('0'));
            break;
        case '-2':
            // All external documents
            $whereClause = $expressionBuilder->neq('IP.item_type', $expressionBuilder->literal('0'));
            break;
        case false:
            // Other values loosely equal to FALSE (e.g. an empty string)
            // Intentional fall-through
        case '-1':
            // All content
            $whereClause = '';
            break;
        default:
            $whereClause = $expressionBuilder->eq('IP.item_type', $expressionBuilder->literal($this->mediaType));
    }
    return $whereClause ? ' AND ' . $whereClause : '';
}
619
/**
 * Returns AND statement for selection of language
 *
 * @return string AND statement for selection of language, empty string if all languages are searched
 */
public function languageWhere()
{
    // Any language uid of zero or above restricts the search to that language;
    // -1 is the same as ALL language.
    if (!($this->languageUid < 0)) {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
        $expr = $connectionPool->getQueryBuilderForTable('index_phash')->expr();

        return ' AND ' . $expr->eq('IP.sys_language_uid', (int)$this->languageUid);
    }

    return '';
}
638
/**
 * Where-clause for free index-uid value.
 *
 * If the given uid points to an index_config record of type 5, the
 * "indexcfgs" references of that record ("index_config_<uid>" or
 * "pages_<uid>") are resolved to the uids of the enabled index_config
 * records they refer to. Otherwise the uid itself is used.
 *
 * @param int $freeIndexUid Free Index UID value to limit search to.
 * @return string WHERE SQL clause part, empty string for a negative uid.
 */
public function freeIndexUidWhere($freeIndexUid)
{
    $freeIndexUid = (int)$freeIndexUid;
    if ($freeIndexUid < 0) {
        return '';
    }

    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

    // First, look if the freeIndexUid is a meta configuration:
    $metaQuery = $connectionPool->getQueryBuilderForTable('index_config');
    $metaQuery->getRestrictions()->removeAll();
    $metaQuery->select('indexcfgs');
    $metaQuery->from('index_config');
    $metaQuery->where(
        $metaQuery->expr()->eq('type', 5),
        $metaQuery->expr()->eq('uid', $freeIndexUid),
        QueryHelper::stripLogicalOperatorPrefix($this->enableFields('index_config'))
    );
    $metaConfiguration = $metaQuery->execute()->fetch();

    if (!is_array($metaConfiguration)) {
        $uidList = [$freeIndexUid];
    } else {
        // Default value to protect against empty array.
        $uidList = [-99];
        foreach (GeneralUtility::trimExplode(',', $metaConfiguration['indexcfgs']) as $reference) {
            // A reference has the form "<table>_<uid>"
            list($table, $uid) = GeneralUtility::revExplode('_', $reference, 2);
            $uid = (int)$uid;

            $configQuery = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('index_config');
            $configQuery->getRestrictions()->removeAll();
            $configQuery->select('uid')
                ->from('index_config')
                ->where(QueryHelper::stripLogicalOperatorPrefix($this->enableFields('index_config')));

            if ($table === 'index_config') {
                // Direct reference: keep the uid if the record exists and is enabled
                $configQuery->andWhere($configQuery->expr()->eq('uid', $uid));
                if ($configQuery->execute()->fetch()) {
                    $uidList[] = $uid;
                }
            } elseif ($table === 'pages') {
                // Page reference: collect all enabled configurations stored on that page
                $configQuery->andWhere($configQuery->expr()->eq('pid', $uid));
                $configsOnPage = $configQuery->execute();
                while ($configRecord = $configsOnPage->fetch()) {
                    $uidList[] = $configRecord['uid'];
                }
            }
        }
        $uidList = array_unique($uidList);
    }

    $expr = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash')
        ->expr();

    return ' AND ' . $expr->in('IP.freeIndexUid', array_map('intval', $uidList));
}
706
/**
 * Execute final query, based on phash integer list. The main point is sorting the result in the right order.
 *
 * The base query selects from index_phash (IP) and index_section (ISEC),
 * joined on their phash. index_words (IW) and index_rel (IR) are only joined
 * when one of the "rank_*" sort orders is active, because only those need the
 * word relations to calculate the ranking.
 *
 * @param string $list List of phash integers which match the search.
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return Statement
 */
protected function execFinalQuery($list, $freeIndexUid = -1)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $queryBuilder->select('ISEC.*', 'IP.*')
        ->from('index_phash', 'IP')
        ->from('index_section', 'ISEC')
        ->where(
            $queryBuilder->expr()->in('IP.phash', GeneralUtility::intExplode(',', $list, true)),
            QueryHelper::stripLogicalOperatorPrefix($this->mediaTypeWhere()),
            QueryHelper::stripLogicalOperatorPrefix($this->languageWhere()),
            QueryHelper::stripLogicalOperatorPrefix($this->freeIndexUidWhere($freeIndexUid)),
            // Join the two base tables with each other. This must not reference
            // IR, since index_rel is only part of the query for "rank_*" sorting.
            $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IP.phash'))
        )
        ->groupBy(
            'IP.phash',
            'ISEC.phash',
            'ISEC.phash_t3',
            'ISEC.rl0',
            'ISEC.rl1',
            'ISEC.rl2',
            'ISEC.page_id',
            'ISEC.uniqid',
            'IP.phash_grouping',
            'IP.data_filename',
            'IP.data_page_id',
            'IP.data_page_reg1',
            'IP.data_page_type',
            'IP.data_page_mp',
            'IP.gr_list',
            'IP.item_type',
            'IP.item_title',
            'IP.item_description',
            'IP.item_mtime',
            'IP.tstamp',
            'IP.item_size',
            'IP.contentHash',
            'IP.crdate',
            'IP.parsetime',
            'IP.sys_language_uid',
            'IP.item_crdate',
            'IP.cHashParams',
            'IP.externalUrl',
            'IP.recordUid',
            'IP.freeIndexUid',
            'IP.freeIndexSetId'
        );

    // Setting up methods of filtering results
    // based on page types, access, etc.
    if ($hookObj = $this->hookRequest('execFinalQuery_idList')) {
        // Calling hook for alternative creation of page ID list
        $hookWhere = QueryHelper::stripLogicalOperatorPrefix($hookObj->execFinalQuery_idList($list));
        if (!empty($hookWhere)) {
            $queryBuilder->andWhere($hookWhere);
        }
    } elseif ($this->joinPagesForQuery) {
        // Alternative to getting all page ids by ->getTreeList() where
        // "excludeSubpages" is NOT respected.
        $queryBuilder->getRestrictions()->removeAll();
        $queryBuilder->from('pages');
        $queryBuilder->andWhere(
            // The page reference column of index_section is "page_id"
            $queryBuilder->expr()->eq('pages.uid', $queryBuilder->quoteIdentifier('ISEC.page_id')),
            QueryHelper::stripLogicalOperatorPrefix($this->enableFields('pages')),
            $queryBuilder->expr()->eq('pages.no_search', 0),
            $queryBuilder->expr()->lt('pages.doktype', 200)
        );
    } elseif ($this->searchRootPageIdList >= 0) {
        // Collecting all pages IDs in which to search;
        // filtering out ALL pages that are not accessible due to enableFields.
        // Does NOT look for "no_search" field!
        $siteIdNumbers = GeneralUtility::intExplode(',', $this->searchRootPageIdList);
        $pageIdList = [];
        foreach ($siteIdNumbers as $rootId) {
            $pageIdList[] = $this->getTypoScriptFrontendController()->cObj->getTreeList(-1 * $rootId, 9999);
        }
        $queryBuilder->andWhere(
            $queryBuilder->expr()->in(
                'ISEC.page_id',
                array_unique(GeneralUtility::intExplode(',', implode(',', $pageIdList), true))
            )
        );
    }
    // otherwise select all / disable everything

    // If any of the ranking sortings are selected, we must make a
    // join with the word/rel-table again, because we need to
    // calculate ranking based on all search-words found.
    if (substr($this->sortOrder, 0, 5) === 'rank_') {
        $queryBuilder
            ->from('index_words', 'IW')
            ->from('index_rel', 'IR')
            ->andWhere(
                $queryBuilder->expr()->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid')),
                $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash'))
            );
        switch ($this->sortOrder) {
            case 'rank_flag':
                // This gives priority to word-position (max-value) so that words in title, keywords, description counts more than in content.
                // The ordering is refined with the frequency sum as well.
                $queryBuilder
                    ->addSelectLiteral(
                        $queryBuilder->expr()->max('IR.flags', 'order_val1'),
                        $queryBuilder->expr()->sum('IR.freq', 'order_val2')
                    )
                    ->orderBy('order_val1', $this->getDescendingSortOrderFlag())
                    ->addOrderBy('order_val2', $this->getDescendingSortOrderFlag());
                break;
            case 'rank_first':
                // Results in average position of search words on page.
                // Must be inversely sorted (low numbers are closer to top)
                $queryBuilder
                    ->addSelectLiteral($queryBuilder->expr()->avg('IR.first', 'order_val'))
                    ->orderBy('order_val', $this->getDescendingSortOrderFlag(true));
                break;
            case 'rank_count':
                // Number of words found
                $queryBuilder
                    ->addSelectLiteral($queryBuilder->expr()->sum('IR.count', 'order_val'))
                    ->orderBy('order_val', $this->getDescendingSortOrderFlag());
                break;
            default:
                // Frequency sum. I'm not sure if this is the best way to do
                // it (make a sum...). Or should it be the average?
                $queryBuilder
                    ->addSelectLiteral($queryBuilder->expr()->sum('IR.freq', 'order_val'))
                    ->orderBy('order_val', $this->getDescendingSortOrderFlag());
        }

        if (!empty($this->wSelClauses)) {
            // So, words are combined in an OR statement
            // (no "sentence search" should be done here - may deselect results)
            $wordSel = $queryBuilder->expr()->orX();
            foreach ($this->wSelClauses as $wSelClause) {
                $wordSel->add(QueryHelper::stripLogicalOperatorPrefix($wSelClause));
            }
            $queryBuilder->andWhere($wordSel);
        }
    } else {
        // Otherwise, if sorting are done with the pages table or other fields,
        // there is no need for joining with the rel/word tables:
        switch ((string)$this->sortOrder) {
            case 'title':
                $queryBuilder->orderBy('IP.item_title', $this->getDescendingSortOrderFlag());
                break;
            case 'crdate':
                $queryBuilder->orderBy('IP.item_crdate', $this->getDescendingSortOrderFlag());
                break;
            case 'mtime':
                $queryBuilder->orderBy('IP.item_mtime', $this->getDescendingSortOrderFlag());
                break;
        }
    }

    return $queryBuilder->execute();
}
869
/**
 * Checking if the resume can be shown for the search result
 * (depending on whether the rights are OK)
 * ? Should it also check for gr_list "0,-1"?
 *
 * @param array $row Result row array.
 * @return bool Returns TRUE if resume can safely be shown
 */
protected function checkResume($row)
{
    // If the record is indexed by an indexing configuration, just show it.
    // At least this is needed for external URLs and files.
    // For records we might need to extend this - for instance block display if record is access restricted.
    if ($row['freeIndexUid']) {
        return true;
    }

    $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_grlist');

    // Evaluate regularly indexed pages based on item_type:
    if ($row['item_type']) {
        // External media: we check the access of the parent page on which the media was linked from.
        // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing of the documents
        // in this section. So, selecting for the grlist records belonging to the parent phash-row where the
        // current users gr_list exists will help us to know. If this is NOT found, there is still a theoretical
        // possibility that another user accessible page would display a link, so maybe the resume of such a
        // document here may be unjustified hidden. But better safe than sorry.
        $phashToCheck = (int)$row['phash_t3'];
    } elseif ((string)$row['gr_list'] === (string)$this->frontendUserGroupList) {
        // Ordinary TYPO3 page that was indexed with exactly the group list of the current user
        return true;
    } else {
        // Ordinary TYPO3 page indexed with another group list:
        // selecting for the grlist records belonging to the phash-row where the current users gr_list exists.
        // If it is found it is proof that this user has direct access to the phash-rows content although
        // he did not himself initiate the indexing...
        $phashToCheck = (int)$row['phash'];
    }

    if (!$this->isTableUsed('index_grlist')) {
        return false;
    }

    $matches = $connection->count(
        'phash',
        'index_grlist',
        [
            'phash' => $phashToCheck,
            'gr_list' => $this->frontendUserGroupList
        ]
    );

    return (bool)$matches;
}
931
/**
 * Builds the sort direction suffix from the configured highest/lowest
 * result order (piVars['desc']).
 *
 * @param bool $inverse If TRUE, the configured order flag is flipped before it is evaluated
 * @return string " DESC" if the (possibly flipped) flag is not set, otherwise an empty string
 * @formallyknownas tx_indexedsearch_pi->isDescending
 */
protected function getDescendingSortOrderFlag($inverse = false)
{
    $flag = (bool)$this->descendingSortOrderFlag;
    if ($inverse) {
        $flag = !$flag;
    }

    // Note the direction: a set flag yields no suffix, an unset flag yields " DESC".
    if ($flag) {
        return '';
    }

    return ' DESC';
}
948
/**
 * Returns the part of a WHERE clause which filters out records that should be de-selected
 * because of their "enablefields" (start/end times, hidden or fe_groups fields).
 * Definitely a frontend function.
 *
 * The call is forwarded to the enableFields() method of the frontend controller's sys_page object,
 * passing the matching "show hidden" flag along: showHiddenPage for the "pages" table,
 * showHiddenRecords for every other table.
 *
 * @param string $table The table for which to get the where clause
 * @return string The part of the where clause on the form " AND [fieldname]=0 AND ...". Eg. " AND hidden=0 AND starttime < 123345567"
 * @see \TYPO3\CMS\Frontend\Page\PageRepository::enableFields()
 */
protected function enableFields($table)
{
    $frontendController = $this->getTypoScriptFrontendController();

    // Pages and all other records have separate "show hidden" preview flags
    if ($table === 'pages') {
        $showHidden = $frontendController->showHiddenPage;
    } else {
        $showHidden = $frontendController->showHiddenRecords;
    }

    return $frontendController->sys_page->enableFields($table, $showHidden);
}
967
/**
 * Returns if an item type is a multipage item type.
 *
 * The decision is delegated to the external parser registered for the item type in
 * $this->externalParsers. Item types without a registered parser object are never multipage.
 *
 * @param string $itemType Item type
 * @return bool TRUE if multipage capable
 */
protected function multiplePagesType($itemType)
{
    // Guard the lookup: an item type without a registered parser must not raise an
    // "undefined index" notice, it is simply not multipage capable.
    if (!isset($this->externalParsers[$itemType])) {
        return false;
    }

    /** @var \TYPO3\CMS\IndexedSearch\FileContentParser $fileContentParser */
    $fileContentParser = $this->externalParsers[$itemType];

    return is_object($fileContentParser) && $fileContentParser->isMultiplePageExtension($itemType);
}
980
/**
 * md5 integer hash
 *
 * Thin wrapper that delegates to Utility\IndexedSearchUtility::md5inthash().
 * Using 7 instead of 8 just because that makes the integers lower than
 * 32 bit (28 bit) and so they do not interfere with UNSIGNED integers
 * or PHP-versions which have varying output from the hexdec function.
 *
 * @param string $str String to hash
 * @return int Integer interpretation of the md5 hash of the input string.
 */
protected function md5inthash($str)
{
    $integerHash = Utility\IndexedSearchUtility::md5inthash($str);

    return $integerHash;
}
994
/**
 * Check if the tables provided are configured for usage.
 * This becomes necessary for extensions that provide additional database
 * functionality like indexed_search_mysql.
 *
 * Thin wrapper that delegates to Utility\IndexedSearchUtility::isTableUsed().
 *
 * @param string $table_list Comma-separated list of tables
 * @return bool TRUE if given tables are enabled
 */
protected function isTableUsed($table_list)
{
    $tablesAreUsed = Utility\IndexedSearchUtility::isTableUsed($table_list);

    return $tablesAreUsed;
}
1007
/**
 * Returns an object reference to the hook object if any.
 *
 * Hooks are looked up in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'][$functionName].
 * The hook object is only returned if it provides a method named like $functionName;
 * in that case its "pObj" property is set to this repository before it is handed out.
 *
 * @param string $functionName Name of the function you want to call / hook key
 * @return object|NULL Hook object, if any. Otherwise NULL.
 */
public function hookRequest($functionName)
{
    // empty() also covers hook keys that are not registered at all, without raising a notice
    if (empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'][$functionName])) {
        return null;
    }

    $hookObj = GeneralUtility::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'][$functionName]);

    // Only hand out real objects which actually implement the requested hook method
    if (is_object($hookObj) && method_exists($hookObj, $functionName)) {
        $hookObj->pObj = $this;
        return $hookObj;
    }

    return null;
}
1026
/**
 * Returns the search type as integer,
 * e.g. sentence (20), any part of the word (1)
 *
 * @return int
 */
public function getSearchType()
{
    $searchType = (int)$this->searchType;

    return $searchType;
}
1037
/**
 * Returns the comma-separated list of root pages to search from, split into integers.
 *
 * @return int[]
 */
public function getSearchRootPageIdList()
{
    $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList);

    return $rootPageIds;
}
1047
/**
 * Getter for the joinPagesForQuery flag,
 * enabled through TypoScript 'settings.skipExtendToSubpagesChecking'
 *
 * @return bool
 */
public function getJoinPagesForQuery()
{
    $joinPages = $this->joinPagesForQuery;

    return $joinPages;
}
1058
/**
 * Returns the frontend controller stored in $GLOBALS['TSFE'].
 *
 * @return \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController
 */
protected function getTypoScriptFrontendController()
{
    $frontendController = $GLOBALS['TSFE'];

    return $frontendController;
}
1066
/**
 * Returns the TimeTracker instance provided by GeneralUtility::makeInstance().
 *
 * @return TimeTracker
 */
protected function getTimeTracker()
{
    $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);

    return $timeTracker;
}
1073 }
1074 }