f580d2478544815238f2cf53c22c71dddb0753a4
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Hook / MysqlFulltextIndexHook.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Hook;
3
4 /***************************************************************
5 * Copyright notice
6 *
7 * (c) 2011-2013 Michael Stucki (michael@typo3.org)
8 * All rights reserved
9 *
10 * This script is part of the TYPO3 project. The TYPO3 project is
11 * free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * The GNU General Public License can be found at
17 * http://www.gnu.org/copyleft/gpl.html.
18 * A copy is found in the textfile GPL.txt and important notices to the license
19 * from the author is found in LICENSE.txt distributed with these scripts.
20 *
21 *
22 * This script is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * This copyright notice MUST APPEAR in all copies of the script!
28 ***************************************************************/
/**
 * Hook for Indexed Search that replaces the standard SQL queries with a
 * MySQL FULLTEXT (MATCH ... AGAINST) query on the index_fulltext table.
 *
 * @author Michael Stucki <michael@typo3.org>
 */
class MysqlFulltextIndexHook {

	/**
	 * Parent object. This class reads piVars, join_pages, wholeSiteIdList and cObj
	 * from it and calls its hookRequest(), freeIndexUidWhere(), mediaTypeWhere()
	 * and languageWhere() methods.
	 *
	 * @var \TYPO3\CMS\IndexedSearch\Controller\SearchFormController
	 */
	public $pObj;

	/**
	 * Search type values, compared against $this->pObj->piVars['type'].
	 * The three "part of the word" types are all handled the same way in
	 * getSearchString(): a trailing "*" wildcard in boolean mode.
	 */
	const ANY_PART_OF_THE_WORD = '1';
	const LAST_PART_OF_THE_WORD = '2';
	const FIRST_PART_OF_THE_WORD = '3';
	const SOUNDS_LIKE = '10';
	const SENTENCE = '20';

	/**
	 * Gets a SQL result pointer to traverse for the search records.
	 *
	 * @param array $searchWordsArray Search words
	 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return boolean|\mysqli_result|object MySQLi result object / DBAL object
	 */
	public function getResultRows_SQLpointer($searchWordsArray, $freeIndexUid = -1) {
		// Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
		$searchData = $this->getSearchString($searchWordsArray);
		// Perform SQL search / collection of result rows:
		$resource = FALSE;
		if ($searchData) {
			// Do the search, wrapped in a time tracker section:
			$GLOBALS['TT']->push('execFinalQuery');
			$resource = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
			$GLOBALS['TT']->pull();
		}
		return $resource;
	}

	/**
	 * Builds the search string for use with a MySQL FULLTEXT query and decides
	 * which fulltext column to match against and whether boolean mode is needed.
	 *
	 * Each entry of $searchWordArray is read through its 'sword' (the word) and
	 * 'oper' (the operator: 'AND NOT', 'OR', anything else is treated as AND) keys.
	 *
	 * @param array $searchWordArray Search word array
	 * @return array Array with the keys 'searchBoolean' (boolean), 'searchString' (string) and 'fulltextIndex' (string)
	 */
	public function getSearchString($searchWordArray) {
		// Change this to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
		$searchBoolean = FALSE;
		$fulltextIndex = 'index_fulltext.fulltextdata';
		// This holds the result if the search is natural (doesn't contain any boolean operators)
		$naturalSearchString = '';
		// This holds the result if the search is boolean (contains +/- operators or wildcards)
		$booleanSearchString = '';
		$searchType = (string) $this->pObj->piVars['type'];
		// Traverse search words and prefix them with the corresponding operator
		foreach ($searchWordArray as $searchWordData) {
			// Making the query for a single search word based on the search type
			$searchWord = $searchWordData['sword'];
			$wildcard = '';
			// A word containing a space switches to sentence search. $searchType is
			// not reset afterwards, so this also applies to all following words and
			// to the final decision after the loop.
			if (strpos($searchWord, ' ') !== FALSE) {
				$searchType = self::SENTENCE;
			}
			switch ($searchType) {
				case self::ANY_PART_OF_THE_WORD:
				case self::LAST_PART_OF_THE_WORD:
				case self::FIRST_PART_OF_THE_WORD:
					// All three types are treated as "first part of word"
					$wildcard = '*';
					// Part-of-word search requires boolean mode!
					$searchBoolean = TRUE;
					break;
				case self::SOUNDS_LIKE:
					// Convert the word with the indexer's metaphone() and search the metaphone column instead
					/** @var \TYPO3\CMS\IndexedSearch\Indexer $indexerObj */
					$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
					$searchWord = $indexerObj->metaphone($searchWord, $indexerObj->storeMetaphoneInfoAsWords);
					unset($indexerObj);
					$fulltextIndex = 'index_fulltext.metaphonedata';
					break;
				case self::SENTENCE:
					$searchBoolean = TRUE;
					// Remove existing quotes; the whole sentence gets quoted after the loop.
					$searchWord = trim(str_replace('"', ' ', $searchWord));
					break;
			}
			// Add the word to the search strings according to its operator:
			switch ($searchWordData['oper']) {
				case 'AND NOT':
					$booleanSearchString .= ' -' . $searchWord . $wildcard;
					$searchBoolean = TRUE;
					break;
				case 'OR':
					$booleanSearchString .= ' ' . $searchWord . $wildcard;
					$searchBoolean = TRUE;
					break;
				default:
					// AND: only these words are part of the natural (and sentence) search string
					$booleanSearchString .= ' +' . $searchWord . $wildcard;
					$naturalSearchString .= ' ' . $searchWord;
			}
		}
		if ($searchType == self::SENTENCE) {
			$searchString = '"' . trim($naturalSearchString) . '"';
		} elseif ($searchBoolean) {
			$searchString = trim($booleanSearchString);
		} else {
			$searchString = trim($naturalSearchString);
		}
		return array(
			'searchBoolean' => $searchBoolean,
			'searchString' => $searchString,
			'fulltextIndex' => $fulltextIndex
		);
	}

	/**
	 * Executes the final fulltext query. Joins index_fulltext, index_section and
	 * index_phash (and optionally pages) and restricts the result by media type,
	 * language, indexing configuration and page access.
	 *
	 * @param array $searchData Array with search string, boolean indicator, and fulltext index reference (as returned by getSearchString())
	 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return boolean|\mysqli_result|object MySQLi result object / DBAL object
	 */
	protected function execFinalQuery_fulltext($searchData, $freeIndexUid = -1) {
		// Setting up methods of filtering results based on page types, access, etc.
		$pageJoin = '';
		// Indexing configuration clause:
		$freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);
		// Calling hook for alternative creation of page ID list.
		// Plain assignment is sufficient: objects are passed around as handles, and
		// assigning a non-reference return value by reference raises a PHP notice.
		if ($hookObj = $this->pObj->hookRequest('execFinalQuery_idList')) {
			$pageWhere = $hookObj->execFinalQuery_idList('');
		} elseif ($this->pObj->join_pages) {
			// Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
			$pageJoin = ',
				pages';
			$pageWhere = 'pages.uid = ISEC.page_id
				' . $this->pObj->cObj->enableFields('pages') . '
				AND pages.no_search=0
				AND pages.doktype<200
			';
		} elseif ($this->pObj->wholeSiteIdList >= 0) {
			// Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
			$siteIdNumbers = \TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $this->pObj->wholeSiteIdList);
			$idList = array();
			foreach ($siteIdNumbers as $rootId) {
				/** @var \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer $cObj */
				$cObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\ContentObject\\ContentObjectRenderer');
				// The root id is appended directly to the tree list string
				// (presumably getTreeList() returns a comma-terminated list - confirm).
				$idList[] = $cObj->getTreeList($rootId, 9999, 0, 0, '', '') . $rootId;
			}
			$pageWhere = ' ISEC.page_id IN (' . implode(',', $idList) . ')';
		} else {
			// Disable everything... (select all)
			$pageWhere = ' 1=1';
		}
		// Optional modifier inside AGAINST (...)
		$searchModeModifier = '';
		if ($searchData['searchBoolean']) {
			$searchModeModifier = ' IN BOOLEAN MODE';
		}
		$selectFields = 'index_fulltext.*, ISEC.*, IP.*';
		$fromTables = 'index_fulltext, index_section ISEC, index_phash IP' . $pageJoin;
		$matchClause = 'MATCH (' . $searchData['fulltextIndex'] . ') AGAINST ('
			. $GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'], 'index_fulltext')
			. $searchModeModifier . ') ';
		$whereClause = $matchClause . $this->pObj->mediaTypeWhere() . ' ' . $this->pObj->languageWhere() . $freeIndexUidClause . '
				AND index_fulltext.phash = IP.phash
				AND ISEC.phash = IP.phash
				AND ' . $pageWhere;
		$groupBy = 'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,'
			. 'IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,'
			. 'IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,'
			. 'IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,'
			. 'IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId';
		return $GLOBALS['TYPO3_DB']->exec_SELECTquery($selectFields, $fromTables, $whereClause, $groupBy);
	}

}
197
198
199 ?>