664fd0ceb985e0ffaa7e25d2f324936c90ad795a
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Hook / MysqlFulltextIndexHook.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Hook;
3
4 /***************************************************************
5 * Copyright notice
6 *
7 * (c) 2011 Michael Stucki (michael@typo3.org)
8 * All rights reserved
9 *
10 * This script is part of the TYPO3 project. The TYPO3 project is
11 * free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * The GNU General Public License can be found at
17 * http://www.gnu.org/copyleft/gpl.html.
18 * A copy is found in the textfile GPL.txt and important notices to the license
19 * from the author is found in LICENSE.txt distributed with these scripts.
20 *
21 *
22 * This script is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * This copyright notice MUST APPEAR in all copies of the script!
28 ***************************************************************/
29 /**
30 * Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries.
31 *
32 * @author Michael Stucki <michael@typo3.org>
33 * @package TYPO3
34 * @subpackage tx_indexedsearch_mysql
35 */
class MysqlFulltextIndexHook {

	/**
	 * Parent object. This class reads piVars, join_pages, wholeSiteIdList and cObj
	 * from it and calls freeIndexUidWhere(), hookRequest(), mediaTypeWhere() and
	 * languageWhere() on it.
	 * NOTE(review): presumably assigned by the code that registers this hook before
	 * any method is called - nothing in this class sets it.
	 *
	 * @var \TYPO3\CMS\IndexedSearch\Controller\SearchFormController
	 */
	public $pObj;

	/**
	 * Search types, compared against $this->pObj->piVars['type'].
	 * The three "part of the word" types are all handled identically by
	 * getSearchString(): the word gets a trailing '*' and boolean mode is enabled.
	 */
	const ANY_PART_OF_THE_WORD = '1';
	const LAST_PART_OF_THE_WORD = '2';
	const FIRST_PART_OF_THE_WORD = '3';
	/**
	 * Search type: the word is converted with the indexer's metaphone() and
	 * matched against index_fulltext.metaphonedata.
	 */
	const SOUNDS_LIKE = '10';
	/**
	 * Search type: the words are matched as one double-quoted phrase in boolean mode.
	 */
	const SENTENCE = '20';

	/**
	 * Gets a SQL result pointer to traverse for the search records.
	 *
	 * @param array $searchWordsArray Search words
	 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return resource|false
	 */
	public function getResultRows_SQLpointer($searchWordsArray, $freeIndexUid = -1) {
		// Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
		$searchData = $this->getSearchString($searchWordsArray);
		// Perform SQL Search / collection of result rows array:
		$resource = FALSE;
		if ($searchData) {
			// Do the search, bracketed by push()/pull() on the global 'TT' object:
			$GLOBALS['TT']->push('execFinalQuery');
			$resource = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
			$GLOBALS['TT']->pull();
		}
		return $resource;
	}

	/**
	 * Builds the search data for use with a MySQL FULLTEXT query.
	 *
	 * Each entry of $searchWordArray is read through its 'sword' (the word) and
	 * 'oper' (the operator: 'AND NOT', 'OR', anything else is treated as AND) keys.
	 *
	 * @param array $searchWordArray Search word array
	 * @return array Array with the keys 'searchBoolean' (boolean, TRUE if the query has to run IN BOOLEAN MODE), 'searchString' (string, the expression for AGAINST) and 'fulltextIndex' (string, the column to MATCH)
	 */
	public function getSearchString($searchWordArray) {
		// Becomes TRUE as soon as a word or operator requires boolean mode.
		// Change the initial value to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
		$searchBoolean = FALSE;
		$fulltextIndex = 'index_fulltext.fulltextdata';
		// This holds the result if the search is natural (doesn't contain any boolean operators)
		$naturalSearchString = '';
		// This holds the result if the search is boolean (contains +/- operators or wildcards)
		$booleanSearchString = '';
		// A missing 'type' parameter is treated like an empty one (plain natural search)
		$searchType = isset($this->pObj->piVars['type']) ? (string) $this->pObj->piVars['type'] : '';
		// Traverse searchwords and prefix them with corresponding operator
		foreach ($searchWordArray as $searchWordData) {
			// Making the query for a single search word based on the search-type
			$searchWord = $searchWordData['sword'];
			$wildcard = '';
			if (strpos($searchWord, ' ') !== FALSE) {
				// A "word" containing a space is handled as a sentence.
				// NOTE(review): $searchType is not reset afterwards, so the sentence type
				// also applies to all following words and to the final search string.
				$searchType = self::SENTENCE;
			}
			switch ($searchType) {
				case self::ANY_PART_OF_THE_WORD:

				case self::LAST_PART_OF_THE_WORD:

				case self::FIRST_PART_OF_THE_WORD:
					// All part-of-word types end up as a trailing wildcard
					$wildcard = '*';
					// Part-of-word search requires boolean mode!
					$searchBoolean = TRUE;
					break;
				case self::SOUNDS_LIKE:
					// Initialize the indexer-class
					$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_indexedsearch_indexer');
					/** @var \TYPO3\CMS\IndexedSearch\Indexer $indexerObj */
					$searchWord = $indexerObj->metaphone($searchWord, $indexerObj->storeMetaphoneInfoAsWords);
					unset($indexerObj);
					$fulltextIndex = 'index_fulltext.metaphonedata';
					break;
				case self::SENTENCE:
					$searchBoolean = TRUE;
					// Remove existing quotes and fix misplaced quotes.
					$searchWord = trim(str_replace('"', ' ', $searchWord));
					break;
			}
			// Perform search for word:
			switch ($searchWordData['oper']) {
				case 'AND NOT':
					$booleanSearchString .= ' -' . $searchWord . $wildcard;
					$searchBoolean = TRUE;
					break;
				case 'OR':
					$booleanSearchString .= ' ' . $searchWord . $wildcard;
					$searchBoolean = TRUE;
					break;
				default:
					// Only AND-ed words take part in the natural (and sentence) search string
					$booleanSearchString .= ' +' . $searchWord . $wildcard;
					$naturalSearchString .= ' ' . $searchWord;
			}
		}
		if ($searchType == self::SENTENCE) {
			// Phrase search: wrap the collected words in double quotes
			$searchString = '"' . trim($naturalSearchString) . '"';
		} elseif ($searchBoolean) {
			$searchString = trim($booleanSearchString);
		} else {
			$searchString = trim($naturalSearchString);
		}
		return array(
			'searchBoolean' => $searchBoolean,
			'searchString' => $searchString,
			'fulltextIndex' => $fulltextIndex
		);
	}

	/**
	 * Executes the final fulltext query: a MATCH ... AGAINST condition on index_fulltext,
	 * joined with index_section and index_phash, restricted by media type, language,
	 * indexing configuration and a page condition, and grouped by the result columns.
	 *
	 * @param array $searchData Array with search string, boolean indicator, and fulltext index reference (as returned by getSearchString())
	 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return resource Query result, as returned by exec_SELECTquery()
	 */
	protected function execFinalQuery_fulltext($searchData, $freeIndexUid = -1) {
		// Setting up methods of filtering results based on page types, access, etc.
		$pageJoin = '';
		// Indexing configuration clause:
		$freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);
		// Calling hook for alternative creation of page ID list.
		// Plain assignment on purpose: objects are handles, and assigning a function
		// result by reference raises "Only variables should be assigned by reference".
		if ($hookObj = $this->pObj->hookRequest('execFinalQuery_idList')) {
			$pageWhere = $hookObj->execFinalQuery_idList('');
		} elseif ($this->pObj->join_pages) {
			// Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
			$pageJoin = ',
				pages';
			$pageWhere = 'pages.uid = ISEC.page_id
				' . $this->pObj->cObj->enableFields('pages') . '
				AND pages.no_search=0
				AND pages.doktype<200
			';
		} elseif ($this->pObj->wholeSiteIdList >= 0) {
			// Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
			$siteIdNumbers = \TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $this->pObj->wholeSiteIdList);
			$idList = array();
			foreach ($siteIdNumbers as $rootId) {
				$cObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\ContentObject\\ContentObjectRenderer');
				/** @var \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer $cObj */
				// The root id itself is appended directly to the returned tree list
				$idList[] = $cObj->getTreeList($rootId, 9999, 0, 0, '', '') . $rootId;
			}
			$pageWhere = ' ISEC.page_id IN (' . implode(',', $idList) . ')';
		} else {
			// Disable everything... (select all)
			$pageWhere = ' 1=1';
		}
		$searchBoolean = '';
		if ($searchData['searchBoolean']) {
			$searchBoolean = ' IN BOOLEAN MODE';
		}
		$fields = 'index_fulltext.*, ISEC.*, IP.*';
		$tables = 'index_fulltext, index_section ISEC, index_phash IP' . $pageJoin;
		// The search string is the only user-supplied part and is quoted via fullQuoteStr()
		$where = 'MATCH (' . $searchData['fulltextIndex'] . ') AGAINST (' . $GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'], 'index_fulltext') . $searchBoolean . ') ' . $this->pObj->mediaTypeWhere() . ' ' . $this->pObj->languageWhere() . $freeIndexUidClause . '
			AND index_fulltext.phash = IP.phash
			AND ISEC.phash = IP.phash
			AND ' . $pageWhere;
		$groupBy = 'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId';
		$resource = $GLOBALS['TYPO3_DB']->exec_SELECTquery($fields, $tables, $where, $groupBy);
		return $resource;
	}

}
199
200
201 ?>