[TASK] Remove superfluous parenthesis in sysexts
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Hook / CrawlerHook.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Hook;
3
4 /**
5 * Crawler hook for indexed search. Works with the "crawler" extension
6 *
7 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
8 * @package TYPO3
9 * @subpackage tx_indexedsearch
10 */
11 class CrawlerHook {
12
13 // Static:
14 /**
15 * @todo Define visibility
16 */
17 public $secondsPerExternalUrl = 3;
18
19 // $secondsPerExternalUrl (declared above): number of seconds to use as interval between queued indexing operations of URLs / files (types 2 & 3)
20 // Internal, dynamic:
21 /**
22 * @todo Define visibility
23 */
24 public $instanceCounter = 0;
25
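26 // $instanceCounter (declared above): counts up for each file, URL or subpage handled while queueing (types 2, 3 & 4); multiplied with $secondsPerExternalUrl to stagger the scheduled execution times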
27 // Internal, static:
28 /**
29 * @todo Define visibility
30 */
31 public $callBack = 'EXT:indexed_search/class.crawler.php:&TYPO3\\CMS\\IndexedSearch\\Controller\\SearchFormController_crawler';
32
33 // $callBack (declared above): the object reference to this class, handed to the crawler as call back for queue entries.
34 /**
35 * Initialization of crawler hook.
36 * This function is asked for each instance of the crawler and we must check if something is timed to happen and if so put entry(s) in the crawlers log to start processing.
37 * In reality we select indexing configurations and evaluate if any of them needs to run.
38 *
39 * @param object Parent object (tx_crawler lib)
40 * @return void
41 * @todo Define visibility
42 */
public function crawler_init(&$pObj) {
	// Select all indexing configurations which are waiting to be activated:
	// not hidden, start time reached, next indexing time passed and not running (set_id=0).
	$indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', 'index_config', 'hidden=0
		AND (starttime=0 OR starttime<=' . $GLOBALS['EXEC_TIME'] . ')
		AND timer_next_indexing<' . $GLOBALS['EXEC_TIME'] . '
		AND set_id=0
		' . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('index_config'));
	// A failed query does not yield an array; treat that as "nothing to start"
	// so the clean-up at the end still runs.
	if (!is_array($indexingConfigurations)) {
		$indexingConfigurations = array();
	}
	// For each configuration, mark it as running and put the first entry into the crawler queue:
	foreach ($indexingConfigurations as $cfgRec) {
		// Generate a unique set-ID:
		$setId = \TYPO3\CMS\Core\Utility\GeneralUtility::md5int(microtime());
		// Get next time:
		$nextTime = $this->generateNextIndexingTime($cfgRec);
		// Start process by updating index-config record:
		$field_array = array(
			'set_id' => $setId,
			'timer_next_indexing' => $nextTime,
			'session_data' => ''
		);
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config', 'uid=' . intval($cfgRec['uid']), $field_array);
		// Parameters shared by all types; 'url' (and 'depth' where used) are added per type below.
		$params = array(
			'indexConfigUid' => $cfgRec['uid'],
			'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']')
		);
		$addToQueue = TRUE;
		// Based on configuration type:
		switch ($cfgRec['type']) {
			case 1:
				// RECORDS:
				$params['url'] = 'Records (start)';
				break;
			case 2:
				// FILES:
				$params['url'] = $cfgRec['filepath'];
				$params['depth'] = 0;
				break;
			case 3:
				// External URL:
				$params['url'] = $cfgRec['externalUrl'];
				$params['depth'] = 0;
				break;
			case 4:
				// Page tree (root page uid is carried in 'url'):
				$params['url'] = intval($cfgRec['alternative_source_pid']);
				$params['depth'] = 0;
				break;
			case 5:
				// Meta configuration, nothing to do:
				$addToQueue = FALSE;
				break;
			default:
				// Custom type: only queued when a hook object is registered for it.
				$addToQueue = FALSE;
				// empty() avoids an undefined-index notice for types without a registered hook.
				if (!empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']])) {
					$hookObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);
					if (is_object($hookObj)) {
						// $message was never defined before being handed to the hook; define it
						// explicitly so the hook receives NULL without an undefined-variable notice.
						// NOTE(review): the intended content of $message is not visible here - confirm against hook implementations.
						$message = NULL;
						$params['procInstructions'] = array('[Index Cfg UID#' . $cfgRec['uid'] . '/CUSTOM]');
						$params['url'] = $hookObj->initMessage($message);
						$addToQueue = TRUE;
					}
				}
				break;
		}
		if ($addToQueue) {
			$pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
		}
	}
	// Finally, look up all old index configurations which are finished and need to be reset.
	$this->cleanUpOldRunningConfigurations();
}
143
144 /**
145 * Call back function for execution of a log element
146 *
147 * @param array Params from log element. Must contain $params['indexConfigUid']
148 * @param object Parent object (tx_crawler lib)
149 * @return array Result array
150 * @todo Define visibility
151 */
public function crawler_execute($params, &$pObj) {
	// Only act when an indexing configuration uid is given and its record can be loaded;
	// in every case the (possibly hook-modified) params are returned for the crawler log.
	$configuration = NULL;
	if ($params['indexConfigUid']) {
		$configuration = $GLOBALS['TYPO3_DB']->exec_SELECTgetSingleRow('*', 'index_config', 'uid=' . intval($params['indexConfigUid']));
	}
	if (is_array($configuration)) {
		// Unpack the session data stored with the configuration record:
		$sessionState = unserialize($configuration['session_data']);
		$type = $configuration['type'];
		if ($type == 1) {
			// Records
			$this->crawler_execute_type1($configuration, $sessionState, $params, $pObj);
		} elseif ($type == 2) {
			// Files
			$this->crawler_execute_type2($configuration, $sessionState, $params, $pObj);
		} elseif ($type == 3) {
			// External URL
			$this->crawler_execute_type3($configuration, $sessionState, $params, $pObj);
		} elseif ($type == 4) {
			// Page tree
			$this->crawler_execute_type4($configuration, $sessionState, $params, $pObj);
		} elseif ($type == 5) {
			// Meta: nothing to execute (should never be reached)
		} elseif ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$type]) {
			// Custom type handled by a registered hook object
			$customIndexer = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$type]);
			if (is_object($customIndexer)) {
				// Kept on the instance because addQueueEntryForHook() reads $this->pObj
				$this->pObj = $pObj;
				$customIndexer->indexOperation($configuration, $sessionState, $params, $this);
			}
		}
		// Persist the session data, which the type handlers may have changed:
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery(
			'index_config',
			'uid=' . intval($configuration['uid']),
			array('session_data' => serialize($sessionState))
		);
	}
	return array('log' => $params);
}
202
203 /**
204 * Indexing records from a table
205 *
206 * @param array Indexing Configuration Record
207 * @param array Session data for the indexing session spread over multiple instances of the script. Passed by reference so changes hereto will be saved for the next call!
208 * @param array Parameters from the log queue.
209 * @param object Parent object (from "crawler" extension!)
210 * @return void
211 * @todo Define visibility
212 */
public function crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj) {
	// The configured table must be set and known to TCA, otherwise nothing is indexed.
	$table = $cfgRec['table2index'];
	if (!$table || !isset($GLOBALS['TCA'][$table])) {
		return;
	}
	// First run of this set: start from uid 0.
	if (!is_array($session_data)) {
		$session_data = array(
			'uid' => 0
		);
	}
	// Source page: the alternative source pid wins over the pid of the configuration record.
	$alternativePid = intval($cfgRec['alternative_source_pid']);
	$sourcePid = $alternativePid ? $alternativePid : $cfgRec['pid'];
	// Batch size defaults to 100 records per call.
	$batchSize = 100;
	if ($cfgRec['recordsbatch']) {
		$batchSize = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['recordsbatch'], 1);
	}
	$rootLineUids = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
	// Fetch the next batch, ordered by uid and continuing after the last processed uid:
	$rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', $table, 'pid = ' . intval($sourcePid) . '
		AND uid > ' . intval($session_data['uid']) . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($table) . \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields($table), '', 'uid', $batchSize);
	if (!count($rows)) {
		// No more records: no follow-up queue entry is created.
		return;
	}
	foreach ($rows as $row) {
		$this->indexSingleRecord($row, $cfgRec, $rootLineUids);
		// Remember how far we got, for the next call:
		$session_data['uid'] = $row['uid'];
	}
	// Queue the next batch; $row still holds the last record of this batch.
	$followUp = array(
		'indexConfigUid' => $cfgRec['uid'],
		'url' => 'Records from UID#' . ($row['uid'] + 1) . '-?',
		'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']')
	);
	$pObj->addQueueEntry_callBack($cfgRec['set_id'], $followUp, $this->callBack, $cfgRec['pid']);
}
247
248 /**
249 * Indexing files from fileadmin
250 *
251 * @param array Indexing Configuration Record
252 * @param array Session data for the indexing session spread over multiple instances of the script. Passed by reference so changes hereto will be saved for the next call!
253 * @param array Parameters from the log queue.
254 * @param object Parent object (from "crawler" extension!)
255 * @return void
256 * @todo Define visibility
257 */
public function crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj) {
	// Resolve the queued path to an absolute one and make sure it is an allowed location.
	$absolutePath = $params['url'];
	if (!\TYPO3\CMS\Core\Utility\GeneralUtility::isAbsPath($absolutePath)) {
		$absolutePath = \TYPO3\CMS\Core\Utility\GeneralUtility::getFileAbsFileName($absolutePath);
	}
	if (!\TYPO3\CMS\Core\Utility\GeneralUtility::isAllowedAbsPath($absolutePath)) {
		return;
	}
	if (@is_file($absolutePath)) {
		// A single file: index it right away.
		// The root line has to be provided when indexing external files.
		$rootLineUids = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
		$this->loadIndexerClass();
		$indexer = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_indexedsearch_indexer');
		$indexer->backend_initIndexer($cfgRec['pid'], 0, 0, '', $rootLineUids);
		$indexer->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
		// EXPERIMENT - avoids phash_t3 being written to file sections (otherwise they are removed when the page is reindexed)
		$indexer->hash['phash'] = -1;
		// Index the document, addressed relative to PATH_site:
		$indexer->indexRegularDocument(substr($absolutePath, strlen(PATH_site)), TRUE);
		return;
	}
	if (!@is_dir($absolutePath)) {
		return;
	}
	// A directory: collect its files (filtered by the configured extensions) ...
	$allowedExtensions = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $cfgRec['extensions'], 1));
	$collector = array();
	$items = \TYPO3\CMS\Core\Utility\GeneralUtility::getAllFilesAndFoldersInPath($collector, $absolutePath, $allowedExtensions, 0, 0);
	// ... plus its sub directories, as long as the configured depth is not reached.
	$subDirectories = \TYPO3\CMS\Core\Utility\GeneralUtility::get_dirs($absolutePath);
	if (is_array($subDirectories) && $params['depth'] < $cfgRec['depth']) {
		foreach ($subDirectories as $directoryName) {
			if ((string) $directoryName != '') {
				$items[] = $absolutePath . $directoryName . '/';
			}
		}
	}
	$items = \TYPO3\CMS\Core\Utility\GeneralUtility::removePrefixPathFromList($items, PATH_site);
	// Create one pending queue entry per item, spread over time via the instance counter:
	foreach ($items as $relativePath) {
		$this->instanceCounter++;
		if ($relativePath === $params['url']) {
			// Never re-queue the path currently being processed.
			continue;
		}
		$queueParams = array(
			'indexConfigUid' => $cfgRec['uid'],
			'url' => $relativePath,
			'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']'),
			'depth' => $params['depth'] + 1
		);
		$scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
		$pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParams, $this->callBack, $cfgRec['pid'], $scheduledTime);
	}
}
311
312 /**
313 * Indexing External URLs
314 *
315 * @param array Indexing Configuration Record
316 * @param array Session data for the indexing session spread over multiple instances of the script. Passed by reference so changes hereto will be saved for the next call!
317 * @param array Parameters from the log queue.
318 * @param object Parent object (from "crawler" extension!)
319 * @return void
320 * @todo Define visibility
321 */
public function crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj) {
	// First call of the set: the URL log starts with the URL being indexed now.
	if (!is_array($session_data)) {
		$session_data = array(
			'urlLog' => array($params['url'])
		);
	}
	// Index the URL itself; this also returns the links found on it.
	$rootLineUids = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
	$foundUrls = $this->indexExtUrl($params['url'], $cfgRec['pid'], $rootLineUids, $cfgRec['uid'], $cfgRec['set_id']);
	// Links are only followed while the configured depth is not reached.
	if (!($params['depth'] < $cfgRec['depth'])) {
		return;
	}
	foreach ($foundUrls as $candidate) {
		// Must be inside the base URL and not logged yet ...
		$acceptedUrl = $this->checkUrl($candidate, $session_data['urlLog'], $cfgRec['externalUrl']);
		if (!$acceptedUrl) {
			continue;
		}
		// ... and must not match the deny list.
		if ($this->checkDeniedSuburls($acceptedUrl, $cfgRec['url_deny'])) {
			continue;
		}
		$this->instanceCounter++;
		$session_data['urlLog'][] = $acceptedUrl;
		$queueParams = array(
			'indexConfigUid' => $cfgRec['uid'],
			'url' => $acceptedUrl,
			'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']'),
			'depth' => $params['depth'] + 1
		);
		// Spread the queued URLs over time:
		$scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
		$pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParams, $this->callBack, $cfgRec['pid'], $scheduledTime);
	}
}
352
353 /**
354 * Page tree indexing type
355 *
356 * @param array Indexing Configuration Record
357 * @param array Session data for the indexing session spread over multiple instances of the script. Passed by reference so changes hereto will be saved for the next call!
358 * @param array Parameters from the log queue.
359 * @param object Parent object (from "crawler" extension!)
360 * @return void
361 * @todo Define visibility
362 */
public function crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj) {
	// For this type the 'url' parameter carries the uid of the page to process.
	$pageUid = intval($params['url']);
	$pageRow = \TYPO3\CMS\Backend\Utility\BackendUtility::getRecord('pages', $pageUid);
	// Let the crawler compile the URLs of that page and submit them:
	$urlSets = $pObj->getUrlsForPageRow($pageRow);
	// Registry for duplicates
	$duplicateTrack = array();
	// Dummy
	$downloadUrls = array();
	if (count($urlSets)) {
		foreach ($urlSets as $urlSet) {
			// Return value is not needed; the call is made for the submission it performs.
			$pObj->urlListFromUrlArray($urlSet, $pageRow, $GLOBALS['EXEC_TIME'], 30, 1, 0, $duplicateTrack, $downloadUrls, array('tx_indexedsearch_reindex'));
		}
	}
	// Subpages are only queued while the configured depth is not reached.
	if (!($params['depth'] < $cfgRec['depth'])) {
		return;
	}
	$subPages = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid,title', 'pages', 'pid = ' . intval($pageUid) . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('pages'));
	if (!count($subPages)) {
		return;
	}
	foreach ($subPages as $subPage) {
		$this->instanceCounter++;
		// Keep a readable trace of the queued page in the session data:
		$session_data['urlLog'][] = 'pages:' . $subPage['uid'] . ': ' . $subPage['title'];
		$queueParams = array(
			'indexConfigUid' => $cfgRec['uid'],
			'url' => $subPage['uid'],
			'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']'),
			'depth' => $params['depth'] + 1
		);
		$scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
		$pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParams, $this->callBack, $cfgRec['pid'], $scheduledTime);
	}
}
401
402 /**
403 * Look up all old index configurations which are finished and needs to be reset and done
404 *
405 * @return void
406 * @todo Define visibility
407 */
public function cleanUpOldRunningConfigurations() {
	// Tables holding index data keyed by phash (list copied from class.tx_indexedsearch_modfunc1.php).
	// Built once instead of on every loop iteration.
	$indexTables = array('index_phash', 'index_rel', 'index_section', 'index_grlist', 'index_fulltext', 'index_debug');
	// Lookup running index configurations:
	$runningIndexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid,set_id', 'index_config', 'set_id<>0' . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('index_config'));
	if (!is_array($runningIndexingConfigurations)) {
		// Query failed; nothing can be cleaned up.
		return;
	}
	// For each running configuration, look up how many queue entries are still scheduled for
	// execution; if none, clear the "set_id" (meaning: processing is DONE).
	foreach ($runningIndexingConfigurations as $cfgRec) {
		$configurationUid = intval($cfgRec['uid']);
		// Cast once and use for both queries, so set_id never reaches SQL unescaped.
		$setId = intval($cfgRec['set_id']);
		// Look for ended processes:
		$queued_items = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', 'set_id=' . $setId . ' AND exec_time=0');
		if ($queued_items) {
			continue;
		}
		// Lookup old phash rows (those written for this configuration by an earlier set):
		$oldPhashRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash', 'index_phash', 'freeIndexUid=' . $configurationUid . ' AND freeIndexSetId<>' . $setId);
		if (is_array($oldPhashRows)) {
			foreach ($oldPhashRows as $pHashRow) {
				// Removing old registrations from all index tables:
				foreach ($indexTables as $table) {
					$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash=' . intval($pHashRow['phash']));
				}
			}
		}
		// End process by updating index-config record:
		$field_array = array(
			'set_id' => 0,
			'session_data' => ''
		);
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config', 'uid=' . $configurationUid, $field_array);
	}
}
434
435 /*****************************************
436 *
437 * Helper functions
438 *
439 *****************************************/
440 /**
441 * Check if an input URL is allowed to be indexed. Depends on whether it is already present in the url log.
442 *
443 * @param string URL string to check
444 * @param array Array of already indexed URLs (input url is looked up here and must not exist already)
445 * @param string Base URL of the indexing process (input URL must be "inside" the base URL!)
446 * @return string Returns the URL if OK, otherwise FALSE
447 * @todo Define visibility
448 */
public function checkUrl($url, $urlLog, $baseUrl) {
	// Normalize: collapse a trailing double slash and drop the fragment part.
	$url = preg_replace('/\\/\\/$/', '/', $url);
	list($url) = explode('#', $url);
	// URLs containing back paths are never accepted.
	if (strpos($url, '../') !== FALSE) {
		return FALSE;
	}
	// The URL must be "inside" the base URL of the indexing process.
	if (!\TYPO3\CMS\Core\Utility\GeneralUtility::isFirstPartOfStr($url, $baseUrl)) {
		return FALSE;
	}
	// The URL must not have been logged already.
	if (in_array($url, $urlLog)) {
		return FALSE;
	}
	return $url;
}
460
461 /**
462 * Indexing External URL
463 *
464 * @param string URL, http://....
465 * @param integer Page id to relate indexing to.
466 * @param array Rootline array to relate indexing to
467 * @param integer Configuration UID
468 * @param integer Set ID value
469 * @return array URLs found on this page
470 * @todo Define visibility
471 */
public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId) {
	// Load indexer if not yet.
	$this->loadIndexerClass();
	// Index external URL:
	$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_indexedsearch_indexer');
	$indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
	$indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
	$indexerObj->hash['phash'] = -1;
	$indexerObj->indexExternalUrl($url);
	// parse_url() returns FALSE for seriously malformed URLs and omits absent components,
	// so every component is read defensively.
	$url_qParts = parse_url($url);
	if (!is_array($url_qParts)) {
		$url_qParts = array();
	}
	$scheme = isset($url_qParts['scheme']) ? $url_qParts['scheme'] : '';
	$host = isset($url_qParts['host']) ? $url_qParts['host'] : '';
	$path = isset($url_qParts['path']) ? $url_qParts['path'] : '';
	$baseAbsoluteHref = $scheme . '://' . $host;
	// Keep an explicit port, otherwise relative links of a site on a non-default port
	// would be resolved against the default port.
	if (isset($url_qParts['port'])) {
		$baseAbsoluteHref .= ':' . $url_qParts['port'];
	}
	$baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
	if (!$baseHref) {
		// Extract base href from current URL: everything up to the last slash of the path.
		$baseHref = $baseAbsoluteHref;
		$lastSlash = strrpos($path, '/');
		if ($lastSlash !== FALSE) {
			$baseHref .= substr($path, 0, $lastSlash);
		}
	}
	$baseHref = rtrim($baseHref, '/');
	// Get URLs on this page:
	$subUrls = array();
	$list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
	// Traverse links:
	foreach ($list as $linkInfo) {
		// Decode entities:
		$subUrl = \TYPO3\CMS\Core\Utility\GeneralUtility::htmlspecialchars_decode($linkInfo['href']);
		$qParts = parse_url($subUrl);
		// Links without a scheme are relative and get resolved against the base.
		if (empty($qParts['scheme'])) {
			$relativeUrl = \TYPO3\CMS\Core\Utility\GeneralUtility::resolveBackPath($subUrl);
			// substr() instead of $relativeUrl[0]: safe for an empty href.
			if (substr($relativeUrl, 0, 1) === '/') {
				// Root-relative link
				$subUrl = $baseAbsoluteHref . $relativeUrl;
			} else {
				// Document-relative link
				$subUrl = $baseHref . '/' . $relativeUrl;
			}
		}
		$subUrls[] = $subUrl;
	}
	return $subUrls;
}
511
512 /**
513 * Indexing Single Record
514 *
515 * @param array Record to index
516 * @param array Configuration Record
517 * @param array Rootline array to relate indexing to
518 * @return void
519 * @todo Define visibility
520 */
public function indexSingleRecord($r, $cfgRec, $rl = NULL) {
	// Load indexer if not yet.
	$this->loadIndexerClass();
	// Init: use the given root line, or look it up from the configuration's page.
	$rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
	$fieldList = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $cfgRec['fieldlist'], 1);
	$tableControl = $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl'];
	$languageField = $tableControl['languageField'];
	$sys_language_uid = $languageField ? $r[$languageField] : 0;
	// (Re)-Indexing a row from a table:
	$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_indexedsearch_indexer');
	// The configured GET parameters get the record uid inserted for the ###UID### marker.
	parse_str(str_replace('###UID###', $r['uid'], $cfgRec['get_params']), $GETparams);
	$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, $cfgRec['chashcalc'] ? TRUE : FALSE);
	$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
	$indexerObj->forceIndexing = TRUE;
	// The first configured field is the title, all others are concatenated as content.
	// Both start out empty so an empty field list does not leave $theTitle undefined.
	$theTitle = '';
	$theContent = '';
	foreach ($fieldList as $k => $v) {
		if (!$k) {
			$theTitle = $r[$v];
		} else {
			$theContent .= $r[$v] . ' ';
		}
	}
	// Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid()).
	// A space is put in front of every tag before stripping so words of adjacent elements do not merge.
	$indexerObj->backend_indexAsTYPO3Page(
		strip_tags(str_replace('<', ' <', $theTitle)),
		'',
		'',
		strip_tags(str_replace('<', ' <', $theContent)),
		$GLOBALS['LANG']->charSet,
		$r[$tableControl['tstamp']],
		$r[$tableControl['crdate']],
		$r['uid']
	);
}
546
547 /**
548 * Include indexer class.
549 *
550 * @return void
551 * @todo Define visibility
552 */
public function loadIndexerClass() {
	// NOTE(review): the global is not read in this method; presumably it is imported so the
	// required file, which is evaluated in this method's scope, can see it - confirm against class.indexer.php.
	global $TYPO3_CONF_VARS;
	$indexerFile = \TYPO3\CMS\Core\Extension\ExtensionManager::extPath('indexed_search') . 'class.indexer.php';
	require_once $indexerFile;
}
557
558 /**
559 * Get rootline for closest TypoScript template root.
560 * Algorithm same as used in Web > Template, Object browser
561 *
562 * @param integer The page id to traverse rootline back from
563 * @return array Array where the root lines uid values are found.
564 * @todo Define visibility
565 */
public function getUidRootLineForClosestTemplate($id) {
	// Retained from the original implementation; not read in this method.
	global $TYPO3_CONF_VARS;
	$templateService = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Core\\TypoScript\\ExtendedTemplateService');
	// Do not log time-performance information
	$templateService->tt_track = 0;
	$templateService->init();
	// Fetch the page root line for the given page id ...
	$pageRepository = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Page\\PageRepository');
	$pageRootLine = $pageRepository->getRootLine($id);
	// ... and run the templates through it; this fills the template service's own rootLine.
	$templateService->runThroughTemplates($pageRootLine, 0);
	// Reduce the template root line to its uid values, keeping the keys:
	$uids = array();
	foreach ($templateService->rootLine as $level => $pageData) {
		$uids[$level] = $pageData['uid'];
	}
	return $uids;
}
584
585 /**
586 * Generate the unix time stamp for next visit.
587 *
588 * @param array Index configuration record
589 * @return integer The next time stamp
590 * @todo Define visibility
591 */
public function generateNextIndexingTime($cfgRec) {
	$now = $GLOBALS['EXEC_TIME'];
	$oneDay = 24 * 3600;
	// Pick a midnight to anchor the offset calculation on. For frequencies above one day the
	// day of the currently scheduled time is respected (so e.g. a weekly run stays on its
	// weekday); for frequencies up to one day any day will do, so yesterday's midnight is used.
	if ($cfgRec['timer_frequency'] > $oneDay) {
		$referenceTime = $cfgRec['timer_next_indexing'] ? $cfgRec['timer_next_indexing'] : $GLOBALS['EXEC_TIME'];
		$anchorMidnight = mktime(0, 0, 0, date('m', $referenceTime), date('d', $referenceTime), date('y', $referenceTime));
	} else {
		$anchorMidnight = mktime(0, 0, 0) - 1 * $oneDay;
	}
	// Anchor plus the configured offset within the day (0..86400 seconds):
	$anchorTime = $anchorMidnight + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_offset'], 0, 86400);
	// Frequency in seconds, at least 1:
	$interval = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_frequency'], 1);
	// Number of whole intervals needed to get from the anchor to a point not before now:
	$intervalCount = ceil(($now - $anchorTime) / $interval);
	return $anchorTime + $intervalCount * $interval;
}
610
611 /**
612 * Checks if $url has any of the URls in the $url_deny "list" in it and if so, returns TRUE.
613 *
614 * @param string URL to test
615 * @param string String where URLs are separated by line-breaks; If any of these strings is the first part of $url, the function returns TRUE (to indicate denial of descent)
616 * @return boolean TRUE if there is a matching URL (hence, do not index!)
617 * @todo Define visibility
618 */
public function checkDeniedSuburls($url, $url_deny) {
	// An empty deny list denies nothing.
	if (!trim($url_deny)) {
		return FALSE;
	}
	// One denied URL prefix per line; empty lines are dropped.
	$url_denyArray = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(LF, $url_deny, 1);
	foreach ($url_denyArray as $testurl) {
		if (\TYPO3\CMS\Core\Utility\GeneralUtility::isFirstPartOfStr($url, $testurl)) {
			// Denied. (The debug echo that used to print the URL and the deny list here was
			// removed: this method is a pure check and must not write to the output.)
			return TRUE;
		}
	}
	return FALSE;
}
631
632 /**
633 * Adding entry in queue for Hook
634 *
635 * @param array Configuration record
636 * @param string Title/URL
637 * @return void
638 * @todo Define visibility
639 */
public function addQueueEntryForHook($cfgRec, $title) {
	$configurationUid = $cfgRec['uid'];
	$queueParams = array(
		// This must ALWAYS be the uid of the configuration record!
		'indexConfigUid' => $configurationUid,
		'url' => $title,
		'procInstructions' => array('[Index Cfg UID#' . $configurationUid . ']')
	);
	// $this->pObj is the crawler parent object stored by crawler_execute() before the hook is called.
	$this->pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParams, $this->callBack, $cfgRec['pid']);
}
649
650 /**
651 * Deletes all data stored by indexed search for a given page
652 *
653 * @param integer Uid of the page to delete all pHash
654 * @return void
655 * @todo Define visibility
656 */
public function deleteFromIndex($id) {
	// Find every phash registered for the page in the section table:
	$sectionRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash', 'index_section', 'page_id=' . intval($id));
	if (!count($sectionRows)) {
		// Nothing indexed for this page.
		return;
	}
	$phashList = array();
	foreach ($sectionRows as $sectionRow) {
		$phashList[] = $sectionRow['phash'];
	}
	// One IN() clause, applied to each of the index tables in turn:
	$condition = 'phash IN (' . implode(',', $GLOBALS['TYPO3_DB']->cleanIntArray($phashList)) . ')';
	$indexTables = array('index_debug', 'index_fulltext', 'index_grlist', 'index_phash', 'index_rel', 'index_section');
	foreach ($indexTables as $indexTable) {
		$GLOBALS['TYPO3_DB']->exec_DELETEquery($indexTable, $condition);
	}
}
672
673 /*************************
674 *
675 * Hook functions for TCEmain (indexing of records)
676 *
677 *************************/
678 /**
679 * TCEmain hook function for on-the-fly indexing of database records
680 *
681 * @param string TCEmain command
682 * @param string Table name
683 * @param string Record ID. If new record its a string pointing to index inside t3lib_tcemain::substNEWwithIDs
684 * @param mixed Target value (ignored)
685 * @param object Reference to tcemain calling object
686 * @return void
687 * @todo Define visibility
688 */
public function processCmdmap_preProcess($command, $table, $id, $value, $pObj) {
	// Only the deletion of a page is of interest here; every other command is ignored.
	$isPageDeletion = $command == 'delete' && $table == 'pages';
	if (!$isPageDeletion) {
		return;
	}
	// Clean up the index for the page being deleted.
	$this->deleteFromIndex($id);
}
695
696 /**
697 * TCEmain hook function for on-the-fly indexing of database records
698 *
699 * @param string Status "new" or "update"
700 * @param string Table name
701 * @param string Record ID. If new record its a string pointing to index inside t3lib_tcemain::substNEWwithIDs
702 * @param array Field array of updated fields in the operation
703 * @param object Reference to tcemain calling object
704 * @return void
705 * @todo Define visibility
706 */
public function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj) {
	// No fields were written, so there is nothing to react on.
	if (!count($fieldArray)) {
		return;
	}
	if ($status == 'new') {
		// Translate the placeholder id of a new record into its real uid.
		$id = $pObj->substNEWwithIDs[$id];
	} elseif ($table == 'pages' && $status == 'update') {
		// A page that was just set to hidden or excluded from search loses its index data.
		$nowHidden = array_key_exists('hidden', $fieldArray) && $fieldArray['hidden'] == 1;
		$nowExcluded = array_key_exists('no_search', $fieldArray) && $fieldArray['no_search'] == 1;
		if ($nowHidden || $nowExcluded) {
			$this->deleteFromIndex($id);
		}
	}
	// Get the full record; without it no indexing configuration can apply.
	$record = \TYPO3\CMS\Backend\Utility\BackendUtility::getRecord($table, $id);
	if (!is_array($record)) {
		return;
	}
	$recordPid = intval($record['pid']);
	// Select all (not running) indexing configurations of type "record" (1) which point to this
	// table, are flagged for indexing on change and either live on the record's page or use it
	// as alternative source pid.
	$matchingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', 'index_config', 'hidden=0
		AND (starttime=0 OR starttime<=' . $GLOBALS['EXEC_TIME'] . ')
		AND set_id=0
		AND type=1
		AND table2index=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($table, 'index_config') . '
		AND (
			(alternative_source_pid=0 AND pid=' . $recordPid . ')
			OR (alternative_source_pid=' . $recordPid . ')
		)
		AND records_indexonchange=1
		' . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('index_config'));
	foreach ($matchingConfigurations as $configuration) {
		$this->indexSingleRecord($record, $configuration);
	}
}
738
739 }
740
741
742 ?>