Fixed a few small bugs in indexed-search
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / class.indexer.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 2001-2004 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * This class is a search indexer for TYPO3
29 *
30 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
31 * Originally Christian Jul Jensen <christian@jul.net> helped as well.
32 */
33 /**
34 * [CLASS/FUNCTION INDEX of SCRIPT]
35 *
36 *
37 *
38 * 135: class tx_indexedsearch_indexer
39 * 198: function hook_indexContent(&$pObj)
40 *
41 * SECTION: Backend API
42 * 283: function backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=FALSE)
43 * 320: function backend_setFreeIndexUid($freeIndexUid)
44 * 337: function backend_indexAsTYPO3Page($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0)
45 *
46 * SECTION: Initialization
47 * 388: function init()
48 * 439: function initializeExternalParsers()
49 *
50 * SECTION: Indexing; TYPO3 pages (HTML content)
51 * 480: function indexTypo3PageContent()
52 * 564: function splitHTMLContent($content)
53 * 610: function getHTMLcharset($content)
54 * 625: function convertHTMLToUtf8($content,$charset='')
55 * 653: function embracingTags($string,$tagName,&$tagContent,&$stringAfter,&$paramList)
56 * 680: function typoSearchTags(&$body)
57 * 709: function extractLinks($content)
58 * 752: function extractHyperLinks($string)
59 *
60 * SECTION: Indexing; external URL
61 * 804: function indexExternalUrl($externalUrl)
62 * 835: function getUrlHeaders($url, $timeout = 2)
63 *
64 * SECTION: Indexing; external files (PDF, DOC, etc)
65 * 895: function indexRegularDocument($file, $force=FALSE, $contentTmpFile='', $altExtension='')
66 * 1001: function readFileContent($ext,$absFile,$cPKey)
67 * 1018: function fileContentParts($ext,$absFile)
68 * 1036: function splitRegularContent($content)
69 *
70 * SECTION: Analysing content, Extracting words
71 * 1069: function charsetEntity2utf8(&$contentArr, $charset)
72 * 1091: function procesWordsInArrays($contentArr)
73 * 1114: function bodyDescription($contentArr)
74 * 1135: function indexAnalyze($content)
75 * 1156: function analyzeHeaderinfo(&$retArr,$content,$key,$offset)
76 * 1175: function analyzeBody(&$retArr,$content)
77 * 1195: function metaphone($word,$retRaw=FALSE)
78 *
79 * SECTION: SQL; TYPO3 Pages
80 * 1237: function submitPage()
81 * 1306: function submit_grlist($hash,$phash_x)
82 * 1326: function submit_section($hash,$hash_t3)
83 * 1344: function removeOldIndexedPages($phash)
84 *
85 * SECTION: SQL; External media
86 * 1387: function submitFilePage($hash,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts)
87 * 1449: function submitFile_grlist($hash)
88 * 1463: function submitFile_section($hash)
89 * 1477: function removeOldIndexedFiles($phash)
90 *
91 * SECTION: SQL Helper functions
92 * 1513: function checkMtimeTstamp($mtime,$phash)
93 * 1549: function checkContentHash()
94 * 1566: function checkExternalDocContentHash($hashGr,$content_md5h)
95 * 1580: function is_grlist_set($phash_x)
96 * 1593: function update_grlist($phash,$phash_x)
97 * 1608: function updateTstamp($phash,$mtime=0)
98 * 1624: function updateParsetime($phash,$parsetime)
99 * 1637: function updateRootline()
100 * 1652: function getRootLineFields(&$fieldArr)
101 * 1671: function removeLoginpagesWithContentHash()
102 *
103 * SECTION: SQL; Submitting words
104 * 1706: function checkWordList($wl)
105 * 1743: function submitWords($wl,$phash)
106 * 1767: function freqMap($freq)
107 *
108 * SECTION: Hashing
109 * 1800: function setT3Hashes()
110 * 1826: function setExtHashes($file,$subinfo=array())
111 * 1850: function md5inthash($str)
112 * 1860: function makeCHash($paramArray)
113 *
114 * SECTION: Internal logging functions
115 * 1902: function log_push($msg,$key)
116 * 1911: function log_pull()
117 * 1922: function log_setTSlogMessage($msg, $errorNum=0)
118 *
119 * TOTAL FUNCTIONS: 55
120 * (This index is automatically created/updated by the extension "extdeveval")
121 *
122 */
123
124
125 require_once(PATH_t3lib.'class.t3lib_parsehtml.php');
126
127
128 /**
129 * Indexing class for TYPO3 frontend
130 *
131 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
132 * @package TYPO3
133 * @subpackage tx_indexedsearch
134 */
135 class tx_indexedsearch_indexer {
136
137 // Messages:
138 var $reasons = array(
139 -1 => 'mtime matched the document, so no changes detected and no content updated',
140 -2 => 'The minimum age was not exceeded',
141 1 => "The configured max-age was exceeded for the document and thus it's indexed.",
142 2 => 'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
143 3 => 'The minimum age was exceed, but mtime was not set, so the page was indexed.',
144 4 => 'Page has never been indexed (is not represented in the index_phash table).'
145 );
146
147 // HTML code blocks to exclude from indexing:
148 var $excludeSections = 'script,style';
149
150 // Supported Extensions for external files:
151 var $external_parsers = array(); // External parser objects, keys are file extension names. Values are objects with certain methods.
152
153 // Fe-group list (pages might be indexed separately for each usergroup combination to support search in access limited pages!)
154 var $defaultGrList = '0,-1';
155
156 // Min/Max times:
157 var $tstamp_maxAge = 0; // If set, this tells a number of seconds that is the maximum age of an indexed document. Regardless of mtime the document will be re-indexed if this limit is exceeded.
158 var $tstamp_minAge = 0; // If set, this tells a minimum limit before a document can be indexed again. This is regardless of mtime.
159 var $maxExternalFiles = 0; // Max number of external files to index.
160
161 // INTERNALS:
162 var $defaultContentArray=array(
163 'title' => '',
164 'description' => '',
165 'keywords' => '',
166 'body' => '',
167 );
168 var $wordcount = 0;
169 var $externalFileCounter = 0;
170
171 var $conf = array(); // Configuration set internally (see init functions for required keys and their meaning)
172 var $indexerConfig = array(); // Indexer configuration
173 var $hash = array(); // Hash array, contains phash and phash_grouping
174 var $file_phash_arr = array(); // Hash array for files
175 var $contentParts = array(); // Content of TYPO3 page
176 var $content_md5h = '';
177 var $internal_log = array(); // Internal log
178 var $indexExternalUrl_content = '';
179
180 var $cHashParams = array(); // cHashparams array
181
182 var $freqRange = 65000;
183 var $freqMax = 0.1;
184
185 // Objects:
186 var $csObj; // Charset class object , t3lib_cs
187 var $metaphoneObj; // Metaphone object, if any
188 var $lexerObj; // Lexer object for word splitting
189
190
191
192 /**
193 * Parent Object (TSFE) Initialization
194 *
195 * @param object Parent Object (frontend TSFE object), passed by reference
196 * @return void
197 */
function hook_indexContent(&$pObj) {
	// Frontend entry point: copies the relevant state of the parent (TSFE) object
	// into $this->conf and, if all preconditions hold, indexes the rendered page.

	// Indexer configuration from Extension Manager interface:
	$indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);

	// Determine if page should be indexed, and if so, configure and initialize indexer
	if ($pObj->config['config']['index_enable']) {
		$this->log_push('Index page','');

		if (!$indexerConfig['disableFrontendIndexing']) {
			if (!$pObj->page['no_search']) {
				if (!$pObj->no_cache) {

					// Setting up internal configuration from config array:
					$this->conf = array();

					// Information about page for which the indexing takes place
					$this->conf['id'] = $pObj->id; // Page id
					$this->conf['type'] = $pObj->type; // Page type
					$this->conf['sys_language_uid'] = $pObj->sys_language_uid; // sys_language UID of the language of the indexing.
					$this->conf['MP'] = $pObj->MP; // MP variable, if any (Mount Points)
					$this->conf['gr_list'] = $pObj->gr_list; // Group list

					$this->conf['cHash'] = $pObj->cHash; // cHash string for additional parameters
					$this->conf['cHash_array'] = $pObj->cHash_array; // Array of the additional parameters

					$this->conf['crdate'] = $pObj->page['crdate']; // The creation date of the TYPO3 page
					$this->conf['page_cache_reg1'] = $pObj->page_cache_reg1; // reg1 of the caching table. Not known what practical use this has.

					// Root line uids
					$this->conf['rootline_uids'] = array();
					foreach($pObj->config['rootLine'] as $rlkey => $rldat) {
						$this->conf['rootline_uids'][$rlkey] = $rldat['uid'];
					}

					// Content of page:
					$this->conf['content'] = $pObj->content; // Content string (HTML of TYPO3 page)
					$this->conf['indexedDocTitle'] = $pObj->indexedDocTitle; // Alternative title for indexing
					$this->conf['metaCharset'] = $pObj->metaCharset; // Character set of content (will be converted to utf-8 during indexing)
					$this->conf['mtime'] = $pObj->register['SYS_LASTCHANGED']; // Most recent modification time (seconds) of the content on the page. Used to evaluate whether it should be re-indexed.

					// Configuration of behavior:
					$this->conf['index_externals'] = $pObj->config['config']['index_externals']; // Whether to index external documents like PDF, DOC etc. (if possible)
					$this->conf['index_descrLgd'] = $pObj->config['config']['index_descrLgd']; // Length of description text (max 250, default 200)

					// Set to zero:
					$this->conf['recordUid'] = 0;
					$this->conf['freeIndexUid'] = 0;

					// Init and start indexing:
					$this->init();
					$this->indexTypo3PageContent();

				} else {
					$this->log_setTSlogMessage('Index page? No, page was set to "no_cache" and so cannot be indexed.');
				}
			} else {
				$this->log_setTSlogMessage('Index page? No, The "No Search" flag has been set in the page header!');
			}
		} else {
			$this->log_setTSlogMessage('Index page? No, Ordinary Frontend indexing during rendering is disabled.');
		}

		// Close the log section opened by log_push() above. This has to stay inside
		// the "index_enable" branch: pulling without a preceding push would leave the
		// push/pull pairs unbalanced whenever indexing is disabled for the page.
		$this->log_pull();
	}
}
257
258
259
260
261
262
263
264
265 /****************************
266 *
267 * Backend API
268 *
269 ****************************/
270
271 /**
272 * Initializing the "combined ID" of the page (phash) being indexed (or for which external media is attached)
273 *
274 * @param integer The page uid, &id=
275 * @param integer The page type, &type=
276 * @param integer sys_language uid, typically &L=
277 * @param string The MP variable (Mount Points), &MP=
278 * @param array Rootline array of only UIDs.
279 * @param array Array of GET variables to register with this indexing
280 * @param boolean If set, calculates a cHash value from the $cHash_array. Probably you will not do that since such cases are indexed through the frontend and the idea of this interface is to index non-cachable pages from the backend!
281 * @return void
282 */
function backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=FALSE) {

	// Start from a fresh configuration describing the page the indexing is done for:
	$this->conf = array(
		'id' => $id, // Page id (integer)
		'type' => $type, // Page type (integer)
		'sys_language_uid' => $sys_language_uid, // sys_language UID of the language of the indexing (integer)
		'MP' => $MP, // MP variable, if any (Mount Points) (string)
		'gr_list' => '0,-1' // Group list (hardcoded for now...)
	);

	// cHash string for the additional parameters; only calculated on request:
	if ($createCHash) {
		$this->conf['cHash'] = $this->makeCHash($cHash_array);
	} else {
		$this->conf['cHash'] = '';
	}
	// Array of the additional parameters:
	$this->conf['cHash_array'] = $cHash_array;

	// Defaults:
	$this->conf['freeIndexUid'] = 0;
	$this->conf['page_cache_reg1'] = '';

	// Root line uids, as handed in by the caller:
	$this->conf['rootline_uids'] = $uidRL;

	// Behavior: external documents (PDF, DOC etc.) are indexed if possible,
	// and the description text length is 200 (max 250, default 200):
	$this->conf['index_externals'] = 1;
	$this->conf['index_descrLgd'] = 200;

	// Initialize the indexer with the configuration set above:
	$this->init();
}
313
314 /**
315 * Sets the free-index uid. Can be called right after backend_initIndexer()
316 *
317 * @param integer Free index UID
318 * @return void
319 */
function backend_setFreeIndexUid($freeIndexUid) {
	// Overrides the 'freeIndexUid' entry of the internal configuration
	// (backend_initIndexer() resets that entry to 0).
	$this->conf['freeIndexUid'] = $freeIndexUid;
}
323
324 /**
325 * Indexing records as the content of a TYPO3 page.
326 *
327 * @param string Title equivalent
328 * @param string Keywords equivalent
329 * @param string Description equivalent
330 * @param string The main content to index
331 * @param string The charset of the title, keyword, description and body-content
332 * @param integer Last modification time, in seconds
333 * @param integer The creation date of the content, in seconds
334 * @param integer The record UID that the content comes from (for registration with the indexed rows)
335 * @return void
336 */
function backend_indexAsTYPO3Page($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0) {

	// Time stamps and origin of the content:
	$this->conf['mtime'] = $mtime; // Most recent modification time (seconds) of the content
	$this->conf['crdate'] = $crdate; // The creation date of the TYPO3 content
	$this->conf['recordUid'] = $recordUid; // UID of the record, if applicable

	// All values are plain text, so they are escaped before being wrapped in markup:
	$escapedTitle = htmlspecialchars($title);
	$escapedKeywords = htmlspecialchars($keywords);
	$escapedDescription = htmlspecialchars($description);
	$escapedContent = htmlspecialchars($content);

	// Fake HTML document, so the content can go through the same parsing as a rendered TYPO3 page:
	$this->conf['content'] = '
<html>
<head>
<title>'.$escapedTitle.'</title>
<meta name="keywords" content="'.$escapedKeywords.'" />
<meta name="description" content="'.$escapedDescription.'" />
</head>
<body>
'.$escapedContent.'
</body>
</html>';

	// Character set of content (will be converted to utf-8 during indexing):
	$this->conf['metaCharset'] = $charset;
	// No alternative title; the <title> of the fake document is used:
	$this->conf['indexedDocTitle'] = '';

	// Index content as if it was a TYPO3 page:
	$this->indexTypo3PageContent();
}
364
365
366
367
368
369
370
371
372
373
374
375
376
377 /********************************
378 *
379 * Initialization
380 *
381 *******************************/
382
383 /**
384 * Initializes the object. $this->conf MUST be set with proper values prior to this call!!!
385 *
386 * @return void
387 */
function init() {

	// Parameters behind the cHash. If there are any, the cHash itself is added so that
	// URL's come out right. "encryptionKey" is added inside TSFE in order to calculate
	// the cHash value and must NOT be a part of this array, or it will be exposed in links!
	$this->cHashParams = $this->conf['cHash_array'];
	if (is_array($this->cHashParams) && count($this->cHashParams)) {
		if ($this->conf['cHash']) {
			$this->cHashParams['cHash'] = $this->conf['cHash'];
		}
		unset($this->cHashParams['encryptionKey']);
	}

	// Setting phash / phash_grouping which identifies the indexed page based on some of these variables:
	$this->setT3Hashes();

	// Indexer configuration from Extension Manager interface. minAge/maxAge are multiplied by 3600 before use:
	$this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
	$settings = $this->indexerConfig;
	$this->tstamp_minAge = t3lib_div::intInRange($settings['minAge']*3600,0);
	$this->tstamp_maxAge = t3lib_div::intInRange($settings['maxAge']*3600,0);
	$this->maxExternalFiles = t3lib_div::intInRange($settings['maxExternalFiles'],0,1000,5);
	$this->flagBitMask = t3lib_div::intInRange($settings['flagBitMask'],0,255);

	// External document parsers are only needed when external documents are indexed.
	// Example configuration, see ext_localconf.php of this file!
	if ($this->conf['index_externals']) {
		$this->initializeExternalParsers();
	}

	// Lexer (class that deconstructs the text into words); the bundled lexer is the fallback.
	// Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['lexer'] = 'EXT:indexed_search/class.lexer.php:&tx_indexedsearch_lexer';
	$lexerReference = 'EXT:indexed_search/class.lexer.php:&tx_indexedsearch_lexer';
	if ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['lexer']) {
		$lexerReference = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['lexer'];
	}
	$this->lexerObj = &t3lib_div::getUserObj($lexerReference);
	$this->lexerObj->debug = $this->indexerConfig['debugMode'];

	// Metaphone hook, only if one is configured.
	// Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
	if ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['metaphone']) {
		$this->metaphoneObj = &t3lib_div::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['metaphone']);
	}

	// Charset conversion object:
	$this->csObj = &t3lib_div::makeInstance('t3lib_cs');
}
431
432 /**
433 * Initialize external parsers
434 *
435 * @return void
436 * @access private
437 * @see init()
438 */
function initializeExternalParsers() {
	global $TYPO3_CONF_VARS;

	// Registers one parser object per configured file extension in $this->external_parsers.
	if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) {
		foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
			$this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef);

			// A reference that did not resolve to an object cannot be initialized; drop the
			// entry instead of calling a method on a non-object.
			// NOTE(review): assumes getUserObj() yields a non-object on failure - confirm against t3lib_div.
			if (!is_object($this->external_parsers[$extension])) {
				unset($this->external_parsers[$extension]);
				continue;
			}

			// Give the parser access to this indexer:
			$this->external_parsers[$extension]->pObj = &$this;

			// Init parser and if it returns false, unset its entry again:
			if (!$this->external_parsers[$extension]->initParser($extension)) {
				unset($this->external_parsers[$extension]);
			}
		}
	}
}
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469 /********************************
470 *
471 * Indexing; TYPO3 pages (HTML content)
472 *
473 *******************************/
474
475 /**
476 * Start indexing of the TYPO3 page
477 *
478 * @return void
479 */
function indexTypo3PageContent() {

	// $check is a key of $this->reasons: positive values are reasons to index, negative ones reasons not to.
	$check = $this->checkMtimeTstamp($this->conf['mtime'], $this->hash['phash']);
	$is_grlist = $this->is_grlist_set($this->hash['phash']);

	if ($check > 0 || !$is_grlist) {

		// Setting message:
		if ($check > 0) {
			$this->log_setTSlogMessage('Indexing needed, reason: '.$this->reasons[$check],1);
		} else {
			$this->log_setTSlogMessage('Indexing needed, reason: Updates gr_list!',1);
		}

		// Divide into title,keywords,description and body:
		$this->log_push('Split content','');
		$this->contentParts = $this->splitHTMLContent($this->conf['content']);
		if ($this->conf['indexedDocTitle']) {
			$this->contentParts['title'] = $this->conf['indexedDocTitle'];
		}
		$this->log_pull();

		// Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!)
		// implode() is called in its documented (glue, pieces) order; the result is the same as with the legacy reversed order.
		$this->content_md5h = $this->md5inthash(implode('',$this->contentParts));

		// This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash.
		// If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more.
		// This will also prevent pages from being indexed if a fe_users has logged in and it turns out that the page content is not changed anyway. fe_users logged in should always search with hash_gr_list = "0,-1" OR "[their_group_list]". This situation will be prevented only if the page has been indexed with no user login on before hand. Else the page will be indexed by users until that event. However that does not present a serious problem.
		$checkCHash = $this->checkContentHash();
		if (!is_array($checkCHash) || $check===1) {
			$Pstart=t3lib_div::milliseconds();

			$this->log_push('Converting charset of content ('.$this->conf['metaCharset'].') to utf-8','');
			$this->charsetEntity2utf8($this->contentParts,$this->conf['metaCharset']);
			$this->log_pull();

			// Splitting words
			$this->log_push('Extract words from content','');
			$splitInWords = $this->procesWordsInArrays($this->contentParts);
			$this->log_pull();

			// Analyse the indexed words.
			$this->log_push('Analyse the extracted words','');
			$indexArr = $this->indexAnalyze($splitInWords);
			$this->log_pull();

			// Submitting page (phash) record
			$this->log_push('Submitting page','');
			$this->submitPage();
			$this->log_pull();

			// Check words and submit to word list if not there
			$this->log_push('Check word list and submit words','');
			$this->checkWordList($indexArr);
			$this->submitWords($indexArr,$this->hash['phash']);
			$this->log_pull();

			// Set parsetime
			$this->updateParsetime($this->hash['phash'],t3lib_div::milliseconds()-$Pstart);

			// Checking external files if configured for.
			$this->log_push('Checking external files','');
			if ($this->conf['index_externals']) {
				$this->extractLinks($this->conf['content']);
			}
			$this->log_pull();
		} else {
			$this->updateTstamp($this->hash['phash'],$this->conf['mtime']); // Update the timestamp
			$this->update_grlist($checkCHash['phash'],$this->hash['phash']); // $checkCHash['phash'] is the phash of the result row that is similar to the current phash regarding the content hash.
			$this->updateRootline();
			$this->log_setTSlogMessage('Indexing not needed, the contentHash, '.$this->content_md5h.', has not changed. Timestamp, grlist and rootline updated if necessary.');
		}
	} else {
		$this->log_setTSlogMessage('Indexing not needed, reason: '.$this->reasons[$check]);
	}
}
556
557 /**
558 * Splits HTML content and returns an associative array, with title, a list of metatags, and a list of words in the body.
559 *
560 * @param string HTML content to index. To some degree expected to be made by TYPO3 (i.e. splitting the header by ":")
561 * @return array Array of content, having keys "title", "body", "keywords" and "description" set.
562 * @see splitRegularContent()
563 */
function splitHTMLContent($content) {

	$contentArr = $this->defaultContentArray;

	// Divide head from body. Everything from the first "<body" on is body, the rest is head.
	$bodyPart = stristr($content,'<body');
	if ($bodyPart === false) {
		// No <body> tag at all: keep the whole document as head so that title and meta tags
		// are still found. (Subtracting a zero body length with substr(..., 0, -0) would
		// return an empty head instead.) The body stays empty in this case.
		$contentArr['body'] = '';
		$headPart = $content;
	} else {
		$contentArr['body'] = $bodyPart;
		$headPart = substr($content,0,strlen($content)-strlen($bodyPart));
	}

	// Get title. If the title contains a ":", only the part after the first one is used.
	$this->embracingTags($headPart,'TITLE',$contentArr['title'],$dummy2,$dummy);
	$titleParts = explode(':',$contentArr['title'],2);
	$contentArr['title'] = trim(isset($titleParts[1]) ? $titleParts[1] : $titleParts[0]);

	// Get keywords and description metatags. Each pass stores the attribute string of the
	// next <meta> tag in $meta[$i] and cuts $headPart down to what follows that tag.
	$meta = array();
	for($i=0;$this->embracingTags($headPart,'meta',$dummy,$headPart,$meta[$i]);$i++) { /*nothing*/ }
	for($i=0;isset($meta[$i]);$i++) {
		$meta[$i] = t3lib_div::get_tag_attributes($meta[$i]);
		if(stristr($meta[$i]['name'],'keywords')) $contentArr['keywords'].=','.$meta[$i]['content'];
		if(stristr($meta[$i]['name'],'description')) $contentArr['description'].=','.$meta[$i]['content'];
	}

	// Process <!--TYPO3SEARCH_begin--> or <!--TYPO3SEARCH_end--> tags:
	$this->typoSearchTags($contentArr['body']);

	// Get rid of unwanted sections (ie. scripting and style stuff) in body
	$tagList = explode(',',$this->excludeSections);
	foreach($tagList as $tag) {
		while($this->embracingTags($contentArr['body'],$tag,$dummy,$contentArr['body'],$dummy2));
	}

	// Remove tags, but first make sure we don't concatenate words by doing it
	$contentArr['body'] = str_replace('<',' <',$contentArr['body']);
	$contentArr['body'] = trim(strip_tags($contentArr['body']));

	$contentArr['keywords'] = trim($contentArr['keywords']);
	$contentArr['description'] = trim($contentArr['description']);

	// Return array
	return $contentArr;
}
603
604 /**
605 * Extract the charset value from HTML meta tag.
606 *
607 * @param string HTML content
608 * @return string The charset value if found.
609 */
function getHTMLcharset($content) {
	// PCRE is used instead of the POSIX eregi() function, which no longer exists in
	// current PHP versions. The patterns are the same, with the "i" modifier providing
	// the case-insensitive matching.

	// First find the <meta http-equiv="Content-Type" ...> tag ...
	if (preg_match('/<meta[[:space:]]+[^>]*http-equiv[[:space:]]*=[[:space:]]*["\']CONTENT-TYPE["\'][^>]*>/i',$content,$reg)) {
		// ... then pick the charset value out of that tag only.
		if (preg_match('/charset[[:space:]]*=[[:space:]]*([[:alnum:]-]+)/i',$reg[0],$reg2)) {
			return $reg2[1];
		}
	}
	// No charset found: there is deliberately no return value here (NULL), as before.
}
617
618 /**
619 * Converts a HTML document to utf-8
620 *
621 * @param string HTML content, any charset
622 * @param string Optional charset (otherwise extracted from HTML)
623 * @return string Converted HTML
624 */
function convertHTMLToUtf8($content,$charset='') {

	// Without an explicit charset, the one declared in the HTML itself is used:
	if (!$charset) {
		$charset = $this->getHTMLcharset($content);
	}
	$charset = $this->csObj->parse_charset($charset);

	// Only content that is known not to be utf-8 already gets converted:
	if ($charset && $charset!=='utf-8') {
		$content = $this->csObj->utf8_encode($content, $charset);
	}

	// Convert entities, assuming document is now UTF-8:
	return $this->csObj->entities_to_utf8($content, TRUE);
}
640
641 /**
642 * Finds first occurrence of embracing tags and returns the embraced content and the original string with
643 * the tag removed in the two passed variables. Returns false if no match found. ie. useful for finding
644 * <title> of document or removing <script>-sections
645 *
646 * @param string String to search in
647 * @param string Tag name, eg. "script"
648 * @param string Passed by reference: Content inside found tag
649 * @param string Passed by reference: Content after found tag
650 * @param string Passed by reference: Attributes of the found tag.
651 * @return boolean Returns false if tag was not found, otherwise true.
652 */
function embracingTags($string,$tagName,&$tagContent,&$stringAfter,&$paramList) {
	$openingTag = '<'.$tagName;
	$closingTag = '</'.$tagName.'>';

	// Case-insensitive search; yields the string from the opening tag on.
	$fromOpeningTag = stristr($string,$openingTag);
	if (!$fromOpeningTag) {
		return false;
	}

	// Split what follows the tag name at the first ">": attributes before, remaining text behind.
	list($paramList,$behindTag) = explode('>',substr($fromOpeningTag,strlen($openingTag)),2);

	$fromClosingTag = stristr($behindTag,$closingTag);
	if (!$fromClosingTag) {
		// No ending tag: the tag content is blank and only what follows the tag itself is returned.
		$tagContent = '';
		$stringAfter = $behindTag;
		return true;
	}

	// Ending tag found: return the embraced content, and the original string with the whole tag cut out.
	$leadingText = substr($string, 0, strpos(strtolower($string), strtolower($openingTag)));
	$tagContent = substr($behindTag,0,strlen($behindTag)-strlen($fromClosingTag));
	$stringAfter = $leadingText.substr($fromClosingTag,strlen($closingTag));

	return true;
}
673
674 /**
675 * Removes content that shouldn't be indexed according to TYPO3SEARCH-tags.
676 *
677 * @param string HTML Content, passed by reference
678 * @return boolean Returns true if a TYPOSEARCH_ tag was found, otherwise false.
679 */
function typoSearchTags(&$body) {
	$chunks = explode('<!--TYPO3SEARCH_',$body);

	// No marker in the content: $body is left untouched.
	if (count($chunks)<=1) {
		return false;
	}

	// Rebuild $body from the chunks, each starting with the rest of a marker ("begin-->..." / "end-->...").
	$body = '';
	foreach($chunks as $chunk) {
		$pieces = explode('-->',$chunk,2);
		switch(trim($pieces[0])) {
			case 'begin':
				// Content after a "begin" marker is kept.
				$body.= $pieces[1];
				$prev = '';
			break;
			case 'end':
				// An "end" marker keeps the remembered preceding chunk (blank after a "begin"); what follows it is dropped.
				$body.= $prev;
			break;
			default:
				// Not a marker chunk: remember it in case an "end" marker follows.
				$prev = $chunk;
			break;
		}
	}

	return true;
}
702
703 /**
704 * Extract links (hrefs) from HTML content and if indexable media is found, it is indexed.
705 *
706 * @param string HTML content
707 * @return void
708 */
function extractLinks($content) {

	// Traverse all hyperlinks found in the content:
	foreach($this->extractHyperLinks($content) as $link) {

		// Decode entities and parse the URL:
		$target = t3lib_div::htmlspecialchars_decode($link['href']);
		$urlParts = parse_url($target);

		// Jumpurl (TYPO3 specific thing...): the real target is in the "jumpurl" parameter,
		// so the URL is parsed again for it.
		if ($urlParts['query'] && strstr($urlParts['query'],'jumpurl=')) {
			parse_str($urlParts['query'],$queryVars);
			$target = $queryVars['jumpurl'];
			$urlParts = parse_url($target);
		}

		// A scheme means an external URL (http or otherwise); indexed only if configured:
		if ($urlParts['scheme']) {
			if ($this->indexerConfig['indexExternalURLs']) {
				$this->indexExternalUrl($target);
			}
			continue;
		}

		// Links with a query string are not considered as local files:
		if ($urlParts['query']) {
			continue;
		}

		// Index the link target if it is an existing local file:
		$absoluteFile = t3lib_div::getFileAbsFileName(PATH_site.$target);
		if ($absoluteFile && @is_file($absoluteFile)) {
			$this->indexRegularDocument($target);
		}
	}
}
744
745 /**
746 * Extracts all links to external documents from content string.
747 *
748 * @param string Content to analyse
749 * @return array Array of hyperlinks
750 * @see extractLinks()
751 */
function extractHyperLinks($string) {

	// The HTML parser is created on first use and then kept:
	if (!is_object($this->htmlParser)) {
		$this->htmlParser = t3lib_div::makeInstance('t3lib_parseHtml');
	}

	$segments = $this->htmlParser->splitTags('a',$string);
	$links = array();

	foreach($segments as $index => $segment) {
		// Only the segments with odd index are looked at:
		if (!($index%2)) {
			continue;
		}

		$attributes = $this->htmlParser->get_tag_attributes($segment,1);
		$tagName = $this->htmlParser->getFirstTagName($segment); // The 'name' of the first tag

		// Register <a> tags that have a non-empty href:
		if (strtolower($tagName)=='a' && $attributes[0]['href']) {
			$links[] = array(
				'tag' => $segment,
				'href' => $attributes[0]['href']
			);
		}
	}

	return $links;
}
780
781
782
783
784
785
786
787
788
789
790
791 /******************************************
792 *
793 * Indexing; external URL
794 *
795 ******************************************/
796
797 /**
798 * Index External URLs HTML content
799 *
800 * @param string URL, eg. "http://typo3.org/"
801 * @return void
802 * @see indexRegularDocument()
803 */
function indexExternalUrl($externalUrl) {

	// Get headers:
	$urlHeaders = $this->getUrlHeaders($externalUrl);

	// Find the content type. HTTP header names are case-insensitive, so the header is
	// looked up regardless of how the server spells it (eg. "Content-type").
	$contentType = '';
	if (is_array($urlHeaders)) {
		foreach($urlHeaders as $headerName => $headerValue) {
			if (strtolower(trim($headerName))=='content-type') {
				$contentType = $headerValue;
				break;
			}
		}
	}

	// Only HTML documents are fetched and indexed:
	if (stristr($contentType,'text/html')) {
		$content = $this->indexExternalUrl_content = t3lib_div::getUrl($externalUrl);
		if (strlen($content)) {

			// Create temporary file. The content goes into a sibling file with ".html" appended.
			$tmpBase = t3lib_div::tempnam('EXTERNAL_URL');
			$tmpFile = $tmpBase.'.html';
			t3lib_div::writeFile($tmpFile, $content);

			// Index that file:
			$this->indexRegularDocument($externalUrl, FALSE, $tmpFile, 'html');
			unlink($tmpFile);

			// Also remove the base file, if one was created, so it is not left behind.
			// NOTE(review): presumably t3lib_div::tempnam() creates the file like PHP's tempnam() does - confirm.
			if ($tmpBase && @is_file($tmpBase)) {
				unlink($tmpBase);
			}
		}
	}
}
827
828 /**
829 * Getting the HTTP response headers of a URL
830 *
831 * @param string The URL
832 * @param integer Timeout (seconds?)
833 * @return mixed If no answer, returns false. Otherwise an array where HTTP headers are keys
834 */
function getUrlHeaders($url, $timeout = 2) {
	$url = parse_url($url);

	// Only plain http (or no scheme at all) is supported:
	if(!in_array($url['scheme'],array('','http'))) return FALSE;

	// Without a host there is nothing to connect to:
	if (!strlen($url['host'])) return FALSE;

	// A URL like "http://example.com" has no path; the request line needs at least "/":
	$path = strlen($url['path']) ? $url['path'] : '/';

	// $timeout is passed on as the connection timeout of fsockopen():
	$fp = fsockopen ($url['host'], ($url['port'] > 0 ? $url['port'] : 80), $errno, $errstr, $timeout);
	if (!$fp) {
		return FALSE;
	} else {
		$msg = "GET ".$path.($url['query'] ? '?'.$url['query'] : '')." HTTP/1.0\r\nHost: ".$url['host']."\r\n\r\n";
		fputs ($fp, $msg);

		// Read the response up to the first blank line, which ends the header section:
		$d = '';
		while (!feof($fp)) {
			$line = fgets ($fp,2048);

			$d.=$line;
			if (!strlen(trim($line))) {
				break;
			}
		}
		fclose ($fp);

		// Compile headers: each line is split at the first ":". The status line has no
		// value part and ends up as a key of its own.
		$headers = t3lib_div::trimExplode(chr(10),$d,1);
		$retVal = array();
		foreach($headers as $line) {
			list($headKey, $headValue) = explode(':', $line, 2);
			$retVal[$headKey] = $headValue;
		}
		return $retVal;
	}
}
867
868
869
870
871
872
873
874
875
876
877
878
879
880 /******************************************
881 *
882 * Indexing; external files (PDF, DOC, etc)
883 *
884 ******************************************/
885
/**
 * Indexing a regular document given as $file (relative to PATH_site, local file)
 *
 * For each section pointer returned by fileContentParts() the mtime/timestamp and the content hash
 * are checked; if indexing is needed (or forced) the content is read, split into words and stored.
 * submitFile_section() is called for every part, whether or not that part was (re-)indexed.
 *
 * @param	string		Relative Filename, relative to PATH_site. It can also be an absolute path as long as it is inside the lockRootPath (validated with t3lib_div::isAbsPath()). Finally, if $contentTmpFile is set, this value can be anything, most likely a URL
 * @param	boolean		If set, indexing is forced (despite content hashes, mtime etc).
 * @param	string		Temporary file with the content to read it from (instead of $file). Used when the $file is a URL.
 * @param	string		File extension for temporary file.
 * @return	void
 */
function indexRegularDocument($file, $force=FALSE, $contentTmpFile='', $altExtension='') {

	// Init. A file name without extension results in an empty $ext (reported as unsupported below):
	$fI = pathinfo($file);
	$ext = $altExtension ? $altExtension : (isset($fI['extension']) ? strtolower($fI['extension']) : '');

	// Create abs-path:
	if (!$contentTmpFile) {
		if (!t3lib_div::isAbsPath($file)) {	// Relative, prepend PATH_site:
			$absFile = t3lib_div::getFileAbsFileName(PATH_site.$file);
		} else {	// Absolute, pass-through:
			$absFile = $file;
		}
		$absFile = t3lib_div::isAllowedAbsPath($absFile) ? $absFile : '';
	} else {
		$absFile = $contentTmpFile;
	}

	// Nothing can be done without a readable file and a parser for the extension:
	if (!$absFile || !@is_file($absFile)) {
		$this->log_setTSlogMessage('Indexing not possible; File "'.$absFile.'" not found or valid.');
		return;
	}
	if (empty($this->external_parsers[$ext])) {
		$this->log_setTSlogMessage('Indexing not possible; The extension "'.$ext.'" was not supported.');
		return;
	}

	// Indexing the document, one section pointer at a time:
	$mtime = filemtime($absFile);
	$cParts = $this->fileContentParts($ext,$absFile);

	foreach($cParts as $cPKey) {
		$this->internal_log = array();
		$this->log_push('Index: '.str_replace('.','_',basename($file)).($cPKey?'#'.$cPKey:''),'');
		$Pstart = t3lib_div::milliseconds();
		$subinfo = array('key' => $cPKey);	// Setting page range. This is "0" (zero) when no division is made, otherwise a range like "1-3"
		$phash_arr = $this->file_phash_arr = $this->setExtHashes($file,$subinfo);
		$check = $this->checkMtimeTstamp($mtime, $phash_arr['phash']);
		if ($check > 0 || $force) {
			if ($check > 0) {
				$this->log_setTSlogMessage('Indexing needed, reason: '.$this->reasons[$check],1);
			} else {
				$this->log_setTSlogMessage('Indexing forced by flag',1);
			}

			// Check external file counter:
			if ($this->externalFileCounter < $this->maxExternalFiles || $force) {

				// Divide into title,keywords,description and body:
				$this->log_push('Split content','');
				$contentParts = $this->readFileContent($ext,$absFile,$cPKey);
				$this->log_pull();

				if (is_array($contentParts)) {
					// Calculating a hash over what is to be the actual content. (see indexTypo3PageContent())
					// The separator is passed first; the reversed argument order is not accepted by PHP 8.
					$content_md5h = $this->md5inthash(implode('', $contentParts));

					if ($this->checkExternalDocContentHash($phash_arr['phash_grouping'], $content_md5h) || $force) {

						// Increment counter:
						$this->externalFileCounter++;

						// Splitting words
						$this->log_push('Extract words from content','');
						$splitInWords = $this->procesWordsInArrays($contentParts);
						$this->log_pull();

						// Analyse the indexed words.
						$this->log_push('Analyse the extracted words','');
						$indexArr = $this->indexAnalyze($splitInWords);
						$this->log_pull();

						// Submitting page (phash) record
						$this->log_push('Submitting page','');
						$size = filesize($absFile);
						$ctime = $mtime;	// The creation time of a file cannot be determined here, so the modification time is used for it as well.
						$this->submitFilePage($phash_arr,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts);
						$this->log_pull();

						// Check words and submit to word list if not there
						$this->log_push('Check word list and submit words','');
						$this->checkWordList($indexArr);
						$this->submitWords($indexArr,$phash_arr['phash']);
						$this->log_pull();

						// Set parsetime
						$this->updateParsetime($phash_arr['phash'],t3lib_div::milliseconds()-$Pstart);
					} else {
						$this->updateTstamp($phash_arr['phash'],$mtime);	// Update the timestamp
						$this->log_setTSlogMessage('Indexing not needed, the contentHash, '.$content_md5h.', has not changed. Timestamp updated.');
					}
				} else $this->log_setTSlogMessage('Could not index file! Unsupported extension.');
			} else $this->log_setTSlogMessage('The limit of '.$this->maxExternalFiles.' has already been exceeded, so no indexing will take place this time.');
		} else $this->log_setTSlogMessage('Indexing not needed, reason: '.$this->reasons[$check]);

		// Checking and setting sections:
		$this->submitFile_section($phash_arr['phash']);	// Setting a section-record for the file. This is done also if the file is not indexed. Notice that section records are deleted when the page is indexed.
		$this->log_pull();
	}
}
991
/**
 * Reads the content of an external file being indexed.
 * The content from the external parser MUST be returned in utf-8!
 *
 * @param	string		File extension, eg. "pdf", "doc" etc.
 * @param	string		Absolute filename of file (must exist and be validated OK before calling function)
 * @param	string		Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be splitted.)
 * @return	mixed		Whatever the external parser's readFileContent() returns (expected: standard content array with title, description, keywords, body keys). NULL if no parser object is registered for the extension.
 */
function readFileContent($ext,$absFile,$cPKey) {

	// Defined up front so the return below never reads an unset variable:
	$contentArr = NULL;

	// Consult relevant external document parser:
	if (isset($this->external_parsers[$ext]) && is_object($this->external_parsers[$ext])) {
		$contentArr = $this->external_parsers[$ext]->readFileContent($ext,$absFile,$cPKey);
	}

	return $contentArr;
}
1010
/**
 * Returns the list of section pointers a document is divided into.
 * Without a parser object for the extension the document is treated as one part, array(0).
 *
 * @param	string		File extension
 * @param	string		Absolute filename (must exist and be validated OK before calling function)
 * @return	array		Array of pointers to sections that the document should be divided into
 */
function fileContentParts($ext,$absFile) {

	// No parser object registered: a single, undivided part.
	if (!is_object($this->external_parsers[$ext])) {
		return array(0);
	}

	// Otherwise the external document parser decides on the division:
	return $this->external_parsers[$ext]->fileContentParts($ext,$absFile);
}
1028
/**
 * Wraps non-HTML content (from external files for instance) in a content array.
 *
 * @param	string		Input content (non-HTML) to index.
 * @return	array		Copy of $this->defaultContentArray with the key "body" set to the input content
 * @see splitHTMLContent()
 */
function splitRegularContent($content) {

	// Start from the default content array and put everything into the body:
	$parts = $this->defaultContentArray;
	$parts['body'] = $content;

	return $parts;
}
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056 /**********************************
1057 *
1058 * Analysing content, Extracting words
1059 *
1060 **********************************/
1061
/**
 * Converts every non-empty value of the content array to utf-8 and decodes entities in it.
 * The array is modified in place.
 *
 * @param	array		Standard content array, passed by reference
 * @param	string		Charset of the input content (converted to utf-8)
 * @return	void
 */
function charsetEntity2utf8(&$contentArr, $charset) {

	foreach (array_keys($contentArr) as $partKey) {

		// Empty values are left untouched:
		if (!strlen($contentArr[$partKey])) {
			continue;
		}

		$value = $contentArr[$partKey];

		// Convert charset if necessary:
		if ($charset !== 'utf-8') {
			$value = $this->csObj->utf8_encode($value, $charset);
		}

		// Decode all numeric / html-entities in the string to real characters:
		$contentArr[$partKey] = $this->csObj->entities_to_utf8($value,TRUE);
	}
}
1085
/**
 * Splits every part of the content array into words using the lexer object.
 *
 * @param	array		Array of content to index, see splitHTMLContent() and splitRegularContent()
 * @return	array		Content input array modified so each key is an array of words; for "title", "keywords" and "description" duplicate words are removed
 */
function procesWordsInArrays($contentArr) {

	// Split all parts to words:
	foreach (array_keys($contentArr) as $partKey) {
		$contentArr[$partKey] = $this->lexerObj->split2Words($contentArr[$partKey]);
	}

	// For title, keywords, and description we don't want duplicates:
	foreach (array('title', 'keywords', 'description') as $headerKey) {
		$contentArr[$headerKey] = array_unique($contentArr[$headerKey]);
	}

	// Return modified array:
	return $contentArr;
}
1108
/**
 * Extracts the sample description text from the content array.
 *
 * The maximum length is taken from $this->conf['index_descrLgd'] (passed through
 * t3lib_div::intInRange() with the arguments 0, 255, 200). If the resulting length is zero,
 * an empty string is returned.
 *
 * @param	array		Content array
 * @return	string		Description string
 */
function bodyDescription($contentArr) {

	// Defined up front so an empty description is returned when the configured length is zero:
	$bodyDescription = '';

	// Setting description
	$maxL = t3lib_div::intInRange($this->conf['index_descrLgd'],0,255,200);
	if ($maxL) {
		// Replace tabs and line breaks by plain spaces:
		$bodyDescription = str_replace(array(' ',"\t","\r","\n"),' ',$contentArr['body']);

		// Shorten the string:
		$bodyDescription = $this->csObj->strtrunc('utf-8', $bodyDescription, $maxL);
	}

	return $bodyDescription;
}
1130
/**
 * Analyzes content to use for indexing.
 * Title, keywords and description words are registered with the bit offsets 7, 6 and 5; after that the body words are added.
 *
 * @param	array		Standard content array: an array with the keys title,keywords,description and body, which all contain an array of words.
 * @return	array		Index array: the words are keys, each value is an array of information collected by analyzeHeaderinfo() and analyzeBody()
 */
function indexAnalyze($content) {
	$indexArr = array();

	$this->analyzeHeaderinfo($indexArr,$content,'title',7);
	$this->analyzeHeaderinfo($indexArr,$content,'keywords',6);
	$this->analyzeHeaderinfo($indexArr,$content,'description',5);
	$this->analyzeBody($indexArr,$content);

	return $indexArr;
}
1148
/**
 * Calculates relevant information for headercontent
 *
 * For every word in $content[$key] the bit 2^$offset is set in "cmp", "count" is incremented and
 * "hash" and "metaphone" are (re)calculated. $this->wordcount is incremented per word.
 *
 * @param	array		Index array, passed by reference
 * @param	array		Standard content array
 * @param	string		Key from standard content array
 * @param	integer		Bit-wise priority to type
 * @return	void
 */
function analyzeHeaderinfo(&$retArr,$content,$key,$offset) {

	// Nothing to analyze if the part is missing:
	if (!isset($content[$key]) || !is_array($content[$key])) {
		return;
	}

	$typeBit = pow(2,$offset);
	foreach ($content[$key] as $val) {
		$val = substr($val,0,60);	// Max 60 - because the baseword varchar IS 60. This MUST be the same.

		// Words not seen before start out with no flags and a count of zero:
		$cmp = isset($retArr[$val]['cmp']) ? $retArr[$val]['cmp'] : 0;
		$count = isset($retArr[$val]['count']) ? $retArr[$val]['count'] : 0;

		$retArr[$val]['cmp'] = $cmp|$typeBit;
		$retArr[$val]['count'] = $count+1;
		$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
		$retArr[$val]['metaphone'] = $this->metaphone($val);
		$this->wordcount++;
	}
}
1169
/**
 * Calculates relevant information for bodycontent
 *
 * Words that are not yet in the index array get "first" (their key in the body word array),
 * "hash" and "metaphone" set. "count" is incremented for every occurrence, and so is $this->wordcount.
 *
 * @param	array		Index array, passed by reference
 * @param	array		Standard content array
 * @return	void
 */
function analyzeBody(&$retArr,$content) {

	// Nothing to analyze if there is no body word list:
	if (!isset($content['body']) || !is_array($content['body'])) {
		return;
	}

	foreach ($content['body'] as $key => $val) {
		$val = substr($val,0,60);	// Max 60 - because the baseword varchar IS 60. This MUST be the same.
		if (!isset($retArr[$val])) {
			$retArr[$val]['first'] = $key;
			$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
			$retArr[$val]['metaphone'] = $this->metaphone($val);
		}

		// A word seen for the first time has no count yet:
		$count = isset($retArr[$val]['count']) ? $retArr[$val]['count'] : 0;
		$retArr[$val]['count'] = $count+1;
		$this->wordcount++;
	}
}
1189
/**
 * Creating metaphone based hash from input word
 * Uses $this->metaphoneObj if it is an object, otherwise PHP's own metaphone() function.
 *
 * @param	string		Word to convert
 * @param	boolean		If set, returns the raw metaphone value (not hashed)
 * @return	mixed		Metaphone hash integer (or raw value, string). The hash is 0 (zero) for an empty metaphone value.
 */
function metaphone($word,$retRaw=FALSE) {

	$rawValue = is_object($this->metaphoneObj)
		? $this->metaphoneObj->metaphone($word)
		: metaphone($word);

	// Return raw value?
	if ($retRaw) {
		return $rawValue;
	}

	// Otherwise create hash and return integer:
	if ($rawValue == '') {
		return 0;
	}
	return hexdec(substr(md5($rawValue),0,7));
}
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228 /********************************
1229 *
1230 * SQL; TYPO3 Pages
1231 *
1232 *******************************/
1233
/**
 * Updates db with information about the page (TYPO3 page, not external media)
 * Existing records for the phash are removed first; then the index_phash, index_section,
 * index_grlist and index_fulltext records are written, plus index_debug in debug mode.
 *
 * @return	void
 */
function submitPage() {
	$phash = $this->hash['phash'];

	// Remove any current data for this phash:
	$this->removeOldIndexedPages($phash);

	// Setting new phash_row:
	$phashRow = array(
		'phash' => $phash,
		'phash_grouping' => $this->hash['phash_grouping'],
		'cHashParams' => serialize($this->cHashParams),
		'contentHash' => $this->content_md5h,
		'data_page_id' => $this->conf['id'],
		'data_page_reg1' => $this->conf['page_cache_reg1'],
		'data_page_type' => $this->conf['type'],
		'data_page_mp' => $this->conf['MP'],
		'gr_list' => $this->conf['gr_list'],
		'item_type' => 0,	// TYPO3 page
		'item_title' => $this->contentParts['title'],
		'item_description' => $this->bodyDescription($this->contentParts),
		'item_mtime' => $this->conf['mtime'],
		'item_size' => strlen($this->conf['content']),
		'tstamp' => time(),
		'crdate' => time(),
		'item_crdate' => $this->conf['crdate'],	// Creation date of page
		'sys_language_uid' => $this->conf['sys_language_uid'],	// Sys language uid of the page. Should reflect which language it DOES actually display!
		'externalUrl' => 0,
		'recordUid' => intval($this->conf['recordUid']),
		'freeIndexUid' => intval($this->conf['freeIndexUid']),
	);
	$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $phashRow);

	// PROCESSING index_section
	$this->submit_section($phash,$phash);

	// PROCESSING index_grlist
	$this->submit_grlist($phash,$phash);

	// PROCESSING index_fulltext
	$fulltextRow = array(
		'phash' => $phash,
		'fulltextdata' => implode(' ', $this->contentParts)
	);
	$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fulltextRow);

	// PROCESSING index_debug (content and body are cut to 1000 bytes)
	if ($this->indexerConfig['debugMode']) {
		$debugInfo = array(
			'cHashParams' => $this->cHashParams,
			'external_parsers initialized' => array_keys($this->external_parsers),
			'conf' => array_merge($this->conf,array('content'=>substr($this->conf['content'],0,1000))),
			'contentParts' => array_merge($this->contentParts,array('body' => substr($this->contentParts['body'],0,1000))),
			'logs' => $this->internal_log,
			'lexer' => $this->lexerObj->debugString,
		);
		$debugRow = array(
			'phash' => $phash,
			'debuginfo' => serialize($debugInfo)
		);
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $debugRow);
	}
}
1299
/**
 * Stores gr_list in the database.
 * Inserts one index_grlist record with the gr_list from $this->conf and its integer hash.
 *
 * @param	integer		Search result record phash
 * @param	integer		Actual phash of current content
 * @return	void
 * @see update_grlist()
 */
function submit_grlist($hash,$phash_x) {
	$grList = $this->conf['gr_list'];

	// Setting the gr_list record:
	$GLOBALS['TYPO3_DB']->exec_INSERTquery(
		'index_grlist',
		array(
			'phash' => $hash,
			'phash_x' => $phash_x,
			'hash_gr_list' => $this->md5inthash($grList),
			'gr_list' => $grList
		)
	);
}
1319
/**
 * Stores section
 * $hash and $hash_t3 are the same for TYPO3 pages, but different when it is external files.
 * The root-line fields are added by getRootLineFields() before the record is inserted.
 *
 * @param	integer		phash of TYPO3 parent search result record
 * @param	integer		phash of the file indexation search record
 * @return	void
 */
function submit_section($hash,$hash_t3) {
	$sectionRow = array(
		'phash' => $hash,
		'phash_t3' => $hash_t3,
		'page_id' => intval($this->conf['id'])
	);

	// Add rl0, rl1, rl2 (and possibly more):
	$this->getRootLineFields($sectionRow);

	$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $sectionRow);
}
1339
/**
 * Removes records for the indexed page, $phash
 *
 * @param	integer		phash value to flush
 * @return	void
 */
function removeOldIndexedPages($phash) {
	$phashInt = intval($phash);

	// Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here.
	$tables = array('index_phash', 'index_section', 'index_grlist', 'index_fulltext', 'index_debug');
	foreach ($tables as $tableName) {
		$GLOBALS['TYPO3_DB']->exec_DELETEquery($tableName, 'phash='.$phashInt);
	}

	// Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file).
	$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.$phashInt);
}
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368 /********************************
1369 *
1370 * SQL; External media
1371 *
1372 *******************************/
1373
1374
/**
 * Updates db with information about the file
 * Existing index_phash, index_grlist, index_fulltext and index_debug records for the phash are
 * removed first (see removeOldIndexedFiles()); then the new records are inserted.
 *
 * @param	array		Array with phash and phash_grouping keys for file
 * @param	string		File name
 * @param	array		Array of "cHashParams" for files: This is for instance the page index for a PDF file (other document types it will be a zero)
 * @param	string		File extension determining the type of media.
 * @param	integer		Modification time of file.
 * @param	integer		Creation time of file.
 * @param	integer		Size of file in bytes
 * @param	integer		Content HASH value.
 * @param	array		Standard content array (using only title and body for a file)
 * @return	void
 */
function submitFilePage($hash,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts) {
	$phash = $hash['phash'];

	// Find item type: the parser's mapping for the extension, or the extension itself as fallback:
	$mappedType = $this->external_parsers[$ext]->ext2itemtype_map[$ext];
	$storeItemType = $mappedType ? $mappedType : $ext;

	// Remove any current data for this phash:
	$this->removeOldIndexedFiles($phash);

	// Split filename; a scheme part marks the file as an external URL:
	$fileParts = parse_url($file);
	$isExternalUrl = !empty($fileParts['scheme']) ? 1 : 0;

	// The title falls back to the file name if the content has none:
	$itemTitle = trim($contentParts['title']) ? $contentParts['title'] : basename($file);

	// Setting new phash row:
	$phashRow = array(
		'phash' => $phash,
		'phash_grouping' => $hash['phash_grouping'],
		'cHashParams' => serialize($subinfo),
		'contentHash' => $content_md5h,
		'data_filename' => $file,
		'item_type' => $storeItemType,
		'item_title' => $itemTitle,
		'item_description' => $this->bodyDescription($contentParts),
		'item_mtime' => $mtime,
		'item_size' => $size,
		'item_crdate' => $ctime,
		'tstamp' => time(),
		'crdate' => time(),
		'gr_list' => $this->conf['gr_list'],
		'externalUrl' => $isExternalUrl,
		'recordUid' => intval($this->conf['recordUid']),
		'freeIndexUid' => intval($this->conf['freeIndexUid']),
	);
	$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $phashRow);

	// PROCESSING index_fulltext
	$fulltextRow = array(
		'phash' => $phash,
		'fulltextdata' => implode(' ', $contentParts)
	);
	$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fulltextRow);

	// PROCESSING index_debug (body is cut to 1000 bytes)
	if ($this->indexerConfig['debugMode']) {
		$debugInfo = array(
			'cHashParams' => $subinfo,
			'contentParts' => array_merge($contentParts,array('body' => substr($contentParts['body'],0,1000))),
			'logs' => $this->internal_log,
			'lexer' => $this->lexerObj->debugString,
		);
		$debugRow = array(
			'phash' => $phash,
			'debuginfo' => serialize($debugInfo)
		);
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $debugRow);
	}
}
1444
/**
 * Stores file gr_list for a file IF it does not exist already
 *
 * @param	integer		phash value of file
 * @return	void
 */
function submitFile_grlist($hash) {

	// Testing if there is a gr_list record for the default gr_list or for the current gr_list; if so, there is no need to place another one.
	$where = 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')';
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', $where);

	if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
		return;
	}
	$this->submit_grlist($hash,$hash);
}
1458
/**
 * Stores file section for a file IF it does not exist
 * The section record relates the file's phash to the current page ($this->conf['id']).
 *
 * @param	integer		phash value of file
 * @return	void
 */
function submitFile_section($hash) {

	// Testing if there is a section for this file on this page already:
	$where = 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', $where);

	if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
		return;
	}
	$this->submit_section($hash,$this->hash['phash']);
}
1472
/**
 * Removes records for the indexed file, $phash
 * Records in index_section are not touched here.
 *
 * @param	integer		phash value to flush
 * @return	void
 */
function removeOldIndexedFiles($phash) {
	$where = 'phash='.intval($phash);

	// Removing old registrations for tables:
	foreach (array('index_phash', 'index_grlist', 'index_fulltext', 'index_debug') as $tableName) {
		$GLOBALS['TYPO3_DB']->exec_DELETEquery($tableName, $where);
	}
}
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501 /********************************
1502 *
1503 * SQL Helper functions
1504 *
1505 *******************************/
1506
/**
 * Check the mtime / tstamp of the currently indexed page/file (based on phash)
 * Return positive integer if the page needs to being indexed!
 *
 * @param	integer		mtime value to test against limits and indexed page.
 * @param	integer		"phash" used to select any already indexed page to see what its mtime is.
 * @return	integer		Result integer: Generally: <0 = No indexing, >0 = Do indexing (see $this->reasons): -2) Min age was NOT exceed and so indexing cannot occur. -1) Mtimes matched so no need to reindex page. 1) Max age exceeded, page must be indexed again. 2) mtime of indexed page doesn't match mtime given for current content and we must index page. 3) No mtime was set, so we will index... 4) No indexed page found, so of course we will index.
 */
function checkMtimeTstamp($mtime,$phash) {

	// Select indexed page:
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
	$row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res);

	// Page has never been indexed (is not represented in the index_phash table):
	if (!$row) {
		return 4;
	}

	// The configured max-age was exceeded for the document and thus it's indexed:
	if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time()) {
		return 1;
	}

	// A minimum age is set and it was not exceeded:
	if ($this->tstamp_minAge && ($row['tstamp']+$this->tstamp_minAge) >= time()) {
		return -2;
	}

	// The minimum age was exceeded (or not set), but mtime was not set, so the page is indexed:
	if (!$mtime) {
		return 3;
	}

	// mtime is different from the index_phash mtime, so it's about time to re-index:
	if ($row['item_mtime'] != $mtime) {
		return 2;
	}

	// mtime matched the document, so no changes detected and no content updated:
	if ($this->tstamp_maxAge) {
		$this->log_setTSlogMessage('Mtime matched, timestamp NOT updated because a maxAge is set ('.($row['tstamp'] + $this->tstamp_maxAge - time()).' seconds to expire time).',1);
	} else {
		$this->updateTstamp($phash);	// Update the timestamp
		$this->log_setTSlogMessage('Mtime matched, timestamp updated.',1);
	}
	return -1;
}
1545
/**
 * Check content hash in phash table
 * Looks for an index_phash record with the current "phash_grouping" and content hash.
 *
 * @return	mixed		Returns 1 if the page needs to be indexed (that is, there was no result), otherwise the phash value (in an array) of the phash record to which the grlist_record should be related!
 */
function checkContentHash() {

	// With this query the page will only be indexed if its content is different from the same "phash_grouping" -page.
	$where = 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', $where);

	$row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res);
	return $row ? $row : 1;
}
1559
/**
 * Check content hash for external documents
 * Looks for an index_phash record with the given "phash_grouping" and content hash.
 *
 * @param	integer		phash value to check (phash_grouping)
 * @param	integer		Content hash to check
 * @return	integer		1 if the document needs to be indexed (that is, there was no result), otherwise 0 (zero)
 */
function checkExternalDocContentHash($hashGr,$content_md5h) {
	$where = 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', $where);

	// A matching record means the same content is indexed already:
	return $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res) ? 0 : 1;
}
1575
/**
 * Checks if a grlist record has been set for the phash value input (looking at the "real" phash of the current content, not the linked-to phash of the common search result page)
 *
 * @param	integer		Phash integer to test.
 * @return	integer		Number of index_grlist records found with that phash_x value (zero if none)
 */
function is_grlist_set($phash_x) {
	$where = 'phash_x='.intval($phash_x);

	return $GLOBALS['TYPO3_DB']->sql_num_rows(
		$GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', $where)
	);
}
1586
/**
 * Check if an grlist-entry for this hash exists and if not so, write one.
 *
 * @param	integer		phash of the search result that should be found
 * @param	integer		The real phash of the current content. The two values are different when a page with userlogin turns out to contain the exact same content as another already indexed version of the page; This is the whole reason for the grlist table in fact...
 * @return	void
 * @see submit_grlist()
 */
function update_grlist($phash,$phash_x) {
	$where = 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', $where);

	// An entry exists already; nothing to do:
	if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
		return;
	}

	$this->submit_grlist($phash,$phash_x);
	$this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
}
1602
/**
 * Update tstamp for a phash row.
 * The tstamp field is set to the current time.
 *
 * @param	integer		phash value
 * @param	integer		If set, update the mtime field to this value.
 * @return	void
 */
function updateTstamp($phash,$mtime=0) {
	$fieldValues = array('tstamp' => time());

	// The mtime is only touched when a value is given:
	if ($mtime) {
		$fieldValues['item_mtime'] = intval($mtime);
	}

	$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $fieldValues);
}
1618
/**
 * Update parsetime for phash row.
 *
 * @param	integer		phash value.
 * @param	integer		Parsetime value to set.
 * @return	void
 */
function updateParsetime($phash,$parsetime) {
	$GLOBALS['TYPO3_DB']->exec_UPDATEquery(
		'index_phash',
		'phash='.intval($phash),
		array('parsetime' => intval($parsetime))
	);
}
1633
/**
 * Update section rootline for the page
 * Writes the root-line fields from getRootLineFields() to all index_section records of the current page id.
 *
 * @return	void
 */
function updateRootline() {
	$rootLineFields = array();
	$this->getRootLineFields($rootLineFields);

	$where = 'page_id='.intval($this->conf['id']);
	$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', $where, $rootLineFields);
}
1646
/**
 * Adding values for root-line fields.
 * rl0, rl1 and rl2 are standard. More fields can be configured in
 * $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['addRootLineFields'] as field name => root line level.
 *
 * @param	array		Field array, passed by reference
 * @return	void
 */
function getRootLineFields(&$fieldArr) {

	// Standard fields; the values are the page uids from $this->conf['rootline_uids'] at level 0, 1 and 2:
	for ($level = 0; $level <= 2; $level++) {
		$fieldArr['rl'.$level] = intval($this->conf['rootline_uids'][$level]);
	}

	// Additionally configured fields:
	$additionalFields = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'];
	if (is_array($additionalFields)) {
		foreach ($additionalFields as $fieldName => $rootLineLevel) {
			$fieldArr[$fieldName] = intval($this->conf['rootline_uids'][$rootLineLevel]);
		}
	}
}
1666
/**
 * Removes any indexed pages with userlogins which has the same contentHash
 * NOT USED anywhere inside this class!
 *
 * @return	void
 */
function removeLoginpagesWithContentHash() {

	// Records of the same "phash_grouping" with the same content, indexed under another gr_list than the default one:
	$where = "\nA.phash=B.phash\nAND A.phash_grouping=".intval($this->hash['phash_grouping'])
		."\nAND B.hash_gr_list!=".$this->md5inthash($this->defaultGrList)
		."\nAND A.contentHash=".intval($this->content_md5h);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', $where);

	while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1);
		$this->removeOldIndexedPages($row['phash']);
	}
}
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696 /********************************
1697 *
1698 * SQL; Submitting words
1699 *
1700 *******************************/
1701
/**
 * Adds new words to db
 * Words whose "baseword" is found in index_words already (selected by word id) are skipped; all others are inserted.
 *
 * @param	array		Word List array (where each word has information about position etc).
 * @return	void
 */
function checkWordList($wl) {

	// Collect the word ids (hashes) of all words:
	$wordIds = array();
	foreach ($wl as $wordInfo) {
		$wordIds[] = $wordInfo['hash'];
	}
	if (!count($wordIds)) {
		return;
	}

	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.implode(',',$wordIds).')');
	$foundCount = $GLOBALS['TYPO3_DB']->sql_num_rows($res);

	// All words are known already:
	if ($foundCount == count($wl)) {
		return;
	}
	$this->log_setTSlogMessage('Inserting words: '.(count($wl)-$foundCount),1);

	// Drop the words that exist in the table:
	while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		unset($wl[$row['baseword']]);
	}

	// Insert the remaining ones:
	foreach ($wl as $baseword => $wordInfo) {
		$insertFields = array(
			'wid' => $wordInfo['hash'],
			'baseword' => $baseword,
			'metaphone' => $wordInfo['metaphone']
		);
			// A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 60 chars (the baseword varchar is 60 characters...) this is not a problem.
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
	}
}
1737
1738 /**
1739 * Submits RELATIONS between words and phash
1740 *
1741 * @param array Word list array
1742 * @param integer phash value
1743 * @return void
1744 */
function submitWords($wl,$phash) {
	// Start from a clean slate: drop all relations stored for this phash.
	$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));

	// Write one relation record per word in the list.
	foreach ($wl as $wordInfo) {
		$relation = array();
		$relation['phash'] = $phash;
		$relation['wid'] = $wordInfo['hash'];
		$relation['count'] = $wordInfo['count'];
		$relation['first'] = $wordInfo['first'];
		// Relative frequency of the word (count divided by the total word count), mapped by freqMap().
		$relation['freq'] = $this->freqMap($wordInfo['count'] / $this->wordcount);
		// Only the bits allowed by the flag bit mask are stored.
		$relation['flags'] = $wordInfo['cmp'] & $this->flagBitMask;

		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $relation);
	}
}
1761
1762 /**
1763 * maps frequency from a real number in [0;1] to an integer in [0;$this->freqRange] with anything above $this->freqMax as 1
1764 * and back.
1765 *
1766 * @param double Frequency
1767 * @return integer Frequency in range.
1768 */
function freqMap($freq) {
	// Scale factor derived from the configured maximum frequency and range.
	$scale = $this->freqMax * 100 * $this->freqRange;

	// Values which are not below 1 are divided by the scale factor.
	if (!($freq < 1)) {
		return $freq / $scale;
	}

	// Values below 1 are multiplied by the scale factor and capped at freqRange.
	$scaled = $freq * $scale;
	if ($scaled > $this->freqRange) {
		return $this->freqRange;
	}
	return $scaled;
}
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791 /********************************
1792 *
1793 * Hashing
1794 *
1795 *******************************/
1796
1797 /**
1798 * Get search hash, T3 pages
1799 *
1800 * @return void
1801 */
function setT3Hashes() {
	// Values identifying a "page": id, type, language, mount point and cHash
	// parameters. The array is serialized before hashing, so the order of the
	// keys must stay exactly as it is.
	$identity = array();
	$identity['id'] = (int)$this->conf['id'];
	$identity['type'] = (int)$this->conf['type'];
	$identity['sys_lang'] = (int)$this->conf['sys_language_uid'];
	$identity['MP'] = (string)$this->conf['MP'];
	$identity['cHash'] = $this->cHashParams;

	// Grouping hash: calculated without the group list.
	$this->hash['phash_grouping'] = $this->md5inthash(serialize($identity));

	// Plain phash: the same values plus the group list (gr_list) of the current login.
	$identity['gr_list'] = (string)$this->conf['gr_list'];
	$this->hash['phash'] = $this->md5inthash(serialize($identity));
}
1820
1821 /**
1822 * Get search hash, external files
1823 *
1824 * @param string File name / path which identifies it on the server
1825 * @param array Additional content identifying the (subpart of) content. For instance; PDF files are divided into groups of pages for indexing.
1826 * @return array Array with "phash_grouping" and "phash" inside.
1827 */
function setExtHashes($file,$subinfo=array()) {
	// The grouping hash is based on the file reference alone ...
	$fileOnly = array(
		'file' => $file
	);
	// ... while the plain phash additionally includes the sub-information.
	$fileWithSubinfo = array(
		'file' => $file,
		'subinfo' => $subinfo
	);

	return array(
		'phash_grouping' => $this->md5inthash(serialize($fileOnly)),
		'phash' => $this->md5inthash(serialize($fileWithSubinfo))
	);
}
1844
1845 /**
1846 * md5 integer hash
1847 * Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they do not interfere with UNSIGNED integers or PHP-versions which has varying output from the hexdec function.
1848 *
1849 * @param string String to hash
1850 * @return integer Integer interpretation of the md5 hash of input string.
1851 */
function md5inthash($str) {
	// Hexadecimal md5 digest of the input string
	$hexDigest = md5($str);
	// Only the first 7 hex digits (28 bit) are used
	$firstSevenDigits = substr($hexDigest, 0, 7);

	return hexdec($firstSevenDigits);
}
1855
1856 /**
1857 * Calculates the cHash value of input GET array (for constructing cHash values if needed)
1858 *
1859 * @param array Array of GET parameters to encode
1860 * @return string The cHash value (result of t3lib_div::shortMD5() on the serialized parameter array)
1861 */
function makeCHash($paramArray) {
	// Turn the array into a query string and cut it into "name=value" pieces.
	// The first character is dropped before splitting (presumably the leading "&" - confirm against implodeArrayForUrl()).
	$queryString = t3lib_div::implodeArrayForUrl('', $paramArray);
	$pairs = explode('&', substr($queryString, 1));

	// Collect all parameters except those in the exclusion list:
	$hashInput = array();
	foreach ($pairs as $pair) {
		$parts = explode('=', $pair);
		if (t3lib_div::inList('id,type,no_cache,cHash,MP,ftu', $parts[0])) {
			continue;
		}
		$hashInput[$parts[0]] = (string)rawurldecode($parts[1]);
	}

	// Add the encryption key and sort by parameter name before hashing.
	$hashInput['encryptionKey'] = $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'];
	ksort($hashInput);

	return t3lib_div::shortMD5(serialize($hashInput));
}
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891 /*********************************
1892 *
1893 * Internal logging functions
1894 *
1895 *********************************/
1896
1897 /**
1898 * Push function wrapper for TT logging
1899 *
1900 * @param string Title to set
1901 * @param string Key (?)
1902 * @return void
1903 */
function log_push($msg,$key) {
	// Without a TT object in $GLOBALS there is nothing to do.
	if (!is_object($GLOBALS['TT'])) {
		return;
	}
	$GLOBALS['TT']->push($msg,$key);
}
1907
1908 /**
1909 * Pull function wrapper for TT logging
1910 *
1911 * @return void
1912 */
function log_pull() {
	// Without a TT object in $GLOBALS there is nothing to do.
	if (!is_object($GLOBALS['TT'])) {
		return;
	}
	$GLOBALS['TT']->pull();
}
1916
1917 /**
1918 * Set log message function wrapper for TT logging
1919 *
1920 * @param string Message to set
1921 * @param integer Error number
1922 * @return void
1923 */
function log_setTSlogMessage($msg, $errorNum=0) {
	// Forward the message to the TT object if one is available ...
	$hasTimeTracker = is_object($GLOBALS['TT']);
	if ($hasTimeTracker) {
		$GLOBALS['TT']->setTSlogMessage($msg,$errorNum);
	}

	// ... and always keep it in the internal log of this object.
	$this->internal_log[] = $msg;
}
1928 }
1929
1930
// XCLASS hook: if TYPO3_MODE is defined and an extension file is registered for
// this script in $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS'], that file is included once.
1931 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.indexer.php']) {
1932 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.indexer.php']);
1933 }
1934 ?>