* Added template support to indexed_search. Many thanks to Udo von Eynern for his...
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / modfunc1 / class.tx_indexedsearch_modfunc1.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 *
17 * This script is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * This copyright notice MUST APPEAR in all copies of the script!
23 ***************************************************************/
24 /**
25 * Module extension (addition to function menu) 'Indexed search' for the 'indexed_search' extension.
26 *
27 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
28 */
29 /**
30 * [CLASS/FUNCTION INDEX of SCRIPT]
31 *
32 *
33 *
34 * 110: class tx_indexedsearch_modfunc1 extends t3lib_extobjbase
35 * 124: function modMenu()
36 * 148: function main()
37 *
38 * SECTION: Drawing table of indexed pages
39 * 261: function drawTableOfIndexedPages()
40 * 312: function indexed_info($data, $firstColContent)
41 * 398: function printPhashRow($row,$grouping=0,$extraGrListRows)
42 * 539: function printPhashRowHeader()
43 * 594: function returnNumberOfColumns()
44 *
45 * SECTION: Details display, phash row
46 * 630: function showDetailsForPhash($phash)
47 * 747: function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='')
48 * 796: function listMetaphoneStat($ftrows,$header)
49 * 833: function linkWordDetails($string,$wid)
50 * 845: function linkMetaPhoneDetails($string,$metaphone)
51 * 855: function flagsMsg($flags)
52 *
53 * SECTION: Details display, words / metaphone
54 * 886: function showDetailsForWord($wid)
55 * 945: function showDetailsForMetaphone($metaphone)
56 *
57 * SECTION: Helper functions
58 * 1016: function printRemoveIndexed($phash,$alt)
59 * 1029: function printReindex($resultRow,$alt)
60 * 1044: function linkDetails($string,$phash)
61 * 1053: function linkList()
62 * 1064: function showPageDetails($string,$id)
63 * 1074: function printExtraGrListRows($extraGrListRows)
64 * 1091: function printRootlineInfo($row)
65 * 1125: function makeItemTypeIcon($it,$alt='')
66 * 1150: function utf8_to_currentCharset($string)
67 *
68 * SECTION: Reindexing
69 * 1183: function reindexPhash($phash, $pageId)
70 * 1237: function getUidRootLineForClosestTemplate($id)
71 *
72 * SECTION: Indexing of configurations
73 * 1278: function extraIndexing()
74 * 1389: function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid)
75 *
76 * SECTION: SQL functions
77 * 1446: function removeIndexedPhashRow($phashList,$clearPageCache=1)
78 * 1483: function getGrListEntriesForPhash($phash,$gr_list)
79 * 1503: function processStopWords($stopWords)
80 * 1523: function processPageKeywords($pageKeywords, $pageUid)
81 *
82 * TOTAL FUNCTIONS: 32
83 * (This index is automatically created/updated by the extension "extdeveval")
84 *
85 */
86
87
88 require_once(PATH_t3lib.'class.t3lib_pagetree.php');
89 require_once(PATH_t3lib.'class.t3lib_extobjbase.php');
90 require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
91
92
93 // ... all for the rootline!
94 require_once (PATH_t3lib."class.t3lib_page.php");
95 require_once (PATH_t3lib."class.t3lib_tstemplate.php");
96 require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");
97
98 // Keywords mgm:
99 require_once (PATH_t3lib."class.t3lib_tcemain.php");
100
101
102
103 /**
104 * Backend module function class (addition to the function menu) showing indexed search information for pages
105 *
106 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
107 * @package TYPO3
108 * @subpackage tx_indexedsearch
109 */
110 class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
111
112 // Internal, dynamic:
113 var $allPhashListed = array(); // phash values accumulations for link to clear all
114 var $external_parsers = array(); // External content parsers - objects set here with file extensions as keys.
115 var $iconFileNameCache = array(); // File extensions - icon map/cache.
116 var $indexerObj; // Indexer object
117
118
/**
 * Builds the function menu configuration of this module function.
 *
 * Two menus are defined: 'depth' (number of page tree levels, labels fetched
 * through $LANG->sL()) and 'type' (which kind of listing to display).
 *
 * @return	array		Menu configuration with the keys 'depth' and 'type'
 */
function modMenu() {
	global $LANG;

	// Depth menu: menu value => suffix of the label key in locallang_core
	$depthLabelSuffixes = array(
		0 => '0',
		1 => '1',
		2 => '2',
		3 => '3',
		999 => 'infi',
	);
	$depthMenu = array();
	foreach ($depthLabelSuffixes as $levels => $suffix) {
		$depthMenu[$levels] = $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_'.$suffix);
	}

	// Listing type menu (a fourth entry, 3 => 'Indexing', is intentionally disabled):
	$typeMenu = array(
		0 => 'Overview',
		1 => 'Technical Details',
		2 => 'Words and content',
	);

	return array(
		'depth' => $depthMenu,
		'type' => $typeMenu
	);
}
143
/**
 * Produces main content of the module
 *
 * Submitted actions are processed first (deleting phash rows, changing stop-words,
 * changing page keywords). After that exactly one view is rendered:
 * - details for a phash row, a word or a metaphone value, or the re-indexing of an
 *   element, if the GET parameter "phash", "wid", "metaphone" or "reindex" is set
 * - otherwise the function menus followed by the listing selected in MOD_SETTINGS['type']
 *
 * @return	string		HTML output. Nothing is returned if the parent module has no page id.
 */
function main() {
	global $LANG, $TYPO3_CONF_VARS;

	// Return if no page id:
	if ($this->pObj->id<=0) return;

	// Initialize max-list items
	$this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;

	// Processing deletion of phash rows:
	if (t3lib_div::_GP('deletePhash')) {
		$this->removeIndexedPhashRow(t3lib_div::_GP('deletePhash'));
	}

	// Processing stop-words:
	if (t3lib_div::_POST('_stopwords')) {
		$this->processStopWords(t3lib_div::_POST('stopWord'));
	}

	// Processing page keywords:
	if (t3lib_div::_POST('_pageKeywords')) {
		$this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid'));
	}

	// Initialize external document parsers:
	// Example configuration, see ext_localconf.php of this file!
	if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) {
		foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
			$this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef);

			// Drop the entry again if no object could be created or if the parser's init returns false:
			if (!is_object($this->external_parsers[$extension]) || !$this->external_parsers[$extension]->softInit($extension)) {
				unset($this->external_parsers[$extension]);
			}
		}
	}

	// Initialize indexer if we need it (metaphone display does...)
	$this->indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');

	// Set CSS styles specific for this document:
	$this->pObj->content = str_replace('/*###POSTCSSMARKER###*/','
		TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; }
	',$this->pObj->content);

	// Output accumulator; initialized explicitly so the appends below never hit an undefined variable.
	$theOutput = '';

	if (t3lib_div::_GET('phash')) {	// Details for a phash record:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1);
	} elseif (t3lib_div::_GET('wid')) {	// Details for a word:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);
	} elseif (t3lib_div::_GET('metaphone')) {	// Details for a metaphone value:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);
	} elseif (t3lib_div::_GET('reindex')) {	// Re-indexing of an element:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1);
	} else {	// Detail listings:
		// Type function menu; the depth menu is only added for the page tree listings (types 0, 1 and 2):
		$h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php');
		$isPageTreeListing = t3lib_div::inList('0,1,2',$this->pObj->MOD_SETTINGS['type']);
		if ($isPageTreeListing) {
			$h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php');
		}

		// Show title / function menu:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);

		// Show the selected listing:
		$theOutput.= $isPageTreeListing ? $this->drawTableOfIndexedPages() : $this->extraIndexing();
	}

	return $theOutput;
}
240
241
242
243
244
245
246
247
248
249
250
251 /*******************************
252 *
253 * Drawing table of indexed pages
254 *
255 ******************************/
256
/**
 * Produces a table with indexing information for each page.
 *
 * The current page is always listed; sub pages are added down to the level
 * selected in MOD_SETTINGS['depth'].
 *
 * @return	string		HTML output
 */
function drawTableOfIndexedPages() {
	global $BACK_PATH;

	// Drawing tree:
	$tree = t3lib_div::makeInstance('t3lib_pageTree');
	$perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
	$tree->init('AND '.$perms_clause);

	// The current page is the first entry of the tree:
	$HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />';
	$tree->tree[] = Array(
		'row' => $this->pObj->pageinfo,
		'HTML' => $HTML
	);

	if ($this->pObj->MOD_SETTINGS['depth']) {
		$tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], '');
	}

	// Traverse page tree:
	$code = '';
	foreach($tree->tree as $data) {
		$code.= $this->indexed_info(
			$data['row'],
			$data['HTML'].
				$this->showPageDetails(t3lib_div::fixed_lgd($data['row']['title'], 20),$data['row']['uid'])
		);
	}

	if ($code) {
		// The header row must be rendered after the traversal above: it builds the "clear all" link
		// from $this->allPhashListed, which indexed_info() fills while rendering the rows.
		$code = '<br/><br/>
				<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
				$this->printPhashRowHeader().
				$code.
				'</table>';
	} else {
		$code = '<br/><br/>'.$this->pObj->doc->icons(1).'There were no indexed pages found in the tree.<br/><br/>';
	}

	// Create section to output:
	return $this->pObj->doc->section('',$code,0,1);
}
305
/**
 * Create information table row(s) for a page regarding indexing information.
 *
 * Selects the index_phash rows joined with index_section for the page and renders one table row
 * per result (via printPhashRow()), grouped by 'phash_grouping'. At most $this->maxListPerPage rows
 * are shown; if more exist, a row with a link to list them all is appended. Afterwards the phash rows
 * that reference the page in 'data_page_id' but were not part of the joined result are listed as warnings.
 * All phash values encountered are collected in $this->allPhashListed.
 *
 * @param	array		Data array for this page ('uid' and the optional '_CSSCLASS' are read)
 * @param	string		HTML content for first column (page tree icon etc.)
 * @return	string		HTML code. (table row)
 */
function indexed_info($data, $firstColContent) {

	// Query (one row more than the maximum is requested in order to detect whether there are more):
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
		'ISEC.*, IP.*, count(*) AS count_val',
		'index_phash IP, index_section ISEC',
		'IP.phash = ISEC.phash AND ISEC.page_id = '.intval($data['uid']),
		'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid',
		'IP.item_type, IP.tstamp',
		($this->maxListPerPage+1)
	);

	// Initialize variables:
	$rowCount = 0;
	$lines = array();	// Collecting HTML rows here.
	$phashAcc = array();	// Collecting phash values (to remove local indexing for)
	$phashAcc[] = 0;	// Makes sure the "NOT IN (...)" list in the query further down is never empty

	// Traverse the result set of phash rows selected:
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		if ($rowCount == $this->maxListPerPage) {
			$rowCount++;	// Increase to the extra warning row will appear as well.
			break;
		}

		// Adds a display row; the second argument tells whether this phash_grouping was seen before:
		$lines[$row['phash_grouping']][] = $this->printPhashRow(
			$row,
			isset($lines[$row['phash_grouping']]),
			$this->getGrListEntriesForPhash($row['phash'], $row['gr_list'])
		);
		$rowCount++;
		$phashAcc[] = $row['phash'];
		$this->allPhashListed[] = $row['phash'];	// For removing all shown phash rows.
	}

	// Compile rows into the table:
	$out = '';
	$cellAttrib = ($data['_CSSCLASS'] ? ' class="'.$data['_CSSCLASS'].'"' : '');
	if (count($lines)) {
		// NOTE(review): $rowCount was also increased for the "more rows" warning row, so this rowspan
		// covers that row too, although it emits its own leading cell - confirm this is intended.
		$firstColContent = '<td rowspan="'.$rowCount.'"'.$cellAttrib.'>'.$firstColContent.'</td>';
		foreach($lines as $rowSet) {
			foreach($rowSet as $rows) {
				$out.='
					<tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>';

				// The first column cell is only output in the first row:
				$firstColContent = '';
			}
		}

		if ($rowCount > $this->maxListPerPage) {	// Now checking greater than, because we increased $rowCount before...
			$out.='
				<tr class="bgColor-20">
					<td>&nbsp;</td>
					<td colspan="'.($this->returnNumberOfColumns()-1).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td>
				</tr>';
		}
	} else {
		// No indexed entries found for this page:
		$out.='
			<tr class="bgColor-20">
				<td'.$cellAttrib.'>'.$firstColContent.'</td>
				<td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td>
			</tr>';
	}

	// Checking for phash-rows which are NOT joined with the section table:
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('IP.*', 'index_phash IP', 'IP.data_page_id = '.intval($data['uid']).' AND IP.phash NOT IN ('.implode(',',$phashAcc).')');
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		$out.='
			<tr class="typo3-red">
				<td colspan="'.$this->returnNumberOfColumns().'"><b>Warning:</b> phash-row "'.$row['phash'].'" didn\'t have a representation in the index_section table!</td>
			</tr>';
		$this->allPhashListed[] = $row['phash'];
	}

	return $out;
}
390
/**
 * Render a single row of information about a indexing entry.
 *
 * The first three cells (item type icon, linked title, command links) are the same for all
 * listing types; the remaining cells depend on MOD_SETTINGS['type']:
 * 1 = technical details, 2 = fulltext content and word list, anything else = overview.
 *
 * @param	array		Row from query (combined phash table with sections etc).
 * @param	boolean		Set if grouped to previous result; the icon of the element is not shown again.
 * @param	array		Array of index_grlist records. The default only exists so that the optional $grouping is not followed by a required parameter; callers are expected to pass the records.
 * @return	array		Array of table cells (<td> elements) for the row.
 * @see indexed_info()
 */
function printPhashRow($row,$grouping=0,$extraGrListRows=array()) {
	$lines = array();

	// Title cell attributes will highlight TYPO3 pages with a slightly darker color (bgColor4) than attached medias. Also IF there are more than one section record for a phash row it will be red as a warning that something is wrong!
	$titleCellAttribs = $row['count_val']!=1?' bgcolor="red"':($row['item_type']==='0' ? ' class="bgColor4"' : '');

	// cHash parameters; anything which does not unserialize to an array is treated as "no parameters":
	$cHashParams = unserialize($row['cHashParams']);
	if (!is_array($cHashParams)) {
		$cHashParams = array();
	}

	// For non-page items the 'key' parameter is appended to the title:
	$page = ($row['item_type'] && !empty($cHashParams['key'])) ? ' ['.$cHashParams['key'].']' : '';
	$elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']);
	$cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');

	// Cells shared by all listing types:
	// Display icon (not repeated for grouped rows):
	if (!$grouping) {
		$lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
	} else {
		$lines[] = '<td>&nbsp;</td>';
	}
	// Title displayed:
	$lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';
	// Remove-indexing-link:
	$lines[] = '<td>'.$cmdLinks.'</td>';

	switch($this->pObj->MOD_SETTINGS['type']) {
		case 1:	// Technical details:
			// Various data:
			$lines[] = '<td>'.$row['phash'].'</td>';
			$lines[] = '<td>'.$row['contentHash'].'</td>';

			if ($row['item_type']==='0') {
				$lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['data_page_mp'] ? $row['data_page_mp'] : '&nbsp;').'</td>';
			} else {
				$lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>';
			}
			$lines[] = '<td>'.$row['gr_list'].$this->printExtraGrListRows($extraGrListRows).'</td>';
			$lines[] = '<td>'.$this->printRootlineInfo($row).'</td>';
			$lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['freeIndexUid'] ? $row['freeIndexUid'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['recordUid'] ? $row['recordUid'] : '&nbsp;').'</td>';

			// The cHash value is shown in its own column, the other parameters as GET parameters:
			$theCHash = isset($cHashParams['cHash']) ? $cHashParams['cHash'] : '';
			unset($cHashParams['cHash']);

			if ($row['item_type']) {	// pdf...
				$lines[] = '<td>'.(!empty($cHashParams['key']) ? htmlspecialchars('Page '.$cHashParams['key']) : '').'&nbsp;</td>';
			} elseif ($row['item_type']==0) {
				$lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$cHashParams)).'&nbsp;</td>';
			} else {
				$lines[] = '<td class="bgColor">&nbsp;</td>';
			}

			$lines[] = '<td>'.htmlspecialchars($theCHash).'</td>';
		break;
		case 2:	// Words and content:
			// Fulltext content; empty if there is no index_fulltext row for the phash:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_fulltext',
				'phash = '.intval($row['phash'])
			);
			$fulltext = (is_array($ftrows) && isset($ftrows[0]['fulltextdata'])) ? $ftrows[0]['fulltextdata'] : '';
			$lines[] = '<td style="white-space: normal;">'.
						htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($fulltext),3000)).
						'<hr/><em>Size: '.strlen($fulltext).'</em>'.
						'</td>';

			// Words related to the phash; 'baseword' is passed as the last argument so the words become the array keys:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.baseword, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.intval($row['phash']).
					' AND index_words.wid = index_rel.wid',
				'',
				'',
				'',
				'baseword'
			);

			$wordList = '';
			if (is_array($ftrows)) {
				$indexed_words = array_keys($ftrows);
				sort($indexed_words);
				$wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words)));
				$wordList.='<hr/><em>Count: '.count($indexed_words).'</em>';
			}

			$lines[] = '<td style="white-space: normal;">'.$wordList.'</td>';
		break;
		default:	// Overview
			$lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>';
			$lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
			$lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
		break;
	}

	return $lines;
}
535
/**
 * Creates the header row for the table
 *
 * The header starts with two empty cells, the title cell and the cell with the link for clearing
 * all phash rows collected in $this->allPhashListed. The remaining cells depend on the listing
 * type in MOD_SETTINGS['type'].
 *
 * @return	string		HTML string (table row)
 */
function printPhashRowHeader() {

	// Leading cells, the same for every listing type:
	$cells = array(
		'<td>&nbsp;</td>',
		'<td>&nbsp;</td>',
		'<td>Title</td>',
		'<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>'
	);

	// Labels (HTML) of the type specific cells:
	switch($this->pObj->MOD_SETTINGS['type']) {
		case 1:
			$labels = array(
				'pHash',
				'cHash',
				'&amp;id',
				'&amp;type',
				'&amp;L',
				'&amp;MP',
				'grlist',
				'Rootline',
				'page_id',
				'phash_t3',
				'CfgUid',
				'RecUid',
				'GET-parameters',
				'&amp;cHash'
			);
		break;
		case 2:
			// The transparent images keep the two columns at a minimum width:
			$labels = array(
				'Content<br/>
							<img src="clear.gif" width="300" height="1" alt="" />',
				'Words<br/>
							<img src="clear.gif" width="300" height="1" alt="" />'
			);
		break;
		default:
			$labels = array(
				'Description',
				'Size',
				'Indexed:'
			);
		break;
	}

	foreach($labels as $label) {
		$cells[] = '<td>'.$label.'</td>';
	}

	return '<tr class="tableheader bgColor5">'.implode('',$cells).'</tr>';
}
590
/**
 * Returns the number of columns depending on display type of list
 *
 * @return	integer		Number of columns in list: 18 for type 1, 6 for type 2, otherwise 7
 */
function returnNumberOfColumns() {
	$listType = $this->pObj->MOD_SETTINGS['type'];

	// Loose comparison on purpose; it matches the comparison a switch statement performs.
	if ($listType == 1) {
		return 18;
	}
	if ($listType == 2) {
		return 6;
	}
	return 7;
}
609
610
611
612
613
614
615
616
617
618
619
620 /*******************************
621 *
622 * Details display, phash row
623 *
624 *******************************/
625
/**
 * Showing details for a particular phash row
 *
 * Outputs the phash record itself, the debug information stored for it (if any), word statistics
 * (all words, metaphone groups, top-20 words by frequency and by count) and the section records.
 *
 * @param	integer		phash value to display details for.
 * @return	string		HTML content
 */
function showDetailsForPhash($phash) {

	$content = '';

	// Selects the result row:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'*',
		'index_phash',
		'phash = '.intval($phash)
	);
	$phashRecord = $ftrows[0];

	// If found, display:
	if (is_array($phashRecord)) {
		$content.= '<h4>phash row content:</h4>'.
					$this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord));

		// Getting debug information if any; the section is skipped if there is no debug row for the phash:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_debug',
			'phash = '.intval($phash)
		);
		if (is_array($ftrows) && isset($ftrows[0]['debuginfo'])) {
			$debugInfo = unserialize($ftrows[0]['debuginfo']);
			if (!is_array($debugInfo)) {
				$debugInfo = array();
			}
			$lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
			unset($debugInfo['lexer']);

			$content.= '<h3>Debug information:</h3>'.
					$this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo));

			$content.= '<h4>Debug information / lexer splitting:</h4>'.
					'<hr/><b>'.
					$this->utf8_to_currentCharset($lexer).
					'</b><hr/>';
		}

		$content.='<h3>Word statistics</h3>';

		// Finding all words for this phash:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'index_words.*, index_rel.*',
			'index_rel, index_words',
			'index_rel.phash = '.intval($phash).
				' AND index_words.wid = index_rel.wid',
			'',
			'index_words.baseword',
			''
		);
		if (!is_array($ftrows)) {	// Nothing to count or traverse if the query did not give an array
			$ftrows = array();
		}
		$pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']);
		$showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();
		$content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);

		// Group metaphone hash:
		$metaphone = array();
		foreach($ftrows as $row) {
			$metaphone[$row['metaphone']][] = $row['baseword'];
		}
		$content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');

		// Finding top-20 words (stop words excluded) for this phash, first on frequency, then on count:
		$topLists = array(
			'index_rel.freq DESC' => 'Top-20 words by frequency:',
			'index_rel.count DESC' => 'Top-20 words by count:'
		);
		foreach($topLists as $orderBy => $listHeader) {
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.baseword, index_words.metaphone, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.intval($phash).
					' AND index_words.wid = index_rel.wid'.
					' AND index_words.is_stopword=0',
				'',
				$orderBy,
				'20'
			);
			if (!is_array($ftrows)) {
				$ftrows = array();
			}
			$content.= $this->listWords($ftrows, $listHeader, 2);
		}

		$content.='<h3>Section records for this phash</h3>';

		// Finding sections for this record:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_section',
			'index_section.phash = '.intval($phash),
			'',
			'',
			''
		);
		$content.= t3lib_div::view_array($ftrows);

		// Add go-back link (above and below the content):
		$content = $this->linkList().$content.$this->linkList();

	} else $content.= 'Error: No phash row found';

	return $content;
}
741
/**
 * Create table with list of words from $ftrows
 *
 * @param	array		Array of records selected from index_rel/index_words
 * @param	string		Header string to show before table.
 * @param	mixed		If set, the stopWord checkboxes will be shown in the word list. Only for admins. (because it is a global setting, not per-site). With the integer value 2 the checkboxes are shown without the hidden "0" fields.
 * @param	array		The page record from which to load the keywords, if any.
 * @return	string		HTML table
 */
function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='') {

	// Prepare keywords:
	$keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : '';
	$showKeywords = is_array($keywords);

	// The hidden fields submit "0" for unchecked checkboxes. They are left out only in mode 2.
	// A boolean must be tested separately because "TRUE != 2" evaluates to false in PHP.
	$addHiddenStopWordField = is_bool($stopWordBoxes) || $stopWordBoxes!=2;

	// Render list:
	$trows = '';
	$trows.= '
		<tr class="tableheader bgColor5">
			'.($stopWordBoxes ? '<td>'.htmlspecialchars('Stopword:').'</td>' : '').'
			<td>'.htmlspecialchars('Word:').'</td>
			<td>'.htmlspecialchars('Count:').'</td>
			<td>'.htmlspecialchars('First:').'</td>
			<td>'.htmlspecialchars('Frequency:').'</td>
			<td>'.htmlspecialchars('Flags:').'</td>
			'.($showKeywords ? '<td>'.htmlspecialchars('Page keyword:').'</td>' : '').'
		</tr>
	';
	if (is_array($ftrows)) {
		foreach($ftrows as $row) {
			// Not every query selects the is_stopword field:
			$isStopWord = !empty($row['is_stopword']);

			// Stop word cell:
			$stopWordCell = '';
			if ($stopWordBoxes) {
				$stopWordFieldName = 'stopWord['.htmlspecialchars((string)$row['wid']).']';
				$hiddenField = $addHiddenStopWordField ? '<input type="hidden" name="'.$stopWordFieldName.'" value="0" />' : '';
				$stopWordCell = '<td align="center"'.($isStopWord ? ' style="background-color:red;"' : '').'>'.$hiddenField.'<input type="checkbox" name="'.$stopWordFieldName.'" value="1"'.($isStopWord ? ' checked="checked"' : '').' /></td>';
			}

			// Page keyword cell; the word is part of the field name and therefore has to be escaped:
			$keywordCell = '';
			if ($showKeywords) {
				$isKeyword = isset($keywords[$row['baseword']]);
				$keywordFieldName = 'pageKeyword['.htmlspecialchars((string)$row['baseword']).']';
				$keywordCell = '<td align="center"'.($isKeyword ? ' class="bgColor2"' : '').'><input type="hidden" name="'.$keywordFieldName.'" value="0" /><input type="checkbox" name="'.$keywordFieldName.'" value="1"'.($isKeyword ? ' checked="checked"' : '').' /></td>';
			}

			$trows.= '
				<tr class="'.($isStopWord ? 'bgColor' : 'bgColor4').'">
					'.$stopWordCell.'
					<td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td>
					<td>'.htmlspecialchars($row['count']).'</td>
					<td>'.htmlspecialchars($row['first']).'</td>
					<td>'.htmlspecialchars($row['freq']).'</td>
					<td>'.htmlspecialchars((string)$this->flagsMsg($row['flags'])).'</td>
					'.$keywordCell.'
				</tr>
			';
		}
	}

	// The submit buttons point the form of the module to the current URL so that the detail view is shown again:
	return '<h4>'.htmlspecialchars($header).'</h4>'.
				'
				<table border="0" cellspacing="1" cellpadding="2" class="c-list">
				'.$trows.'
				</table>'.
				($stopWordBoxes ? '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" />' : '').
				($showKeywords ? '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" /><input type="hidden" name="pageKeyword_pageUid" value="'.intval($page['uid']).'" />'.
								'<br/>Current keywords are: <em>'.htmlspecialchars(implode(', ',array_keys($keywords))).'</em>' : '');
}
793
/**
 * Displays table of metaphone groups larger than 1
 *
 * @param	array		Words grouped by metaphone hash: the keys are the metaphone values, the values are arrays of words
 * @param	string		Header string
 * @return	string		HTML table
 */
function listMetaphoneStat($ftrows,$header) {

	// Header row:
	$tableRows = '
		<tr class="tableheader bgColor5">
			<td>'.htmlspecialchars('Metaphone:').'</td>
			<td>'.htmlspecialchars('Hash:').'</td>
			<td>'.htmlspecialchars('Count:').'</td>
			<td>'.htmlspecialchars('Words:').'</td>
		</tr>
	';

	// One row for each metaphone value shared by at least two words:
	foreach($ftrows as $metaphone => $words) {
		$numberOfWords = count($words);
		if ($numberOfWords <= 1) {
			continue;
		}

		$metaphoneLink = $this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone);
		$wordList = $this->utf8_to_currentCharset(implode(', ',$words));

		$tableRows.= '
			<tr class="bgColor4">
				<td>'.$metaphoneLink.'</td>
				<td>'.htmlspecialchars($metaphone).'</td>
				<td>'.htmlspecialchars($numberOfWords).'</td>
				<td style="white-space: normal;">'.htmlspecialchars($wordList).'</td>
			</tr>
		';
	}

	$headerHtml = '<h4>'.htmlspecialchars($header).'</h4>';
	$tableHtml = '<table border="0" cellspacing="1" cellpadding="2" class="c-list">
				'.$tableRows.'
				</table>';

	return $headerHtml.$tableHtml;
}
830
/**
 * Wraps the input string in a link to the detail view of a word.
 *
 * The link points to the current script with the "wid" parameter set and the "phash" parameter emptied.
 *
 * @param	string		String to wrap (HTML; it is output as it is)
 * @param	integer		wid value to show details for
 * @return	string		Wrapped string
 */
function linkWordDetails($string,$wid) {
	$urlParameters = array(
		'wid' => $wid,
		'phash' => ''
	);
	$url = t3lib_div::linkThisScript($urlParameters);

	return '<a href="'.htmlspecialchars($url).'">'.$string.'</a>';
}
841
842
/**
 * Wraps the input string in a link to the detail view of a metaphone value.
 *
 * The link points to the current script with the "metaphone" parameter set and the "wid" and "phash" parameters emptied.
 *
 * @param	string		String to wrap (HTML; it is output as it is)
 * @param	integer		Metaphone value
 * @return	string		Wrapped string
 */
function linkMetaPhoneDetails($string,$metaphone) {
	$urlParameters = array(
		'metaphone' => $metaphone,
		'wid' => '',
		'phash' => ''
	);
	$url = t3lib_div::linkThisScript($urlParameters);

	return '<a href="'.htmlspecialchars($url).'">'.$string.'</a>';
}
853
/**
 * Creates message for flag value
 *
 * The bits 128, 64 and 32 are translated into the labels '<title>', '<meta/keywords>' and
 * '<meta/description>', followed by the flags value in parentheses. The result is not HTML
 * escaped; that is left to the caller.
 *
 * @param	integer		Flags integer
 * @return	string		Message string; an empty string if the flags value is not greater than zero
 */
function flagsMsg($flags) {

	// Always return a string (previously nothing was returned here):
	if (!($flags > 0)) {
		return '';
	}

	$labels = '';
	if ($flags & 128) {	// pow(2,7)
		$labels.= '<title>';
	}
	if ($flags & 64) {	// pow(2,6)
		$labels.= '<meta/keywords>';
	}
	if ($flags & 32) {	// pow(2,5)
		$labels.= '<meta/description>';
	}

	return $labels.' ('.$flags.')';
}
869
870
871
872
873
874
875
876
877
878
879 /*******************************
880 *
881 * Details display, words / metaphone
882 *
883 *******************************/
884
/**
 * Show details for words
 *
 * Lists all indexed elements (phash rows with their section records) which are related to
 * the word, ordered by frequency.
 *
 * @param	integer		Word ID (wid)
 * @return	string		HTML content
 */
function showDetailsForWord($wid) {

	// Select references to this word
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'index_phash.*, index_section.*, index_rel.*',
		'index_rel, index_section, index_phash',
		'index_rel.wid = '.intval($wid).
			' AND index_rel.phash = index_section.phash'.
			' AND index_section.phash = index_phash.phash',
		'',
		'index_rel.freq DESC',
		''
	);

	// Headers (assigned, not appended, so the variable is always defined):
	$content = '
		<tr class="tableheader bgColor5">
			<td>phash</td>
			<td>page_id</td>
			<td>data_filename</td>
			<td>count</td>
			<td>first</td>
			<td>freq</td>
			<td>flags</td>
		</tr>';

	// One row for each reference:
	if (is_array($ftrows)) {
		foreach($ftrows as $wDat) {
			$content.='
				<tr class="bgColor4">
					<td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td>
					<td>'.htmlspecialchars($wDat['page_id']).'</td>
					<td>'.htmlspecialchars($wDat['data_filename']).'</td>
					<td>'.htmlspecialchars($wDat['count']).'</td>
					<td>'.htmlspecialchars($wDat['first']).'</td>
					<td>'.htmlspecialchars($wDat['freq']).'</td>
					<td>'.htmlspecialchars($wDat['flags']).'</td>
				</tr>';
		}
	}

	// Compile table:
	$content = '
		<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
		$content.'
		</table>';

	// Add go-back link:
	$content = $content.$this->linkList();

	return $content;
}
943
944 /**
945 * Show details for metaphone value
946 *
947 * @param integer Metaphone integer hash
948 * @return string HTML content
949 */
function showDetailsForMetaphone($metaphone) {
    // Lists all words from index_words whose "metaphone" field equals the given integer value.
    // Returns an empty string when no word matches.
    $content = '';

    // Find all words with this metaphone value, ordered by base word:
    $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        'index_words.*',
        'index_words',
        'index_words.metaphone = '.intval($metaphone),
        '',
        'index_words.baseword',
        ''
    );

    // is_array() is checked before count() so a failed query cannot trigger count() on a non-array.
    if (is_array($ftrows) && count($ftrows)) {

        // Heading, kept outside of the table markup.
        // NOTE(review): the second argument (1) presumably makes metaphone() return the readable string instead of the hash - confirm in the indexer class.
        $content.='<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>';

        // Table header row:
        $tableRows = '
            <tr class="tableheader bgColor5">
                <td>Word</td>
                <td>Is stopword?</td>
            </tr>';

        // One row per word; the word links to its detail view.
        foreach($ftrows as $wDat) {
            $tableRows.='
                <tr class="bgColor4">
                    <td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td>
                    <td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td>
                </tr>';
        }

        $content.= '
            <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
                $tableRows.'
            </table>';

        // Sanity check: the value computed from the first word must equal the requested value.
        if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone) {
            $content.='ERROR: Metaphone string and hash did not match for some reason!?';
        }

        // Add go-back link:
        $content.= $this->linkList();
    }

    return $content;
}
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008 /*******************************
1009 *
1010 * Helper functions
1011 *
1012 *******************************/
1013
1014 /**
1015 * Creates icon which clears indexes for a certain list of phash values.
1016 *
1017 * @param string List of phash integers
1018 * @param string Alt-text for the garbage bin icon.
1019 * @return string HTML img-tag with link around.
1020 */
function printRemoveIndexed($phash,$alt) {
    // Garbage-bin icon wrapped in a link to this script with "deletePhash" set to the given list.
    $href = htmlspecialchars(t3lib_div::linkThisScript(array('deletePhash'=>$phash)));
    $icon = '<img src="'.$GLOBALS['BACK_PATH'].'gfx/garbage.gif" width="11" hspace="1" vspace="2" height="12" border="0" title="'.htmlspecialchars($alt).'" alt="" />';

    return '<a href="'.$href.'">'.$icon.'</a>';
}
1026
1027 /**
1028 * Button for re-indexing of documents
1029 *
1030 * @param array phash table result row.
1031 * @param string Title attribute text for icon
1032 * @return string HTML content; Icon wrapped in link.
1033 */
function printReindex($resultRow,$alt) {
    // No button (NULL) when the item type is empty or the string '0'.
    if (!$resultRow['item_type'] || $resultRow['item_type']==='0') {
        return;
    }

    // Refresh icon wrapped in a link carrying the phash and page id to re-index.
    $urlParams = array(
        'reindex' => $resultRow['phash'],
        'reindex_id' => $resultRow['page_id']
    );
    $href = htmlspecialchars(t3lib_div::linkThisScript($urlParams));
    $icon = '<img src="'.$GLOBALS['BACK_PATH'].'gfx/refresh_n.gif" width="14" hspace="1" vspace="2" height="14" border="0" title="'.htmlspecialchars($alt).'" alt="" />';

    return '<a href="'.$href.'">'.$icon.'</a>';
}
1041
1042 /**
1043 * Wraps input string in a link that will display details for the phash value set.
1044 *
1045 * @param string String to wrap, possibly a title or so.
1046 * @param integer phash value to show details for
1047 * @return string Wrapped string
1048 */
function linkDetails($string,$phash) {
    // Links back to this script with "phash" set; $string is inserted unchanged (not escaped here).
    $href = htmlspecialchars(t3lib_div::linkThisScript(array('phash'=>$phash)));

    return '<a href="'.$href.'">'.$string.'</a>';
}
1052
1053 /**
1054 * Creates link back to listing
1055 *
1056 * @return string Link back to list
1057 */
function linkList() {
    // Target is index.php with the id of the parent module object ($this->pObj).
    $listUrl = 'index.php?id='.$this->pObj->id;

    return '<br/><a href="'.$listUrl.'">Back to list.</a><br/>';
}
1061
1062 /**
1063 * Wraps input string in a link to index.php for the given page id, with SET[depth]=0 and SET[type]=1.
1064 *
1065 * @param string String to wrap, possibly a title or so.
1066 * @param integer Page id to link to
1067 * @return string Wrapped string
1068 */
function showPageDetails($string,$id) {
    // Target: index.php for page $id with the module settings depth=0 and type=1.
    // The whole URL is HTML-escaped; $string is inserted unchanged.
    $url = 'index.php?id='.$id.'&SET[depth]=0&SET[type]=1';

    return '<a href="'.htmlspecialchars($url).'">'.$string.'</a>';
}
1072
1073 /**
1074 * Prints the gr_lists attached to a indexed entry.
1075 *
1076 * @param array Array of index_grlist records
1077 * @return string HTML code.
1078 */
function printExtraGrListRows($extraGrListRows) {
    // Returns the gr_list values of the given rows, one per line (separated by <br/>),
    // passed through $GLOBALS['TBE_TEMPLATE']->dfw(). Returns an empty string when
    // there is nothing to show.
    if (!is_array($extraGrListRows) || !count($extraGrListRows)) {
        return '';
    }

    // foreach replaces the former reset()/each() loop: each() no longer exists in PHP 8.
    $lines = array();
    foreach($extraGrListRows as $r) {
        $lines[] = $r['gr_list'];
    }

    return '<br/>'.$GLOBALS['TBE_TEMPLATE']->dfw(implode('<br/>',$lines));
}
1089
1090 /**
1091 * Print path for indexing
1092 *
1093 * @param array Result row with content from index_section
1094 * @return string Rootline information
1095 */
function printRootlineInfo($row) {
    // Builds a "/"-separated list from the rl0, rl1 and rl2 fields of the row plus any
    // additionally configured root line fields, sorted by level.
    $levels = array();

    // Base levels: collect rl0..rl2 in order and stop at the first empty one.
    foreach(array('rl0', 'rl1', 'rl2') as $depth => $field) {
        if (!$row[$field]) {
            break;
        }
        $levels[$depth] = $row[$field];
    }

    // Additional levels are only looked at when all three base levels were set.
    if (count($levels) === 3 && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) {
        foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) {
            if ($row[$fieldName]) {
                $levels[$rootLineLevel] = $row[$fieldName];
            }
        }
    }

    // Return root line, ordered by level.
    ksort($levels);
    return implode('/',$levels);
}
1122
1123 /**
1124 * Return icon for file extension
1125 *
1126 * @param string File extension / item type
1127 * @param string Title attribute value in icon.
1128 * @return string <img> tag for icon
1129 */
function makeItemTypeIcon($it,$alt='') {
    // Returns an <img> tag for the item type $it with "$it: $alt" as title attribute.
    // The tag (with a title placeholder) is cached per item type in $this->iconFileNameCache.
    // An empty string is returned, and cached, when no icon can be resolved.
    if (!isset($this->iconFileNameCache[$it])) {

        // Resolve the icon reference: the string '0' uses the pages icon,
        // other types ask their external parser object.
        $icon = '';
        if ($it==='0') {
            $icon = 'EXT:indexed_search/pi/res/pages.gif';
        } elseif (!empty($this->external_parsers[$it])) {
            $icon = $this->external_parsers[$it]->getIcon($it);
        }

        $imgTag = '';
        $fullPath = $icon ? t3lib_div::getFileAbsFileName($icon) : '';

        if ($fullPath) {
            // getimagesize() supplies the width/height attribute string in index 3;
            // errors are suppressed because a missing or unreadable file simply means "no icon".
            $info = @getimagesize($fullPath);
            $iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site));
            $imgTag = is_array($info) ? '<img src="'.$iconPath.'" '.$info[3].' title="###TITLE_ATTRIBUTE###" alt="" />' : '';
        }

        // Always store a result so the lookup is not repeated and the cache entry is
        // defined when it is read below.
        $this->iconFileNameCache[$it] = $imgTag;
    }

    return str_replace('###TITLE_ATTRIBUTE###',htmlspecialchars($it.': '.$alt),$this->iconFileNameCache[$it]);
}
1148
1149 /**
1150 * Converts the input string from utf-8 to the backend charset.
1151 *
1152 * @param string String to convert (utf-8)
1153 * @return string Converted string (backend charset if different from utf-8)
1154 */
function utf8_to_currentCharset($string) {
    // Nothing to convert when the charset of the global $LANG object is already utf-8.
    if ($GLOBALS['LANG']->charSet == 'utf-8') {
        return $string;
    }

    // Otherwise let the charset conversion object of $LANG decode the string.
    return $GLOBALS['LANG']->csConvObj->utf8_decode($string, $GLOBALS['LANG']->charSet, TRUE);
}
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174 /********************************
1175 *
1176 * Reindexing
1177 *
1178 *******************************/
1179
1180 /**
1181 * Re-indexing files/records attached to a page.
1182 *
1183 * @param integer Phash value
1184 * @param integer The page uid for the section record (file/url could appear more than one place you know...)
1185 * @return string HTML content
1186 */
function reindexPhash($phash, $pageId) {
    // Re-indexes the file or external URL stored for the given phash on the given page
    // and returns the indexer's log and hash arrays as HTML, followed by a back link.
    $output = '';

    // Look up the phash row together with its section record for this page:
    $matches = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        'ISEC.*, IP.*',
        'index_phash IP, index_section ISEC',
        'IP.phash = ISEC.phash
            AND IP.phash = '.intval($phash).'
            AND ISEC.page_id = '.intval($pageId)
    );
    list($resultRow) = $matches;

    // Only rows with a non-empty item type other than '0' are re-indexed here.
    $isReindexable = is_array($resultRow) && $resultRow['item_type'] && $resultRow['item_type']!=='0';

    if ($isReindexable) {

        // (Re)-Indexing file on page.
        $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
        $indexerObj->backend_initIndexer($pageId, 0, 0, '', $this->getUidRootLineForClosestTemplate($pageId));

        // URL or local file:
        if ($resultRow['externalUrl']) {
            $indexerObj->indexExternalUrl($resultRow['data_filename']);
        } else {
            $indexerObj->indexRegularDocument($resultRow['data_filename'], TRUE);
        }

        // The phash calculated during re-indexing is expected to equal the stored one.
        if ($indexerObj->file_phash_arr['phash'] != $resultRow['phash']) {
            $output.= 'ERROR: phash ('.$indexerObj->file_phash_arr['phash'].') did NOT match '.$resultRow['phash'].' for strange reasons!';
        }

        // Dump log and hash arrays:
        $output.= '<h4>Log for re-indexing of "'.htmlspecialchars($resultRow['data_filename']).'":</h4>'.
            t3lib_div::view_array($indexerObj->internal_log);
        $output.= '<h4>Hash-array, page:</h4>'.
            t3lib_div::view_array($indexerObj->hash);
        $output.= '<h4>Hash-array, file:</h4>'.
            t3lib_div::view_array($indexerObj->file_phash_arr);
    }

    // Link back to list.
    return $output.$this->linkList();
}
1233
1234 /**
1235 * Get rootline for closest TypoScript template root.
1236 * Algorithm same as used in Web > Template, Object browser
1237 *
1238 * @param integer The page id to traverse rootline back from
1239 * @return array Array where the root lines uid values are found.
1240 */
function getUidRootLineForClosestTemplate($id) {
    // Template parser object, with time tracking switched off:
    $tmpl = t3lib_div::makeInstance('t3lib_tsparser_ext');
    $tmpl->tt_track = 0;
    $tmpl->init();

    // Fetch the root line of the page and run the templates along it;
    // this fills $tmpl->rootLine, which is read below.
    $sys_page = t3lib_div::makeInstance('t3lib_pageSelect');
    $pageRootLine = $sys_page->getRootLine($id);
    $tmpl->runThroughTemplates($pageRootLine, 0);

    // Reduce the template root line to its uid values, keeping the keys:
    $uids = array();
    foreach($tmpl->rootLine as $level => $pageRecord) {
        $uids[$level] = $pageRecord['uid'];
    }

    return $uids;
}
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271 /********************************
1272 *
1273 * Indexing of configurations
1274 *
1275 *******************************/
1276
1277 /**
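1278 * Runs the indexing configurations (index_config records) found on the current page: type 1 indexes records of a database table, type 2 indexes files in a path, type 3 indexes an external URL recursively.
1279 *
1280 * @return void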
1281 */
function extraIndexing() {
    // Runs every visible, started index_config record stored on the current page:
    //   type 1: records of a database table are indexed as pages,
    //   type 2: files below a path are indexed as documents,
    //   type 3: an external URL is indexed recursively.

    // Select index configurations on this page
    $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        '*',
        'index_config',
        'pid = '.intval($this->pObj->id).
            ' AND hidden=0'.
            ' AND starttime<'.time()
    );

    // Nothing to do if the query failed.
    if (!is_array($ftrows)) {
        return;
    }

    $rl = $this->getUidRootLineForClosestTemplate($this->pObj->id);

    foreach($ftrows as $cfgRow) {
        switch($cfgRow['type']) {
            case 1:
                // Database records; the table must be configured in TCA.
                if ($cfgRow['table2index'] && isset($GLOBALS['TCA'][$cfgRow['table2index']])) {

                    // Init: records are read from the alternative source pid if set, otherwise from the current page.
                    $pid = intval($cfgRow['alternative_source_pid']) ? intval($cfgRow['alternative_source_pid']) : $this->pObj->id;
                    $fieldList = t3lib_div::trimExplode(',',$cfgRow['fieldlist'],1);

                    // Select
                    $recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
                        '*',
                        $cfgRow['table2index'],
                        'pid = '.intval($pid)
                    );

                    // Traverse:
                    if (is_array($recs)) {
                        foreach($recs as $r) {
                            // (Re)-Indexing a row from a table:
                            $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
                            parse_str(str_replace('###UID###',$r['uid'],$cfgRow['get_params']),$GETparams);
                            $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl, $GETparams, $cfgRow['chashcalc'] ? TRUE : FALSE);
                            $indexerObj->backend_setFreeIndexUid($cfgRow['uid']);

                            // First field of the list is the title, all others are concatenated as content.
                            // Both are reset for every record so no value leaks over from the previous one.
                            $theTitle = '';
                            $theContent = '';
                            foreach($fieldList as $k => $v) {
                                if (!$k) {
                                    $theTitle = $r[$v];
                                } else {
                                    $theContent.= $r[$v].' ';
                                }
                            }

                            $indexerObj->backend_indexAsTYPO3Page(
                                $theTitle,
                                '',
                                '',
                                $theContent,
                                $GLOBALS['LANG']->charSet,
                                $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['tstamp']],
                                $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['crdate']],
                                $r['uid']
                            );
                        }
                    }
                }
            break;
            case 2:
                // Files in a path. Variable names are case-sensitive: the check must be made on
                // $readpath (it was previously made on the undefined $readPath, so it never saw the real path).
                $readpath = $cfgRow['filepath'];
                if (!t3lib_div::isAbsPath($readpath)) {
                    $readpath = t3lib_div::getFileAbsFileName($readpath);
                }

                if (t3lib_div::isAllowedAbsPath($readpath)) {
                    $extList = implode(',',t3lib_div::trimExplode(',',$cfgRow['extensions'],1));
                    $fileArr = array();
                    $files = t3lib_div::getAllFilesAndFoldersInPath($fileArr,$readpath,$extList,0,$cfgRow['depth']);
                    $files = t3lib_div::removePrefixPathFromList($files,PATH_site);

                    if (is_array($files)) {
                        foreach($files as $path) {
                            // (Re)-Indexing file on page.
                            $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
                            $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl);
                            $indexerObj->backend_setFreeIndexUid($cfgRow['uid']);
                            $indexerObj->hash['phash'] = -1;    // EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

                            $indexerObj->indexRegularDocument($path, TRUE);
                        }
                    }
                }
            break;
            case 3:
                // External URL, followed recursively down to the configured depth.
                if ($cfgRow['externalUrl']) {
                    $this->indexExtUrlRecursively($cfgRow['externalUrl'], $cfgRow['depth'], $this->pObj->id, $rl, $cfgRow['uid']);
                }
            break;
        }
    }
}
1381
1382 /**
1383 * Indexing URL recursively
1384 * Still needs some work; e.g. parameters for type, language and the MP var are not passed yet...
1385 *
1386 * @param string URL, http://....
1387 * @param integer Depth of recursion. 0 (zero) = only input URL
1388 * @param integer Page id to relate indexing to.
1389 * @param array Rootline array to relate indexing to
1390 * @param integer Configuration UID
1391 * @return void
1392 */
function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid) {
    // Indexes $url as an external URL related to page $pageId / configuration $cfgUid and,
    // while $depth is greater than zero, follows the links found in its content.

    // Index external URL:
    $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
    $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
    $indexerObj->backend_setFreeIndexUid($cfgUid);

    $indexerObj->indexExternalUrl($url);

    // Recursion:
    if ($depth>0) {
        $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
        if (!is_array($list)) {
            return;
        }

        // Base ("scheme://host[:port]/") used to complete links that have no scheme.
        // parse_url() may return FALSE or omit components, so each part is checked before use.
        $url_qParts = parse_url($url);
        $baseUrl =
            (is_array($url_qParts) && isset($url_qParts['scheme']) ? $url_qParts['scheme'] : '').
            '://'.
            (is_array($url_qParts) && isset($url_qParts['host']) ? $url_qParts['host'] : '').
            (is_array($url_qParts) && isset($url_qParts['port']) ? ':'.$url_qParts['port'] : '').
            '/';

        // Traverse links:
        foreach($list as $count => $linkInfo) {

            // Decode entities:
            $linkSource = t3lib_div::htmlspecialchars_decode($linkInfo['href']);

            // Links without a scheme are resolved against the site root of $url. Leading slashes are
            // stripped first so that "/path" does not become "scheme://host//path".
            $qParts = parse_url($linkSource);
            if (!is_array($qParts) || empty($qParts['scheme'])) {
                $linkSource = $baseUrl.ltrim($linkSource,'/');
            }

            $this->indexExtUrlRecursively($linkSource, $depth-1, $pageId, $rl, $cfgUid);

            // Temporary limit until we know how to handle hundreds of URLs with limited parsetime in PHP...
            if ($count>3) break;
        }
    }
}
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437 /********************************
1438 *
1439 * SQL functions
1440 *
1441 *******************************/
1442
1443 /**
1444 * Removes ALL data regarding a certain list of indexed phash-row
1445 *
1446 * @param string List of phash integers
1447 * @param boolean If set, page cache is cleared as well.
1448 * @return void
1449 */
function removeIndexedPhashRow($phashList,$clearPageCache=1) {
    // All index tables that hold rows keyed by phash.
    $indexTables = array('index_phash','index_rel','index_section','index_grlist','index_fulltext','index_debug');

    foreach(t3lib_div::trimExplode(',',$phashList,1) as $listItem) {
        $phash = intval($listItem);

        // Only positive phash values are processed.
        if ($phash<=0) {
            continue;
        }

        // Clearing page cache for all pages that have a section record for this phash:
        if ($clearPageCache) {
            $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.intval($phash));
            if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
                $pageIds = array();
                while($sectionRow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
                    $pageIds[] = $sectionRow['page_id'];
                }
                $cleanIdList = implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($pageIds));
                $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.$cleanIdList.')');
            }
        }

        // Removing old registrations for all tables.
        // index_section records of external files whose phash_t3 points to this hash are NOT removed.
        foreach($indexTables as $table) {
            $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
        }
    }
}
1479
1480 /**
1481 * Returns an array with gr_list records for a phash
1482 *
1483 * @param integer phash integer to look up on
1484 * @param string gr_list string to filter OUT of the result (first occurrence)
1485 * @return array Array of records from index_grlist table
1486 */
function getGrListEntriesForPhash($phash,$gr_list) {
    // Returns all index_grlist rows for the phash, except the first row whose gr_list
    // equals $gr_list; later rows with the same gr_list are kept.
    $entries = array();
    $skippedOne = FALSE;

    $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash));
    while($entry = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
        // Drop only the first matching row.
        if (!$skippedOne && strcmp($entry['gr_list'],$gr_list)==0) {
            $skippedOne = TRUE;
            continue;
        }
        $entries[] = $entry;
    }

    return $entries;
}
1500
1501 /**
1502 * Setting / Unsetting stopwords
1503 *
1504 * @param array Array of stop-words WIDs with 0/1 to set / unset
1505 * @return void
1506 */
function processStopWords($stopWords) {
    // Sets or clears the is_stopword flag in index_words for each word id in $stopWords
    // (word id => state). Only admin users may do this; other users are silently ignored.
    if (!is_array($stopWords) || !$GLOBALS['BE_USER']->isAdmin()) {
        return;
    }

    // Traverse words
    foreach($stopWords as $wid => $state) {
        $fieldArray = array(
            'is_stopword' => $state ? 1 : 0
        );
        // The word id is an array key taken from submitted data, so it is forced to an
        // integer before being placed in the WHERE clause.
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.intval($wid), $fieldArray);
    }
}
1519
1520 /**
1521 * Setting / Unsetting keywords in page header
1522 *
1523 * @param array Page keywords as keys in array with value 0 or 1 for set or unset.
1524 * @param integer The page uid of the header where the keywords are to be set.
1525 * @return void
1526 */
function processPageKeywords($pageKeywords, $pageUid) {
    // Current keywords of the page, flipped so that each keyword is an array key:
    $pageRec = t3lib_BEfunc::getRecord('pages', $pageUid);
    $keywordSet = array_flip(t3lib_div::trimExplode(',', $pageRec['keywords'], 1));

    // Merge: a true value adds the keyword, a false value removes it.
    foreach($pageKeywords as $keyword => $enabled) {
        if (!$enabled) {
            unset($keywordSet[$keyword]);
            continue;
        }
        $keywordSet[$keyword] = 1;
    }

    // Compile the new comma-separated list and write it to the page record through TCEmain:
    $data = array(
        'pages' => array(
            $pageUid => array(
                'keywords' => implode(', ',array_keys($keywordSet))
            )
        )
    );

    $tce = t3lib_div::makeInstance('t3lib_TCEmain');
    $tce->stripslashes_values = 0;
    $tce->start($data,array());
    $tce->process_datamap();
}
1551 }
1552
1553
1554
// XCLASS hook: if an extension class file is registered for this script under the current TYPO3_MODE, include it.
if (defined('TYPO3_MODE')) {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']) {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']);
    }
}
1558
1559 ?>