* Merged changes from TYPO3_3-8 branch back
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / modfunc1 / class.tx_indexedsearch_modfunc1.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 *
17 * This script is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * This copyright notice MUST APPEAR in all copies of the script!
23 ***************************************************************/
24 /**
25 * Module extension (addition to function menu) 'Indexed search' for the 'indexed_search' extension.
26 *
27 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
28 */
29 /**
30 * [CLASS/FUNCTION INDEX of SCRIPT]
31 *
32 *
33 *
34 * 110: class tx_indexedsearch_modfunc1 extends t3lib_extobjbase
35 * 124: function modMenu()
36 * 148: function main()
37 *
38 * SECTION: Drawing table of indexed pages
39 * 261: function drawTableOfIndexedPages()
40 * 312: function indexed_info($data, $firstColContent)
41 * 398: function printPhashRow($row,$grouping=0,$extraGrListRows)
42 * 539: function printPhashRowHeader()
43 * 594: function returnNumberOfColumns()
44 *
45 * SECTION: Details display, phash row
46 * 630: function showDetailsForPhash($phash)
47 * 747: function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='')
48 * 796: function listMetaphoneStat($ftrows,$header)
49 * 833: function linkWordDetails($string,$wid)
50 * 845: function linkMetaPhoneDetails($string,$metaphone)
51 * 855: function flagsMsg($flags)
52 *
53 * SECTION: Details display, words / metaphone
54 * 886: function showDetailsForWord($wid)
55 * 945: function showDetailsForMetaphone($metaphone)
56 *
57 * SECTION: Helper functions
58 * 1016: function printRemoveIndexed($phash,$alt)
59 * 1029: function printReindex($resultRow,$alt)
60 * 1044: function linkDetails($string,$phash)
61 * 1053: function linkList()
62 * 1064: function showPageDetails($string,$id)
63 * 1074: function printExtraGrListRows($extraGrListRows)
64 * 1091: function printRootlineInfo($row)
65 * 1125: function makeItemTypeIcon($it,$alt='')
66 * 1150: function utf8_to_currentCharset($string)
67 *
68 * SECTION: Reindexing
69 * 1183: function reindexPhash($phash, $pageId)
70 * 1237: function getUidRootLineForClosestTemplate($id)
71 *
72 * SECTION: Indexing of configurations
73 * 1278: function extraIndexing()
74 * 1389: function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid)
75 *
76 * SECTION: SQL functions
77 * 1446: function removeIndexedPhashRow($phashList,$clearPageCache=1)
78 * 1483: function getGrListEntriesForPhash($phash,$gr_list)
79 * 1503: function processStopWords($stopWords)
80 * 1523: function processPageKeywords($pageKeywords, $pageUid)
81 *
82 * TOTAL FUNCTIONS: 32
83 * (This index is automatically created/updated by the extension "extdeveval")
84 *
85 */
86
87
88 require_once(PATH_t3lib.'class.t3lib_pagetree.php');
89 require_once(PATH_t3lib.'class.t3lib_extobjbase.php');
90 require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
91
92
93 // ... all for the rootline!
94 require_once (PATH_t3lib."class.t3lib_page.php");
95 require_once (PATH_t3lib."class.t3lib_tstemplate.php");
96 require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");
97
98 // Keywords mgm:
99 require_once (PATH_t3lib."class.t3lib_tcemain.php");
100
101
102
103 /**
104 * Module extension class (addition to the function menu) showing index information for the 'indexed_search' extension
105 *
106 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
107 * @package TYPO3
108 * @subpackage tx_indexedsearch
109 */
110 class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
111
112 // Internal, dynamic:
113 var $allPhashListed = array(); // phash values accumulations for link to clear all
114 var $external_parsers = array(); // External content parsers - objects set here with file extensions as keys.
115 var $iconFileNameCache = array(); // File extensions - icon map/cache.
116 var $indexerObj; // Indexer object
117
118
/**
 * Builds the function menu definition used by this module function.
 *
 * Two menus are defined: 'depth' (labels are fetched through $LANG->sL())
 * and 'type' (hard-coded labels for the display mode of the list).
 *
 * @return	array		Menu definition with the keys 'depth' and 'type'
 */
function modMenu() {
	global $LANG;

		// Depth menu: one localized label per level 0-3
	$labelPrefix = 'LLL:EXT:lang/locallang_core.php:labels.depth_';
	$depthItems = array();
	for ($level = 0; $level <= 3; $level++) {
		$depthItems[$level] = $LANG->sL($labelPrefix.$level);
	}

		// Display type menu:
	$typeItems = array(
		0 => 'Overview',
		1 => 'Technical Details',
		2 => 'Words and content',
//		3 => 'Indexing'
	);

	return array(
		'depth' => $depthItems,
		'type' => $typeItems
	);
}
142
/**
 * Produces the main content of the module function.
 *
 * Submitted commands are processed first (deleting phash rows, stop words,
 * page keywords). After that either a detail view is rendered (selected by
 * the GET parameters "phash", "wid", "metaphone" or "reindex") or the list
 * view selected in the function menu.
 *
 * Note: $this->maxListPerPage is set here as a dynamic property and read by
 * indexed_info().
 *
 * @return	string		HTML output (nothing is returned if no page id is set)
 */
function main() {
	global $LANG, $TYPO3_CONF_VARS;

		// Return if no page id:
	if ($this->pObj->id<=0)	return;

		// Output accumulator; initialized so the appends below never operate on an undefined variable
	$theOutput = '';

		// Initialize max-list items
	$this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;

		// Processing deletion of phash rows:
	if (t3lib_div::_GP('deletePhash'))	{
		$this->removeIndexedPhashRow(t3lib_div::_GP('deletePhash'));
	}

		// Processing stop-words:
	if (t3lib_div::_POST('_stopwords'))	{
		$this->processStopWords(t3lib_div::_POST('stopWord'));
	}

		// Processing page keywords:
	if (t3lib_div::_POST('_pageKeywords'))	{
		$this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid'));
	}

		// Initialize external document parsers:
		// Example configuration, see ext_localconf.php of this file!
	if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']))	{
		foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef)	{
			$this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef);

				// Drop the entry again if no parser object could be created or if its init returns false:
			if (!is_object($this->external_parsers[$extension]) || !$this->external_parsers[$extension]->softInit($extension))	{
				unset($this->external_parsers[$extension]);
			}
		}
	}

		// Initialize indexer if we need it (metaphone display does...)
	$this->indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');

		// Set CSS styles specific for this document:
	$this->pObj->content = str_replace(
		'/*###POSTCSSMARKER###*/',
		"\n".'TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; }'."\n",
		$this->pObj->content
	);

		// Check if details for a phash record should be shown:
	if (t3lib_div::_GET('phash'))	{
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1);
	} elseif (t3lib_div::_GET('wid'))	{
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);
	} elseif (t3lib_div::_GET('metaphone'))	{
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);
	} elseif (t3lib_div::_GET('reindex'))	{
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1);
	} else {	// Detail listings:
			// Type function menu; the depth menu is only added for the list types 0-2:
		$h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php');
		$isListType = t3lib_div::inList('0,1,2',$this->pObj->MOD_SETTINGS['type']);
		if ($isListType)	{
			$h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php');
		}

			// Show title / function menu:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);

			// Content below the menu:
		$theOutput.= $isListType ? $this->drawTableOfIndexedPages() : $this->extraIndexing();
	}

	return $theOutput;
}
239
240
241
242
243
244
245
246
247
248
249
250 /*******************************
251 *
252 * Drawing table of indexed pages
253 *
254 ******************************/
255
/**
 * Produces a table with indexing information for each page.
 *
 * The current page is always listed; sub pages are added through
 * t3lib_pageTree down to the depth selected in the function menu.
 *
 * @return	string		HTML output
 */
function drawTableOfIndexedPages()	{
	global $BACK_PATH;

		// Drawing tree:
	$tree = t3lib_div::makeInstance('t3lib_pageTree');
	$perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
	$tree->init('AND '.$perms_clause);

		// The current page is the first entry of the tree:
	$HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />';
	$tree->tree[] = array(
		'row' => $this->pObj->pageinfo,
		'HTML' => $HTML
	);

	if ($this->pObj->MOD_SETTINGS['depth'])	{
		$tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], '');
	}

		// Traverse page tree:
	$code = '';
	foreach($tree->tree as $data)	{
		$code.= $this->indexed_info(
			$data['row'],
			$data['HTML'].
				// Charset-aware cropping, like the other title croppings in this class
			$this->showPageDetails(t3lib_div::fixed_lgd_cs($data['row']['title'], 20),$data['row']['uid'])
		);
	}

	if (!$code)	{
		return $this->pObj->doc->section('','<br/><br/>'.$this->pObj->doc->icons(1).'There were no indexed pages found in the tree.<br/><br/>',0,1);
	}

		// The header row must be rendered AFTER the rows above: it contains the "clear all" link
		// built from $this->allPhashListed, which indexed_info() fills while rendering.
	$code = '<br/><br/>'."\n".
		'<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
		$this->printPhashRowHeader().
		$code.
		'</table>';

		// Create section to output:
	return $this->pObj->doc->section('',$code,0,1);
}
304
/**
 * Creates the table rows with indexing information for one page.
 *
 * All phash rows joined with a section record of the page are listed (at
 * most $this->maxListPerPage rows, followed by a "list all" row if there
 * are more). phash rows which point to the page but have no section record
 * are added as warning rows. All phash values shown are collected in
 * $this->allPhashListed.
 *
 * @param	array		Data array for this page (only the "uid" key is read here)
 * @param	string		HTML content for first column (page tree icon etc.)
 * @return	string		HTML code. (table rows)
 */
function indexed_info($data, $firstColContent)	{

		// Query; one row more than displayed is selected so we can detect that the limit was exceeded:
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
		'ISEC.*, IP.*, count(*) AS count_val',
		'index_phash IP, index_section ISEC',
		'IP.phash = ISEC.phash AND ISEC.page_id = '.intval($data['uid']),
		'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid',
		'IP.item_type, IP.tstamp',
		($this->maxListPerPage+1)
	);

		// Initialize variables:
	$rowCount = 0;
	$lines = array();		// Collecting HTML rows here, grouped by phash_grouping.
	$phashAcc = array(0);	// Collecting phash values already shown; starts with 0 so the NOT IN () list below is never empty.

		// Traverse the result set of phash rows selected:
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
		if ($rowCount == $this->maxListPerPage)	{
			$rowCount++;	// Increase so the rowspan below also covers the extra warning row.
			break;
		}

			// Adds a display row:
		$lines[$row['phash_grouping']][] = $this->printPhashRow(
			$row,
			isset($lines[$row['phash_grouping']]),
			$this->getGrListEntriesForPhash($row['phash'], $row['gr_list'])
		);
		$rowCount++;
		$phashAcc[] = $row['phash'];
		$this->allPhashListed[] = $row['phash'];	// For removing all shown phash rows.
	}

		// Compile rows into the table:
	$out = '';
	if (count($lines))	{
			// The first column is output once and spans all rows of this page:
		$firstColContent = '<td rowspan="'.$rowCount.'">'.$firstColContent.'</td>';
		foreach($lines as $rowSet)	{
			foreach($rowSet as $rows)	{
				$out.= "\n".'<tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>';
				$firstColContent = '';
			}
		}

		if ($rowCount > $this->maxListPerPage)	{	// Now checking greater than, because we increased $rowCount before...
				// This row lies inside the rowspan of the first column, so only (columns - 1) cells are left:
				// one spacer cell plus the message cell spanning the remaining (columns - 2).
			$out.= implode("\n", array(
				'',
				'<tr class="bgColor-20">',
				'<td>&nbsp;</td>',
				'<td colspan="'.($this->returnNumberOfColumns()-2).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td>',
				'</tr>'
			));
		}
	} else {
		$out.= implode("\n", array(
			'',
			'<tr class="bgColor-20">',
			'<td>'.$firstColContent.'</td>',
			'<td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td>',
			'</tr>'
		));
	}

		// Checking for phash-rows which are NOT joined with the section table:
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
		'IP.*',
		'index_phash IP',
		'IP.data_page_id = '.intval($data['uid']).' AND IP.phash NOT IN ('.implode(',',$phashAcc).')'
	);
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
		$out.= implode("\n", array(
			'',
			'<tr class="typo3-red">',
			'<td colspan="'.$this->returnNumberOfColumns().'"><b>Warning:</b> phash-row "'.$row['phash'].'" didn\'t have a representation in the index_section table!</td>',
			'</tr>'
		));
		$this->allPhashListed[] = $row['phash'];
	}

	return $out;
}
388
/**
 * Render a single row of information about an indexing entry.
 *
 * The first three cells (icon, title, command links) are the same for all
 * display types; the remaining cells depend on
 * $this->pObj->MOD_SETTINGS['type'] (1 = technical details, 2 = words and
 * content, anything else = overview).
 *
 * @param	array		Row from query (combined phash table with sections etc).
 * @param	boolean		Set if grouped to previous result; the icon of the element is not shown again.
 * @param	array		Array of index_grlist records.
 * @return	array		Array of table cells (<td> elements) for the row.
 * @see indexed_info()
 */
function printPhashRow($row,$grouping=0,$extraGrListRows)	{
	$lines = array();

		// Title cell attributes will highlight TYPO3 pages with a slightly darker color (bgColor4) than attached medias. Also IF there are more than one section record for a phash row it will be red as a warning that something is wrong!
	$titleCellAttribs = $row['count_val']!=1?' bgcolor="red"':($row['item_type']==='0' ? ' class="bgColor4"' : '');

		// For non-page items the "key" of the cHash parameters is appended to the title:
	$page = '';
	if ($row['item_type'])	{
		$arr = unserialize($row['cHashParams']);
		if (is_array($arr) && !empty($arr['key']))	{
			$page = ' ['.$arr['key'].']';
		}
	}
	$elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']);
	$cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');

		// Cells shared by all display types:
		// Display icon (not repeated for grouped rows):
	if (!$grouping)	{
		$lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
	} else {
		$lines[] = '<td>&nbsp;</td>';
	}
		// Title displayed:
	$lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';
		// Remove-indexing-link:
	$lines[] = '<td>'.$cmdLinks.'</td>';

	switch($this->pObj->MOD_SETTINGS['type'])	{
		case 1:	// Technical details:
				// Various data:
			$lines[] = '<td>'.$row['phash'].'</td>';
			$lines[] = '<td>'.$row['contentHash'].'</td>';

			if ($row['item_type']==='0')	{
				$lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['data_page_mp'] ? $row['data_page_mp'] : '&nbsp;').'</td>';
			} else {
				$lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>';
			}
			$lines[] = '<td>'.$row['gr_list'].$this->printExtraGrListRows($extraGrListRows).'</td>';
			$lines[] = '<td>'.$this->printRootlineInfo($row).'</td>';
			$lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['freeIndexUid'] ? $row['freeIndexUid'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['recordUid'] ? $row['recordUid'] : '&nbsp;').'</td>';

				// cHash parameters; fall back to empty values if the field does not contain a serialized array:
			$theCHash = '';
			$arr = unserialize($row['cHashParams']);
			if (is_array($arr))	{
				if (isset($arr['cHash']))	{
					$theCHash = $arr['cHash'];
				}
				unset($arr['cHash']);
			} else {
				$arr = array();
			}

			if ($row['item_type'])	{	// pdf...
				$lines[] = '<td>'.(!empty($arr['key']) ? 'Page '.$arr['key'] : '').'&nbsp;</td>';
			} elseif ($row['item_type']==0)	{
				$lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$arr)).'&nbsp;</td>';
			} else {
				$lines[] = '<td class="bgColor">&nbsp;</td>';
			}

			$lines[] = '<td>'.$theCHash.'</td>';
		break;
		case 2:	// Words and content:
				// Query for the fulltext; there may be no fulltext row for this phash:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_fulltext',
				'phash = '.intval($row['phash'])
			);
			$fulltext = (is_array($ftrows) && isset($ftrows[0]['fulltextdata'])) ? $ftrows[0]['fulltextdata'] : '';
			$lines[] = '<td style="white-space: normal;">'.
				htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($fulltext),3000)).
				'<hr/><em>Size: '.strlen($fulltext).'</em>'.
				'</td>';

				// Query for the words; the result array is indexed by "baseword":
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.baseword, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.intval($row['phash']).
					' AND index_words.wid = index_rel.wid',
				'',
				'',
				'',
				'baseword'
			);

			$wordList = '';
			if (is_array($ftrows))	{
				$indexed_words = array_keys($ftrows);
				sort($indexed_words);
				$wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words)));
				$wordList.= '<hr/><em>Count: '.count($indexed_words).'</em>';
			}

			$lines[] = '<td style="white-space: normal;">'.$wordList.'</td>';
		break;
		default:	// Overview
			$lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>';
			$lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
			$lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
		break;
	}

	return $lines;
}
533
/**
 * Creates the header row for the table of indexed pages.
 *
 * The fourth cell holds the link that clears all phash rows collected in
 * $this->allPhashListed; the cells after it depend on the display type
 * in $this->pObj->MOD_SETTINGS['type'].
 *
 * @return	string		HTML string (table row)
 */
function printPhashRowHeader()	{
		// These four cells open the header in every display type:
	$cells = array(
		'<td>&nbsp;</td>',
		'<td>&nbsp;</td>',
		'<td>Title</td>',
		'<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>'
	);

		// Type specific cells:
	$displayType = $this->pObj->MOD_SETTINGS['type'];
	if ($displayType == 1)	{			// Technical details
		$typeCells = array(
			'<td>pHash</td>',
			'<td>cHash</td>',
			'<td>&amp;id</td>',
			'<td>&amp;type</td>',
			'<td>&amp;L</td>',
			'<td>&amp;MP</td>',
			'<td>grlist</td>',
			'<td>Rootline</td>',
			'<td>page_id</td>',
			'<td>phash_t3</td>',
			'<td>CfgUid</td>',
			'<td>RecUid</td>',
			'<td>GET-parameters</td>',
			'<td>&amp;cHash</td>'
		);
	} elseif ($displayType == 2)	{	// Words and content
		$spacerImage = '<img src="clear.gif" width="300" height="1" alt="" />';
		$typeCells = array(
			'<td>Content<br/>'."\n".$spacerImage.'</td>',
			'<td>Words<br/>'."\n".$spacerImage.'</td>'
		);
	} else {							// Overview
		$typeCells = array(
			'<td>Description</td>',
			'<td>Size</td>',
			'<td>Indexed:</td>'
		);
	}

	return '<tr class="tableheader bgColor5">'.implode('',array_merge($cells,$typeCells)).'</tr>';
}
588
/**
 * Returns the number of table columns for the display type currently set
 * in $this->pObj->MOD_SETTINGS['type'].
 *
 * @return	integer		Number of columns in list: 18 for type 1, 6 for type 2, otherwise 7
 */
function returnNumberOfColumns()	{
	$displayType = $this->pObj->MOD_SETTINGS['type'];

		// Loose comparison on purpose, same as a switch statement would do:
	if ($displayType == 1)	{
		return 18;
	}
	if ($displayType == 2)	{
		return 6;
	}
	return 7;
}
607
608
609
610
611
612
613
614
615
616
617
618 /*******************************
619 *
620 * Details display, phash row
621 *
622 *******************************/
623
/**
 * Showing details for a particular phash row
 *
 * Outputs the phash record, debug information (if a debug row exists), the
 * list of all words, metaphone statistics, the top-20 word lists and the
 * section records of the phash.
 *
 * @param	integer		phash value to display details for.
 * @return	string		HTML content
 */
function showDetailsForPhash($phash)	{

	$content = '';
	$phash = intval($phash);

		// Selects the result row:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'*',
		'index_phash',
		'phash = '.$phash
	);
	$phashRecord = is_array($ftrows) && isset($ftrows[0]) ? $ftrows[0] : NULL;

		// If not found, stop here:
	if (!is_array($phashRecord))	{
		return $content.'Error: No phash row found';
	}

	$content.= '<h4>phash row content:</h4>'.
		$this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord));

		// Getting debug information if any; the section is only shown when a debug row with a serialized array exists:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'*',
		'index_debug',
		'phash = '.$phash
	);
	if (is_array($ftrows) && isset($ftrows[0]['debuginfo']))	{
		$debugInfo = unserialize($ftrows[0]['debuginfo']);
		if (is_array($debugInfo))	{
			$lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
			unset($debugInfo['lexer']);

			$content.= '<h3>Debug information:</h3>'.
				$this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo));

			$content.= '<h4>Debug information / lexer splitting:</h4>'.
				'<hr/><b>'.
				$this->utf8_to_currentCharset($lexer).
				'</b><hr/>';
		}
	}

	$content.= '<h3>Word statistics</h3>';

		// Finding all words for this phash:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'index_words.*, index_rel.*',
		'index_rel, index_words',
		'index_rel.phash = '.$phash.
			' AND index_words.wid = index_rel.wid',
		'',
		'index_words.baseword',
		''
	);
	if (!is_array($ftrows))	{
		$ftrows = array();	// Makes count() and foreach below safe if the query returned nothing usable
	}
	$pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']);
	$showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();
	$content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);

		// Group metaphone hash:
	$metaphone = array();
	foreach($ftrows as $row)	{
		$metaphone[$row['metaphone']][] = $row['baseword'];
	}
	$content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');

		// Finding top-20 (stop words excluded) on frequency and on count for this phash:
	$topLists = array(
		'index_rel.freq DESC' => 'Top-20 words by frequency:',
		'index_rel.count DESC' => 'Top-20 words by count:'
	);
	foreach($topLists as $orderBy => $listHeader)	{
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'index_words.baseword, index_words.metaphone, index_rel.*',
			'index_rel, index_words',
			'index_rel.phash = '.$phash.
				' AND index_words.wid = index_rel.wid'.
				' AND index_words.is_stopword=0',
			'',
			$orderBy,
			'20'
		);
		$content.= $this->listWords($ftrows, $listHeader, 2);
	}

	$content.= '<h3>Section records for this phash</h3>';

		// Finding sections for this record:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'*',
		'index_section',
		'index_section.phash = '.$phash,
		'',
		'',
		''
	);
	$content.= t3lib_div::view_array($ftrows);

		// Add go-back link before and after the content:
	$content = $this->linkList().$content.$this->linkList();

	return $content;
}
739
/**
 * Create table with list of words from $ftrows
 *
 * @param	array		Array of records selected from index_rel/index_words
 * @param	string		Header string to show before table.
 * @param	mixed		If set, the stopWord checkboxes will be shown in the word list. Only for admins. (because it is a global setting, not per-site). If the integer 2 is passed, the checkboxes are shown WITHOUT the hidden "0" fields.
 * @param	array		The page record from which to load the keywords, if any.
 * @return	string		HTML table
 */
function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='')	{

	if (!is_array($ftrows))	{
		$ftrows = array();
	}

		// Prepare keywords:
	$keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : '';
	$showKeywords = is_array($keywords);

		// Hidden "0" fields are wanted in every mode except 2.
		// Strict comparison is needed: with a loose "!=" boolean TRUE equals 2, so the fields were never rendered
		// when TRUE was passed and a stop word could not be unchecked again.
	$addHiddenStopWordField = ($stopWordBoxes !== 2);

		// Header row:
	$headerCells = array();
	if ($stopWordBoxes)	{
		$headerCells[] = '<td>Stopword:</td>';
	}
	$headerCells[] = '<td>Word:</td>';
	$headerCells[] = '<td>Count:</td>';
	$headerCells[] = '<td>First:</td>';
	$headerCells[] = '<td>Frequency:</td>';
	$headerCells[] = '<td>Flags:</td>';
	if ($showKeywords)	{
		$headerCells[] = '<td>Page keyword:</td>';
	}
	$trows = "\n".'<tr class="tableheader bgColor5">'."\n".implode("\n",$headerCells)."\n".'</tr>'."\n";

		// Word rows:
	foreach($ftrows as $row)	{
		$isStopWord = !empty($row['is_stopword']);	// Not selected by every query, so don't read it blindly
		$cells = array();

		if ($stopWordBoxes)	{
			$fieldName = 'stopWord['.htmlspecialchars($row['wid']).']';
			$cells[] = '<td align="center"'.($isStopWord ? ' style="background-color:red;"' : '').'>'.
				($addHiddenStopWordField ? '<input type="hidden" name="'.$fieldName.'" value="0" />' : '').
				'<input type="checkbox" name="'.$fieldName.'" value="1"'.($isStopWord ? ' checked="checked"' : '').' /></td>';
		}

		$cells[] = '<td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td>';
		$cells[] = '<td>'.htmlspecialchars($row['count']).'</td>';
		$cells[] = '<td>'.htmlspecialchars($row['first']).'</td>';
		$cells[] = '<td>'.htmlspecialchars($row['freq']).'</td>';
		$cells[] = '<td>'.htmlspecialchars($this->flagsMsg($row['flags'])).'</td>';

		if ($showKeywords)	{
			$isKeyword = isset($keywords[$row['baseword']]);
				// The word is escaped for use inside the attribute value; the browser posts back the unescaped word as key.
			$fieldName = 'pageKeyword['.htmlspecialchars($row['baseword']).']';
			$cells[] = '<td align="center"'.($isKeyword ? ' class="bgColor2"' : '').'>'.
				'<input type="hidden" name="'.$fieldName.'" value="0" />'.
				'<input type="checkbox" name="'.$fieldName.'" value="1"'.($isKeyword ? ' checked="checked"' : '').' /></td>';
		}

		$trows.= "\n".'<tr class="'.($isStopWord ? 'bgColor' : 'bgColor4').'">'."\n".implode("\n",$cells)."\n".'</tr>'."\n";
	}

		// The submit buttons post back to the current URL. The URL ends up inside a single-quoted JavaScript string
		// inside an HTML attribute, so it is escaped for JavaScript first and for HTML afterwards.
	$onClick = htmlspecialchars('document.webinfoForm.action=\''.addslashes(t3lib_div::getIndpEnv('REQUEST_URI')).'\';');

	$out = '<h4>'.htmlspecialchars($header).'</h4>'.
		"\n".'<table border="0" cellspacing="1" cellpadding="2" class="c-list">'."\n".
		$trows."\n".
		'</table>';

	if ($stopWordBoxes)	{
		$out.= '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="'.$onClick.'" />';
	}
	if ($showKeywords)	{
		$out.= '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="'.$onClick.'" />'.
			'<input type="hidden" name="pageKeyword_pageUid" value="'.intval($page['uid']).'" />'.
			'<br/>Current keywords are: <em>'.htmlspecialchars(implode(', ',array_keys($keywords))).'</em>';
	}

	return $out;
}
791
/**
 * Displays table of metaphone groups with more than one word
 *
 * @param	array		Array with metaphone values as keys and arrays of words as values
 * @param	string		Header string
 * @return	string		HTML table
 */
function listMetaphoneStat($ftrows,$header)	{

		// Header row comes first:
	$rowHtml = array();
	$rowHtml[] = implode("\n", array(
		'',
		'<tr class="tableheader bgColor5">',
		'<td>'.htmlspecialchars('Metaphone:').'</td>',
		'<td>'.htmlspecialchars('Hash:').'</td>',
		'<td>'.htmlspecialchars('Count:').'</td>',
		'<td>'.htmlspecialchars('Words:').'</td>',
		'</tr>',
		''
	));

		// One row for each metaphone value shared by at least two words:
	foreach($ftrows as $metaphone => $words)	{
		if (count($words)<=1)	{
			continue;
		}

			// The link text is the metaphone of the group's first word, as returned by the indexer object:
		$rowHtml[] = implode("\n", array(
			'',
			'<tr class="bgColor4">',
			'<td>'.$this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone).'</td>',
			'<td>'.htmlspecialchars($metaphone).'</td>',
			'<td>'.htmlspecialchars(count($words)).'</td>',
			'<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset(implode(', ',$words))).'</td>',
			'</tr>',
			''
		));
	}

	$tableOpen = '<table border="0" cellspacing="1" cellpadding="2" class="c-list">'."\n";
	$tableClose = "\n".'</table>';

	return '<h4>'.htmlspecialchars($header).'</h4>'.$tableOpen.implode('',$rowHtml).$tableClose;
}
828
/**
 * Wraps the input string in a link to the word detail view of this script.
 * The link sets the "wid" parameter and blanks "phash".
 *
 * @param	string		String to wrap, possibly a title or so. Inserted as is, so it must already be HTML.
 * @param	integer		wid value to show details for
 * @return	string		Wrapped string
 */
function linkWordDetails($string,$wid)	{
	$linkParams = array(
		'wid' => $wid,
		'phash' => ''
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($linkParams));

	return '<a href="'.$href.'">'.$string.'</a>';
}
839
840
/**
 * Wraps the input string in a link to the metaphone detail view of this script.
 * The link sets the "metaphone" parameter and blanks "wid" and "phash".
 *
 * @param	string		String to wrap. Inserted as is, so it must already be HTML.
 * @param	integer		Metaphone value
 * @return	string		Wrapped string
 */
function linkMetaPhoneDetails($string,$metaphone)	{
	$linkParams = array(
		'metaphone' => $metaphone,
		'wid' => '',
		'phash' => ''
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($linkParams));

	return '<a href="'.$href.'">'.$string.'</a>';
}
851
/**
 * Creates message for flag value
 *
 * The bits 128, 64 and 32 are translated to the markers "<title>",
 * "<meta/keywords>" and "<meta/description>"; the flag value itself is
 * appended in parentheses. The result is NOT HTML-escaped.
 *
 * @param	integer		Flags integer
 * @return	string		Message string; empty string if the flag value is not greater than zero
 */
function flagsMsg($flags)	{
		// Always return a string so callers can pass the result straight on to string functions:
	if (!($flags > 0))	{
		return '';
	}

	return
		($flags & 128 ? '<title>' : '').			// pow(2,7)
		($flags & 64 ? '<meta/keywords>' : '').		// pow(2,6)
		($flags & 32 ? '<meta/description>' : '').	// pow(2,5)
		' ('.$flags.')';
}
867
868
869
870
871
872
873
874
875
876
877 /*******************************
878 *
879 * Details display, words / metaphone
880 *
881 *******************************/
882
/**
 * Show details for words
 *
 * Lists all phash rows (joined with their section records) that reference
 * the word, ordered by frequency, followed by the go-back link.
 *
 * @param	integer		Word ID (wid)
 * @return	string		HTML content
 */
function showDetailsForWord($wid)	{

		// Select references to this word
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'index_phash.*, index_section.*, index_rel.*',
		'index_rel, index_section, index_phash',
		'index_rel.wid = '.intval($wid).
			' AND index_rel.phash = index_section.phash'.
			' AND index_section.phash = index_phash.phash',
		'',
		'index_rel.freq DESC',
		''
	);

		// Headers (this also initializes the row accumulator):
	$content = implode("\n", array(
		'',
		'<tr class="tableheader bgColor5">',
		'<td>phash</td>',
		'<td>page_id</td>',
		'<td>data_filename</td>',
		'<td>count</td>',
		'<td>first</td>',
		'<td>freq</td>',
		'<td>flags</td>',
		'</tr>'
	));

		// One row per reference:
	if (is_array($ftrows))	{
		foreach($ftrows as $wDat)	{
			$content.= implode("\n", array(
				'',
				'<tr class="bgColor4">',
				'<td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td>',
				'<td>'.htmlspecialchars($wDat['page_id']).'</td>',
				'<td>'.htmlspecialchars($wDat['data_filename']).'</td>',
				'<td>'.htmlspecialchars($wDat['count']).'</td>',
				'<td>'.htmlspecialchars($wDat['first']).'</td>',
				'<td>'.htmlspecialchars($wDat['freq']).'</td>',
				'<td>'.htmlspecialchars($wDat['flags']).'</td>',
				'</tr>'
			));
		}
	}

		// Compile table:
	$content = "\n".'<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
		$content."\n".
		'</table>';

		// Add go-back link:
	$content.= $this->linkList();

	return $content;
}
941
/**
 * Shows details for a metaphone value: a heading with the metaphone string, a table of
 * all words in index_words that carry this metaphone hash (with their stop-word state)
 * and a link back to the list.
 *
 * @param integer $metaphone Metaphone integer hash
 * @return string HTML content; empty string if no word has this metaphone value
 */
function showDetailsForMetaphone($metaphone) {

        // Select all words registered with this metaphone hash, ordered by base word:
    $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        'index_words.*',
        'index_words',
        'index_words.metaphone = '.intval($metaphone),
        '',
        'index_words.baseword',
        ''
    );

    $content = '';

    if (is_array($ftrows) && count($ftrows)) {

            // The heading is kept separate from the table rows so that it ends up
            // in front of the <table> element and not inside it.
        $heading = '<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>';

        $tableRows = '
        <tr class="tableheader bgColor5">
            <td>Word</td>
            <td>Is stopword?</td>
        </tr>';

        foreach($ftrows as $wDat) {
            $tableRows.='
        <tr class="bgColor4">
            <td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td>
            <td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td>
        </tr>';
        }

        $content = $heading.'
        <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
        $tableRows.'
        </table>';

            // Sanity check: re-calculating the hash from the first word must give the requested value.
        if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone) {
            $content.='ERROR: Metaphone string and hash did not match for some reason!?';
        }

            // Add go-back link:
        $content.= $this->linkList();
    }

    return $content;
}
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006 /*******************************
1007 *
1008 * Helper functions
1009 *
1010 *******************************/
1011
/**
 * Creates a garbage-bin icon, linked to the current script with the 'deletePhash'
 * parameter set to the given list of phash values.
 *
 * @param string $phash List of phash integers
 * @param string $alt Text for the title attribute of the icon
 * @return string HTML img-tag with link around.
 */
function printRemoveIndexed($phash,$alt) {
    $href = htmlspecialchars(t3lib_div::linkThisScript(array('deletePhash'=>$phash)));
    $icon = '<img src="'.$GLOBALS['BACK_PATH'].'gfx/garbage.gif" width="11" hspace="1" vspace="2" height="12" border="0" title="'.htmlspecialchars($alt).'" alt="" />';

    return '<a href="'.$href.'">'.$icon.'</a>';
}
1024
/**
 * Creates a re-index button: a refresh icon linked to the current script with the
 * 'reindex' (phash) and 'reindex_id' (page id) parameters set.
 * The button is only made for rows whose item_type is set and is not the string '0'.
 *
 * @param array $resultRow phash table result row.
 * @param string $alt Title attribute text for icon
 * @return string HTML content; Icon wrapped in link. Nothing (NULL) if the row does not qualify.
 */
function printReindex($resultRow,$alt) {
    if (!$resultRow['item_type'] || $resultRow['item_type']==='0') {
        return NULL;
    }

    $linkParams = array(
        'reindex' => $resultRow['phash'],
        'reindex_id' => $resultRow['page_id']
    );
    $href = htmlspecialchars(t3lib_div::linkThisScript($linkParams));
    $icon = '<img src="'.$GLOBALS['BACK_PATH'].'gfx/refresh_n.gif" width="14" hspace="1" vspace="2" height="14" border="0" title="'.htmlspecialchars($alt).'" alt="" />';

    return '<a href="'.$href.'">'.$icon.'</a>';
}
1039
/**
 * Wraps a string in a link back to the current script with the 'phash' parameter set,
 * i.e. a link to the details of one phash value.
 *
 * @param string $string Link text. Inserted unchanged, so it must already be HTML-safe.
 * @param integer $phash phash value to show details for
 * @return string The link text wrapped in <a> tags
 */
function linkDetails($string,$phash) {
    $href = htmlspecialchars(t3lib_div::linkThisScript(array('phash'=>$phash)));

    return '<a href="'.$href.'">'.$string.'</a>';
}
1050
/**
 * Creates the "Back to list." link, pointing to index.php with the id of the parent object.
 *
 * @return string Link back to list, surrounded by line breaks
 */
function linkList() {
    $pageId = $this->pObj->id;

    return '<br/><a href="index.php?id='.$pageId.'">Back to list.</a><br/>';
}
1059
/**
 * Wraps a string in a link to index.php for the given page id, with the module settings
 * "depth" forced to 0 and "type" forced to 1.
 *
 * @param string $string Link text. Inserted unchanged, so it must already be HTML-safe.
 * @param integer $id Page id to link to
 * @return string The link text wrapped in <a> tags
 */
function showPageDetails($string,$id) {
    $url = 'index.php?id='.$id.'&SET[depth]=0&SET[type]=1';

    return '<a href="'.htmlspecialchars($url).'">'.$string.'</a>';
}
1070
/**
 * Prints the gr_lists attached to an indexed entry: the 'gr_list' value of every record,
 * separated by line breaks and passed through $GLOBALS['TBE_TEMPLATE']->dfw().
 *
 * @param array $extraGrListRows Array of index_grlist records
 * @return string HTML code; empty string if there are no records.
 */
function printExtraGrListRows($extraGrListRows) {
    if (!is_array($extraGrListRows) || !count($extraGrListRows)) {
        return '';
    }

        // foreach instead of while(list()=each()): each() no longer exists in PHP 8.
    $lines = array();
    foreach($extraGrListRows as $r) {
        $lines[] = $r['gr_list'];
    }

    return '<br/>'.$GLOBALS['TBE_TEMPLATE']->dfw(implode('<br/>',$lines));
}
1087
/**
 * Prints the path stored for an indexed section as a "/"-separated list of uids.
 * The levels rl0, rl1 and rl2 are added in that order and collection stops at the first
 * empty one. Only when all three are set, the additional fields configured in
 * $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['addRootLineFields'] (field name => level)
 * are added as well.
 *
 * @param array $row Result row with content from index_section
 * @return string Rootline information, ordered by level
 */
function printRootlineInfo($row) {
    $uids = array();

        // Fixed levels 0-2; a missing level ends the chain.
    foreach(array('rl0', 'rl1', 'rl2') as $level => $field) {
        if (!$row[$field]) {
            break;
        }
        $uids[$level] = $row[$field];
    }

        // Additional levels, only reachable when rl0-rl2 were all present:
    if (count($uids) === 3) {
        $extraFields = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'];
        if (is_array($extraFields)) {
            foreach($extraFields as $fieldName => $rootLineLevel) {
                if ($row[$fieldName]) {
                    $uids[$rootLineLevel] = $row[$fieldName];
                }
            }
        }
    }

        // Return root line.
    ksort($uids);
    return implode('/',$uids);
}
1120
/**
 * Returns the icon for a file extension / item type.
 * The <img> tag is built once per item type and kept in $this->iconFileNameCache with a
 * title placeholder, which is filled in on every call. Item types without a usable icon
 * are cached as an empty string so that the lookup is not repeated.
 *
 * @param string $it File extension / item type. The string '0' selects the page icon.
 * @param string $alt Title attribute value in icon.
 * @return string <img> tag for icon; empty string if no icon could be determined.
 */
function makeItemTypeIcon($it,$alt='') {
    if (!isset($this->iconFileNameCache[$it])) {

            // Find the icon reference for the item type:
        $icon = '';
        if ($it==='0') {
            $icon = 'EXT:indexed_search/pi/res/pages.gif';
        } elseif (!empty($this->external_parsers[$it])) {
            $icon = $this->external_parsers[$it]->getIcon($it);
        }

            // Build the tag; it stays empty if the file is missing or is not a readable image:
        $iconTag = '';
        $fullPath = $icon ? t3lib_div::getFileAbsFileName($icon) : '';
        if ($fullPath) {
            $info = @getimagesize($fullPath);
            if (is_array($info)) {
                $iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site));
                $iconTag = '<img src="'.$iconPath.'" '.$info[3].' title="###TITLE_ATTRIBUTE###" alt="" />';
            }
        }

        $this->iconFileNameCache[$it] = $iconTag;
    }

    return str_replace('###TITLE_ATTRIBUTE###',htmlspecialchars($it.': '.$alt),$this->iconFileNameCache[$it]);
}
1146
/**
 * Converts a utf-8 string to the charset of the global $LANG object.
 * If that charset is utf-8 the string is returned untouched; otherwise it is run through
 * $LANG->csConvObj->utf8_decode().
 *
 * @param string $string String to convert (utf-8)
 * @return string Converted string (backend charset if different from utf-8)
 */
function utf8_to_currentCharset($string) {
    $lang = $GLOBALS['LANG'];

    if ($lang->charSet == 'utf-8') {
        return $string;
    }

    return $lang->csConvObj->utf8_decode($string, $lang->charSet, TRUE);
}
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172 /********************************
1173 *
1174 * Reindexing
1175 *
1176 *******************************/
1177
/**
 * Re-indexes a file or external URL attached to a page.
 * The phash/section record is looked up first. Only rows with a non-page item_type are
 * re-indexed; for those, the indexer log and the resulting hash arrays are rendered.
 * A link back to the list is always appended.
 *
 * @param integer $phash Phash value
 * @param integer $pageId The page uid for the section record (the same file/url may be attached to more than one page)
 * @return string HTML content
 */
function reindexPhash($phash, $pageId) {

        // Look up the combined phash / section record:
    $where = 'IP.phash = ISEC.phash
        AND IP.phash = '.intval($phash).'
        AND ISEC.page_id = '.intval($pageId);
    list($resultRow) = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        'ISEC.*, IP.*',
        'index_phash IP, index_section ISEC',
        $where
    );

    $output = array();
    $isExternalItem = is_array($resultRow) && $resultRow['item_type'] && $resultRow['item_type']!=='0';

    if ($isExternalItem) {

            // (Re)-Indexing file on page.
        $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
        $rootLineUids = $this->getUidRootLineForClosestTemplate($pageId);
        $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rootLineUids);

            // URL or local file:
        if ($resultRow['externalUrl']) {
            $indexerObj->indexExternalUrl($resultRow['data_filename']);
        } else {
            $indexerObj->indexRegularDocument($resultRow['data_filename'], TRUE);
        }

            // The indexer is expected to arrive at the same phash as the stored one:
        if ($indexerObj->file_phash_arr['phash'] != $resultRow['phash']) {
            $output[] = 'ERROR: phash ('.$indexerObj->file_phash_arr['phash'].') did NOT match '.$resultRow['phash'].' for strange reasons!';
        }

        $output[] = '<h4>Log for re-indexing of "'.htmlspecialchars($resultRow['data_filename']).'":</h4>';
        $output[] = t3lib_div::view_array($indexerObj->internal_log);

        $output[] = '<h4>Hash-array, page:</h4>';
        $output[] = t3lib_div::view_array($indexerObj->hash);

        $output[] = '<h4>Hash-array, file:</h4>';
        $output[] = t3lib_div::view_array($indexerObj->file_phash_arr);
    }

        // Link back to list.
    $output[] = $this->linkList();

    return implode('', $output);
}
1231
/**
 * Get rootline for closest TypoScript template root.
 * Algorithm same as used in Web > Template, Object browser
 *
 * @param integer $id The page id to traverse rootline back from
 * @return array Uid values of the template parser's rootline, with the rootline keys preserved
 */
function getUidRootLineForClosestTemplate($id) {

        // Template parser, without time-performance logging:
    $templateParser = t3lib_div::makeInstance('t3lib_tsparser_ext');
    $templateParser->tt_track = 0;
    $templateParser->init();

        // Fetch the page rootline and let the parser run through the templates on it.
        // This generates the constants/config + hierarchy info for the template.
    $pageSelect = t3lib_div::makeInstance('t3lib_pageSelect');
    $pageRootLine = $pageSelect->getRootLine($id);
    $templateParser->runThroughTemplates($pageRootLine,0);

        // Reduce the parser's rootline to the uid values:
    $uids = array();
    foreach($templateParser->rootLine as $level => $pageRow) {
        $uids[$level] = $pageRow['uid'];
    }

    return $uids;
}
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269 /********************************
1270 *
1271 * Indexing of configurations
1272 *
1273 *******************************/
1274
/**
 * Runs all indexing configurations (index_config records) stored on the current page
 * which are not hidden and whose start time has passed.
 * Depending on the configuration type:
 *  - type 1: records from a table are indexed as pages (first field of "fieldlist" is the title, the rest is content)
 *  - type 2: files found in a directory are indexed as regular documents
 *  - type 3: an external URL is indexed recursively
 *
 * @return void
 */
function extraIndexing() {

        // Select index configurations on this page
    $cfgRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        '*',
        'index_config',
        'pid = '.intval($this->pObj->id).
            ' AND hidden=0'.
            ' AND starttime<'.time()
    );
    if (!is_array($cfgRows) || !count($cfgRows)) {
        return;
    }

    $rl = $this->getUidRootLineForClosestTemplate($this->pObj->id);

    foreach($cfgRows as $cfgRow) {
        switch($cfgRow['type']) {
            case 1:
                if ($cfgRow['table2index'] && isset($GLOBALS['TCA'][$cfgRow['table2index']])) {

                        // Init:
                    $pid = intval($cfgRow['alternative_source_pid']) ? intval($cfgRow['alternative_source_pid']) : $this->pObj->id;
                    $fieldList = t3lib_div::trimExplode(',',$cfgRow['fieldlist'],1);

                        // Select
                    $recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
                        '*',
                        $cfgRow['table2index'],
                        'pid = '.intval($pid)
                    );

                        // Traverse:
                    if (is_array($recs)) {
                        foreach($recs as $r) {
                                // (Re)-Indexing a row from a table:
                            $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
                            parse_str(str_replace('###UID###',$r['uid'],$cfgRow['get_params']),$GETparams);
                            $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl, $GETparams, $cfgRow['chashcalc'] ? TRUE : FALSE);
                            $indexerObj->backend_setFreeIndexUid($cfgRow['uid']);

                                // Title and content are reset for every record so that nothing
                                // is carried over from the previous one:
                            $theTitle = '';
                            $theContent = '';
                            foreach($fieldList as $k => $v) {
                                if (!$k) {
                                    $theTitle = $r[$v];
                                } else {
                                    $theContent.= $r[$v].' ';
                                }
                            }

                            $indexerObj->backend_indexAsTYPO3Page(
                                $theTitle,
                                '',
                                '',
                                $theContent,
                                $GLOBALS['LANG']->charSet,
                                $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['tstamp']],
                                $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['crdate']],
                                $r['uid']
                            );
                        }
                    }
                }
            break;
            case 2:
                    // Relative paths are resolved to absolute ones. (The check used to test
                    // the misspelled, undefined variable $readPath and was therefore always true.)
                $readpath = $cfgRow['filepath'];
                if (!t3lib_div::isAbsPath($readpath)) {
                    $readpath = t3lib_div::getFileAbsFileName($readpath);
                }

                if (t3lib_div::isAllowedAbsPath($readpath)) {
                    $extList = implode(',',t3lib_div::trimExplode(',',$cfgRow['extensions'],1));
                    $fileArr = array();
                    $files = t3lib_div::getAllFilesAndFoldersInPath($fileArr,$readpath,$extList,0,$cfgRow['depth']);
                    $files = t3lib_div::removePrefixPathFromList($files,PATH_site);

                    if (is_array($files)) {
                        foreach($files as $path) {
                                // (Re)-Indexing file on page.
                            $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
                            $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl);
                            $indexerObj->backend_setFreeIndexUid($cfgRow['uid']);
                            $indexerObj->hash['phash'] = -1;    // EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

                            $indexerObj->indexRegularDocument($path, TRUE);
                        }
                    }
                }
            break;
            case 3:
                if ($cfgRow['externalUrl']) {
                    $this->indexExtUrlRecursively($cfgRow['externalUrl'], $cfgRow['depth'], $this->pObj->id, $rl, $cfgRow['uid']);
                }
            break;
        }
    }
}
1379
/**
 * Indexing URL recursively
 * Still needs some work; eg. parameters for type, language and MP var are not passed yet.
 * Relative links are resolved against scheme, host and port of the input URL only
 * (the path of the input URL is not taken into account).
 *
 * @param string $url URL, http://....
 * @param integer $depth Depth of recursion. 0 (zero) = only input URL
 * @param integer $pageId Page id to relate indexing to.
 * @param array $rl Rootline array to relate indexing to
 * @param integer $cfgUid Configuration UID
 * @return void
 */
function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid) {

        // Index external URL:
    $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
    $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
    $indexerObj->backend_setFreeIndexUid($cfgUid);

    $indexerObj->indexExternalUrl($url);

        // Recursion:
    if ($depth>0) {

            // Base (scheme://host[:port]) used for links without a scheme.
            // Stays empty if the input URL does not provide both scheme and host.
        $url_qParts = parse_url($url);
        $urlBase = '';
        if (is_array($url_qParts) && !empty($url_qParts['scheme']) && !empty($url_qParts['host'])) {
            $urlBase = $url_qParts['scheme'].'://'.$url_qParts['host'].
                        (isset($url_qParts['port']) ? ':'.$url_qParts['port'] : '');
        }

        $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);

        if (is_array($list)) {

                // Traverse links:
            foreach($list as $count => $linkInfo) {

                    // Decode entities:
                $linkSource = t3lib_div::htmlspecialchars_decode($linkInfo['href']);

                $qParts = parse_url($linkSource);
                $isRelative = !is_array($qParts) || empty($qParts['scheme']);

                if ($isRelative && $urlBase!=='') {
                        // Leading slashes are stripped so that "/path" does not become "host//path".
                    $linkSource = $urlBase.'/'.ltrim($linkSource,'/');
                }

                    // A relative link that cannot be resolved is skipped.
                if (!$isRelative || $urlBase!=='') {
                    $this->indexExtUrlRecursively($linkSource, $depth-1, $pageId, $rl, $cfgUid);
                }

                    // Temporary limit until we know how to handle hundreds of URLs with limited parsetime in PHP...
                if ($count>3) break;
            }
        }
    }
}
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435 /********************************
1436 *
1437 * SQL functions
1438 *
1439 *******************************/
1440
/**
 * Removes ALL data regarding a certain list of indexed phash-rows.
 * For every positive phash value in the list, the records with that phash are deleted
 * from each of the index tables. Optionally the cache_pages entries of the pages
 * found in index_section for the phash are deleted first.
 *
 * @param string $phashList List of phash integers
 * @param boolean $clearPageCache If set, page cache is cleared as well.
 * @return void
 */
function removeIndexedPhashRow($phashList,$clearPageCache=1) {
    $indexTables = array('index_phash','index_rel','index_section','index_grlist','index_fulltext','index_debug');

    foreach(t3lib_div::trimExplode(',',$phashList,1) as $listItem) {
        $phash = intval($listItem);
        if ($phash<=0) {
            continue;
        }

        if ($clearPageCache) {
                // Clearing page cache for all pages the phash is registered on:
            $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.$phash);
            if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
                $pageIds = array();
                while($sectionRow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
                    $pageIds[] = $sectionRow['page_id'];
                }
                $cleanIds = $GLOBALS['TYPO3_DB']->cleanIntArray($pageIds);
                $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.implode(',',$cleanIds).')');
            }
        }

            // Removing old registrations for all tables.
            // NOTE: index_section records for external files where phash_t3 points to this hash are NOT removed.
        foreach($indexTables as $table) {
            $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.$phash);
        }
    }
}
1477
/**
 * Returns the index_grlist records for a phash, leaving out the first record
 * whose gr_list equals the given one.
 *
 * @param integer $phash phash integer to look up on
 * @param string $gr_list gr_list string to filter OUT of the result (first occurrence only)
 * @return array Array of records from index_grlist table
 */
function getGrListEntriesForPhash($phash,$gr_list) {
    $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash));

    $remaining = array();
    $skipPending = TRUE;    // TRUE until the first matching record has been dropped
    while($record = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
        if ($skipPending && strcmp($record['gr_list'],$gr_list)===0) {
            $skipPending = FALSE;
            continue;
        }
        $remaining[] = $record;
    }

    return $remaining;
}
1498
/**
 * Setting / Unsetting stopwords
 * Updates the is_stopword flag in index_words for every word id given.
 * Nothing is done unless the backend user is an admin.
 *
 * @param array $stopWords Array with stop-word WIDs as keys and 0/1 as values to unset / set
 * @return void
 */
function processStopWords($stopWords) {

    if (!is_array($stopWords) || !$GLOBALS['BE_USER']->isAdmin()) {
        return;
    }

        // Traverse words
    foreach($stopWords as $wid => $state) {
        $fieldArray = array(
            'is_stopword' => $state ? 1 : 0
        );
            // The word id is an array key of the input and goes into the WHERE clause,
            // so it is forced to an integer here.
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.intval($wid), $fieldArray);
    }
}
1517
/**
 * Setting / Unsetting keywords in page header
 * The current keywords of the page are read, the given keywords are added or removed
 * and the resulting list is written back to the page record through TCEmain.
 *
 * @param array $pageKeywords Page keywords as keys in array with value 1 or 0 for set or unset.
 * @param integer $pageUid The page uid of the header where the keywords are to be set.
 * @return void
 */
function processPageKeywords($pageKeywords, $pageUid) {

        // Get pages current keywords, as keys of an array:
    $pageRec = t3lib_BEfunc::getRecord('pages', $pageUid);
    $currentList = t3lib_div::trimExplode(',', $pageRec['keywords'], 1);
    $keywords = array_flip($currentList);

        // Merge keywords:
    foreach($pageKeywords as $word => $enabled) {
        if (!$enabled) {
            unset($keywords[$word]);
            continue;
        }
        $keywords[$word] = 1;
    }

        // Compile new list:
    $data = array(
        'pages' => array(
            $pageUid => array(
                'keywords' => implode(', ',array_keys($keywords))
            )
        )
    );

        // Write the page record:
    $tce = t3lib_div::makeInstance('t3lib_TCEmain');
    $tce->stripslashes_values = 0;
    $tce->start($data,array());
    $tce->process_datamap();
}
1549 }
1550
1551
1552
// XCLASS hook: if an extension file is registered for this script under the current TYPO3_MODE, include it.
if (defined('TYPO3_MODE')) {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']) {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']);
    }
}
1556
1557 ?>