* Added Karsten D.s patches for DBAL.
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / modfunc1 / class.tx_indexedsearch_modfunc1.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 2001-2004 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 *
17 * This script is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * This copyright notice MUST APPEAR in all copies of the script!
23 ***************************************************************/
24 /**
25 * Module extension (addition to function menu) 'Indexed search' for the 'indexed_search' extension.
26 *
27 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
28 */
29 /**
30 * [CLASS/FUNCTION INDEX of SCRIPT]
31 *
32 *
33 *
34 * 110: class tx_indexedsearch_modfunc1 extends t3lib_extobjbase
35 * 124: function modMenu()
36 * 148: function main()
37 *
38 * SECTION: Drawing table of indexed pages
39 * 261: function drawTableOfIndexedPages()
40 * 312: function indexed_info($data, $firstColContent)
41 * 398: function printPhashRow($row,$grouping=0,$extraGrListRows)
42 * 539: function printPhashRowHeader()
43 * 592: function returnNumberOfColumns()
44 *
45 * SECTION: Details display, phash row
46 * 628: function showDetailsForPhash($phash)
47 * 745: function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='')
48 * 794: function listMetaphoneStat($ftrows,$header)
49 * 831: function linkWordDetails($string,$wid)
50 * 843: function linkMetaPhoneDetails($string,$metaphone)
51 * 853: function flagsMsg($flags)
52 *
53 * SECTION: Details display, words / metaphone
54 * 884: function showDetailsForWord($wid)
55 * 943: function showDetailsForMetaphone($metaphone)
56 *
57 * SECTION: Helper functions
58 * 1014: function printRemoveIndexed($phash,$alt)
59 * 1027: function printReindex($resultRow,$alt)
60 * 1042: function linkDetails($string,$phash)
61 * 1051: function linkList()
62 * 1062: function showPageDetails($string,$id)
63 * 1072: function printExtraGrListRows($extraGrListRows)
64 * 1089: function printRootlineInfo($row)
65 * 1123: function makeItemTypeIcon($it,$alt='')
66 * 1148: function utf8_to_currentCharset($string)
67 *
68 * SECTION: Reindexing
69 * 1180: function reindexPhash($phash, $pageId)
70 * 1234: function getUidRootLineForClosestTemplate($id)
71 *
72 * SECTION: Indexing of configurations
73 * 1275: function extraIndexing()
74 * 1384: function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid)
75 *
76 * SECTION: SQL functions
77 * 1441: function removeIndexedPhashRow($phashList,$clearPageCache=1)
78 * 1478: function getGrListEntriesForPhash($phash,$gr_list)
79 * 1498: function processStopWords($stopWords)
80 * 1518: function processPageKeywords($pageKeywords, $pageUid)
81 *
82 * TOTAL FUNCTIONS: 32
83 * (This index is automatically created/updated by the extension "extdeveval")
84 *
85 */
86
87
88 require_once(PATH_t3lib.'class.t3lib_pagetree.php');
89 require_once(PATH_t3lib.'class.t3lib_extobjbase.php');
90 require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
91
92
93 // ... all for the rootline!
94 require_once (PATH_t3lib."class.t3lib_page.php");
95 require_once (PATH_t3lib."class.t3lib_tstemplate.php");
96 require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");
97
98 // Keywords mgm:
99 require_once (PATH_t3lib."class.t3lib_tcemain.php");
100
101
102
103 /**
104 * Backend module function ('Indexed search' in the function menu) showing indexing information for the 'indexed_search' extension
105 *
106 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
107 * @package TYPO3
108 * @subpackage tx_indexedsearch
109 */
110 class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
111
112 // Internal, dynamic:
var $allPhashListed = array();		// phash values accumulated while listing rows; used for the "Clear ALL phash-rows" link in the table header.
var $external_parsers = array();	// External content parsers - objects set here (in main()) with file extensions as keys.
var $iconFileNameCache = array();	// File extensions - icon map/cache.
var $indexerObj;					// Indexer object (instance of tx_indexedsearch_indexer, created in main()).
var $maxListPerPage = 100;			// Maximum number of phash rows listed per page. main() sets it to 100, or to 100000 when "listALL" is requested. Declared here so the property exists before main() runs.
117
118
119 /**
120 * Initialize menu array internally
121 *
122 * @return array Menu configuration with the 'depth' and 'type' selector items
123 */
function modMenu() {
	global $LANG;

		// Items of the "depth" selector; the labels are fetched from the core language file:
	$depthOptions = array();
	$depthOptions[0] = $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_0');
	$depthOptions[1] = $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_1');
	$depthOptions[2] = $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_2');
	$depthOptions[3] = $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_3');

		// Items of the "type" selector (display modes of this module function):
	$typeOptions = array();
	$typeOptions[0] = 'Overview';
	$typeOptions[1] = 'Technical Details';
	$typeOptions[2] = 'Words and content';
	$typeOptions[3] = 'Indexing';

		// Compile the menu configuration, "depth" first, then "type":
	$menu = array();
	$menu['depth'] = $depthOptions;
	$menu['type'] = $typeOptions;

	return $menu;
}
142
143 /**
144 * Produces main content of the module
145 *
146 * @return string HTML output
147 */
function main() {
		// Only the globals actually used below are imported.
	global $LANG, $TYPO3_CONF_VARS;

		// Accumulated HTML output. Initialized explicitly so that the ".=" operations below never work on an undefined variable.
	$theOutput = '';

		// Return (empty output) if no page id:
	if ($this->pObj->id<=0) return $theOutput;

		// Initialize max-list items
	$this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;

		// Processing deletion of phash rows:
	$deletePhash = t3lib_div::_GP('deletePhash');
	if ($deletePhash) {
		$this->removeIndexedPhashRow($deletePhash);
	}

		// Processing stop-words:
	if (t3lib_div::_POST('_stopwords')) {
		$this->processStopWords(t3lib_div::_POST('stopWord'));
	}

		// Processing page keywords:
	if (t3lib_div::_POST('_pageKeywords')) {
		$this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid'));
	}

		// Initialize external document parsers:
		// Example configuration, see ext_localconf.php of this file!
	if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) {
		foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
			$this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef);

				// Drop the entry again if no object could be created (avoids a fatal "call on non-object" error)
				// or if the parser's own initialization returns false:
			if (!is_object($this->external_parsers[$extension]) || !$this->external_parsers[$extension]->initBackend($extension)) {
				unset($this->external_parsers[$extension]);
			}
		}
	}

		// Initialize indexer if we need it (metaphone display does...)
	$this->indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');

		// Set CSS styles specific for this document:
	$this->pObj->content = str_replace('/*###POSTCSSMARKER###*/','
		TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; }
	',$this->pObj->content);

		// Dispatch on the GET parameters; the first one set decides which detail view is shown:
	if (t3lib_div::_GET('phash')) {

			// Details for a phash record:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1);
	} elseif (t3lib_div::_GET('wid')) {

			// Details for a word:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);
	} elseif (t3lib_div::_GET('metaphone')) {

			// Details for a metaphone value:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);
	} elseif (t3lib_div::_GET('reindex')) {

			// Re-indexing of a single element:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1);
	} else {	// Detail listings:

			// Type function menu:
		$h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php');

			// Types 0-2 list indexed pages and therefore also get the depth function menu; any other type shows the indexing view:
		$showsPageList = t3lib_div::inList('0,1,2',$this->pObj->MOD_SETTINGS['type']);
		if ($showsPageList) {
			$h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php');
		}

			// Show title / function menu:
		$theOutput.= $this->pObj->doc->spacer(5);
		$theOutput.= $this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);

		$theOutput.= $showsPageList ? $this->drawTableOfIndexedPages() : $this->extraIndexing();
	}

	return $theOutput;
}
239
240
241
242
243
244
245
246
247
248
249
250 /*******************************
251 *
252 * Drawing table of indexed pages
253 *
254 ******************************/
255
256 /**
257 * Produces a table with indexing information for each page.
258 *
259 * @return string HTML output
260 */
function drawTableOfIndexedPages() {
	global $BACK_PATH;

		// Initialize the page tree, limited to pages the backend user may see (permission bit 1):
	$tree = t3lib_div::makeInstance('t3lib_pageTree');
	$perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
	$tree->init('AND '.$perms_clause);

		// The current page is always the first entry of the tree:
	$HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />';
	$tree->tree[] = Array(
		'row' => $this->pObj->pageinfo,
		'HTML' => $HTML
	);

		// Sub pages are only added when a depth greater than zero is selected:
	if ($this->pObj->MOD_SETTINGS['depth']) {
		$tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], '');
	}

		// Traverse page tree and collect the table rows for each page:
	$code = '';
	foreach($tree->tree as $data) {
			// The page title is user supplied content and is escaped before being written into the HTML.
			// NOTE(review): assumes showPageDetails() inserts the string verbatim, like the other link*() wrappers in this class - confirm.
		$pageTitle = htmlspecialchars(t3lib_div::fixed_lgd($data['row']['title'], 20));

		$code.= $this->indexed_info(
			$data['row'],
			$data['HTML'].$this->showPageDetails($pageTitle, $data['row']['uid'])
		);
	}

		// Output is initialized explicitly; the original appended to an undefined variable:
	$theOutput = '';
	if ($code) {
			// The header row is rendered after the rows, so $this->allPhashListed is complete when the "Clear ALL" link is built:
		$code = '<br/><br/>
				<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
				$this->printPhashRowHeader().
				$code.
				'</table>';

			// Create section to output:
		$theOutput.= $this->pObj->doc->section('',$code,0,1);
	} else {
		$theOutput.= $this->pObj->doc->section('','<br/><br/>'.$this->pObj->doc->icons(1).'There were no indexed pages found in the tree.<br/><br/>',0,1);
	}

	return $theOutput;
}
304
305 /**
306 * Create information table row for a page regarding indexing information.
307 *
308 * @param array Data array for this page
309 * @param string HTML content for first column (page tree icon etc.)
310 * @return string HTML code. (table row)
311 */
function indexed_info($data, $firstColContent) {

		// Query: phash rows joined with their section records for this page.
		// One row more than the list limit is requested, so that "there are more rows" can be detected.
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
				'ISEC.*, IP.*, count(*) AS count_val',
				'index_phash IP, index_section ISEC',
				'IP.phash = ISEC.phash AND ISEC.page_id = '.intval($data['uid']),
				'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid',
				'IP.item_type, IP.tstamp',
				($this->maxListPerPage+1)
			);

		// Initialize variables:
	$rowCount = 0;
	$lines = array();		// Collecting HTML rows here, grouped by phash_grouping.
	$phashAcc = array();	// Collecting phash values (to remove local indexing for)
	$phashAcc[] = 0;		// Guarantees a non-empty "NOT IN (...)" list below.

		// Traverse the result set of phash rows selected:
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		if ($rowCount == $this->maxListPerPage) {
			$rowCount++;	// Increase so the rowspan of the first column also covers the extra warning row.
			break;
		}

			// Determine the grouping flag BEFORE writing to $lines, so it does not depend on the evaluation order inside the assignment:
		$groupKey = $row['phash_grouping'];
		$isGrouped = isset($lines[$groupKey]);

			// Adds a display row:
		$lines[$groupKey][] = $this->printPhashRow(
			$row,
			$isGrouped,
			$this->getGrListEntriesForPhash($row['phash'], $row['gr_list'])
		);
		$rowCount++;
		$phashAcc[] = $row['phash'];
		$this->allPhashListed[] = $row['phash'];	// For removing all shown phash rows.
	}

		// Compile rows into the table:
	$out = '';
	if (count($lines)) {
			// The first column spans all rows of this page and is only output in the first row:
		$firstColContent = '<td rowspan="'.$rowCount.'">'.$firstColContent.'</td>';
		foreach($lines as $rowSet) {
			foreach($rowSet as $rows) {
				$out.='
					<tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>';

				$firstColContent = '';
			}
		}

		if ($rowCount > $this->maxListPerPage) {	// Now checking greater than, because we increased $rowCount before...
				// No cell for the first column here: the rowspan cell above already covers this row.
				// (An additional cell would give the row one column more than the rest of the table.)
			$out.='
				<tr class="bgColor-20">
					<td colspan="'.($this->returnNumberOfColumns()-1).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td>
				</tr>';
		}
	} else {
		$out.='
			<tr class="bgColor-20">
				<td>'.$firstColContent.'</td>
				<td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td>
			</tr>';
	}

		// Checking for phash-rows which are NOT joined with the section table.
		// The phash values are forced to integers before they are put into the SQL statement:
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
				'IP.*',
				'index_phash IP',
				'IP.data_page_id = '.intval($data['uid']).' AND IP.phash NOT IN ('.implode(',',array_map('intval',$phashAcc)).')'
			);
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		$out.='
			<tr class="typo3-red">
				<td colspan="'.$this->returnNumberOfColumns().'"><b>Warning:</b> phash-row "'.htmlspecialchars($row['phash']).'" didn\'t have a representation in the index_section table!</td>
			</tr>';
		$this->allPhashListed[] = $row['phash'];
	}

	return $out;
}
388
389 /**
390 * Render a single row of information about an indexing entry.
391 *
392 * @param array Row from query (combined phash table with sections etc).
393 * @param boolean Set if grouped to previous result; the icon of the element is not shown again.
394 * @param array Array of index_grlist records.
395 * @return array Array of table rows.
396 * @see indexed_info()
397 */
function printPhashRow($row,$grouping=0,$extraGrListRows=array()) {
		// Note on the signature: the third parameter got a default value because a required parameter
		// after an optional one is deprecated in current PHP versions. Existing three-argument calls are unaffected.
	$lines = array();

		// Title cell attributes will highlight TYPO3 pages with a slightly darker color (bgColor4) than attached medias. Also IF there are more than one section record for a phash row it will be red as a warning that something is wrong!
	$titleCellAttribs = $row['count_val']!=1 ? ' bgcolor="red"' : ($row['item_type']==='0' ? ' class="bgColor4"' : '');

		// cHash parameters are unserialized once and used for both the title and the technical details.
		// NOTE(review): unserialize() is applied to data read from the index tables; this is only safe as long as those tables are written by the indexer alone.
	$cHashParams = unserialize($row['cHashParams']);
	if (!is_array($cHashParams)) {
		$cHashParams = array();
	}
	$pageKey = isset($cHashParams['key']) ? $cHashParams['key'] : '';

		// For non-page items the key (if any) is appended to the title:
	$page = ($row['item_type'] && $pageKey) ? ' ['.$pageKey.']' : '';
	$elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd($row['item_title'], 20).$page) : '<em>[No Title]</em>',$row['phash']);
	$cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');

		// The first three cells are identical for all display types:
		// Display icon (not repeated for rows grouped to the previous one):
	if (!$grouping) {
		$lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
	} else {
		$lines[] = '<td>&nbsp;</td>';
	}

		// Title displayed:
	$lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';

		// Remove-indexing-link:
	$lines[] = '<td>'.$cmdLinks.'</td>';

	switch($this->pObj->MOD_SETTINGS['type']) {
		case 1:	// Technical details:
				// Various data:
			$lines[] = '<td>'.$row['phash'].'</td>';
			$lines[] = '<td>'.$row['contentHash'].'</td>';

			if ($row['item_type']==='0') {
				$lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : '&nbsp;').'</td>';
				$lines[] = '<td>'.($row['data_page_mp'] ? htmlspecialchars($row['data_page_mp']) : '&nbsp;').'</td>';
			} else {
				$lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>';
			}
			$lines[] = '<td>'.htmlspecialchars($row['gr_list']).$this->printExtraGrListRows($extraGrListRows).'</td>';
			$lines[] = '<td>'.$this->printRootlineInfo($row).'</td>';
			$lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : '&nbsp;').'</td>';
				// empty() is used because these fields may be missing from the row:
			$lines[] = '<td>'.(!empty($row['freeIndexUid']) ? $row['freeIndexUid'] : '&nbsp;').'</td>';
			$lines[] = '<td>'.(!empty($row['recordUid']) ? $row['recordUid'] : '&nbsp;').'</td>';

				// cHash parameters: the cHash itself is shown in its own column and removed from the parameter list:
			$theCHash = '';
			if (isset($cHashParams['cHash'])) {
				$theCHash = $cHashParams['cHash'];
				unset($cHashParams['cHash']);
			}

			if ($row['item_type']) {	// pdf...
				$lines[] = '<td>'.($pageKey ? 'Page '.htmlspecialchars($pageKey) : '').'&nbsp;</td>';
			} elseif ($row['item_type']==0) {
				$lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$cHashParams)).'&nbsp;</td>';
			} else {
				$lines[] = '<td class="bgColor">&nbsp;</td>';
			}

			$lines[] = '<td>'.htmlspecialchars($theCHash).'</td>';
		break;
		case 2:	// Words and content:
				// Query: full text of the indexed element:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'*',
						'index_fulltext',
						'phash = '.intval($row['phash'])
					);
				// There may be no fulltext row for this phash:
			$fulltext = (is_array($ftrows) && isset($ftrows[0]['fulltextdata'])) ? $ftrows[0]['fulltextdata'] : '';
			$lines[] = '<td style="white-space: normal;">'.
						t3lib_div::fixed_lgd($this->utf8_to_currentCharset(htmlspecialchars($fulltext)),3000).
						'<hr/><em>Size: '.strlen($fulltext).'</em>'.
						'</td>';

				// Query: words related to the element; the result array is indexed by the "baseword" field:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'index_words.baseword, index_rel.*',
						'index_rel, index_words',
						'index_rel.phash = '.intval($row['phash']).
							' AND index_words.wid = index_rel.wid',
						'',
						'',
						'',
						'baseword'
					);

			$wordList = '';
			if (is_array($ftrows)) {
				$indexed_words = array_keys($ftrows);
				sort($indexed_words);
				$wordList = $this->utf8_to_currentCharset(htmlspecialchars(implode(' ',$indexed_words)));
				$wordList.='<hr/><em>Count: '.count($indexed_words).'</em>';
			}

			$lines[] = '<td style="white-space: normal;">'.$wordList.'</td>';
		break;
		default:	// Overview
			$lines[] = '<td style="white-space: normal;">'.$this->utf8_to_currentCharset(htmlspecialchars($row['item_description'])).'...</td>';
			$lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
			$lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
		break;
	}

	return $lines;
}
533
534 /**
535 * Creates the header row for the table
536 *
537 * @return string HTML string (table row)
538 */
function printPhashRowHeader() {

		// The first four header cells are the same for every display type. The last one holds the link for clearing all phash rows listed:
	$cells = array(
		'<td>&nbsp;</td>',
		'<td>&nbsp;</td>',
		'<td>Title</td>',
		'<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>'
	);

		// The remaining column titles depend on the display type:
	$displayType = $this->pObj->MOD_SETTINGS['type'];
	if ($displayType == 1) {	// Technical details
		$titles = array(
			'pHash',
			'cHash',
			'&amp;id',
			'&amp;type',
			'&amp;L',
			'&amp;MP',
			'grlist',
			'Rootline',
			'page_id',
			'phash_t3',
			'CfgUid',
			'RecUid',
			'GET-parameters',
			'&amp;cHash'
		);
	} elseif ($displayType == 2) {	// Words and content
		$titles = array('Content', 'Words');
	} else {	// Overview
		$titles = array('Description', 'Size', 'Indexed:');
	}

	foreach($titles as $title) {
		$cells[] = '<td>'.$title.'</td>';
	}

	return '<tr class="tableheader bgColor5">'.implode('',$cells).'</tr>';
}
586
587 /**
588 * Returns the number of columns depending on display type of list
589 *
590 * @return integer Number of columns in list:
591 */
function returnNumberOfColumns() {
	$displayType = $this->pObj->MOD_SETTINGS['type'];

		// Technical details:
	if ($displayType == 1) {
		return 18;
	}

		// Words and content:
	if ($displayType == 2) {
		return 6;
	}

		// Overview (and any other value):
	return 7;
}
605
606
607
608
609
610
611
612
613
614
615
616 /*******************************
617 *
618 * Details display, phash row
619 *
620 *******************************/
621
622 /**
623 * Showing details for a particular phash row
624 *
625 * @param integer phash value to display details for.
626 * @return string HTML content
627 */
function showDetailsForPhash($phash) {

		// The phash value is used in several queries below; force it to an integer once:
	$phash = intval($phash);

		// Selects the result row:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_phash',
				'phash = '.$phash
			);

		// Stop here if the row does not exist (also covers a failed query, where no array is returned):
	if (!is_array($ftrows) || !isset($ftrows[0]) || !is_array($ftrows[0])) {
		return 'Error: No phash row found';
	}
	$phashRecord = $ftrows[0];

	$content = '<h4>phash row content:</h4>'.
				$this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord));

		// Getting debug information if any. The section is only rendered when a debug row exists and its content can be unserialized:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_debug',
				'phash = '.$phash
			);
	if (is_array($ftrows) && isset($ftrows[0]['debuginfo'])) {
		$debugInfo = unserialize($ftrows[0]['debuginfo']);
		if (is_array($debugInfo)) {
				// The lexer output is shown separately from the rest of the debug array:
			$lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
			unset($debugInfo['lexer']);

			$content.= '<h3>Debug information:</h3>'.
					$this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo));

			$content.= '<h4>Debug information / lexer splitting:</h4>'.
					'<hr/><b>'.
					$this->utf8_to_currentCharset($lexer).
					'</b><hr/>';
		}
	}

	$content.='<h3>Word statistics</h3>';

		// Finding all words for this phash:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.*, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.$phash.
					' AND index_words.wid = index_rel.wid',
				'',
				'index_words.baseword',
				''
			);
	if (!is_array($ftrows)) {	// Keeps count() and foreach below safe if the query failed.
		$ftrows = array();
	}
	$pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']);
	$showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();	// Stop-word checkboxes are shown to admin users only.
	$content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);

		// Group words by their metaphone hash:
	$metaphone = array();
	foreach($ftrows as $row) {
		$metaphone[$row['metaphone']][] = $row['baseword'];
	}
	$content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');

		// Finding top-20 on frequency for this phash:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.baseword, index_words.metaphone, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.$phash.
					' AND index_words.wid = index_rel.wid',
				'',
				'index_rel.freq DESC',
				'20'
			);
	$content.= $this->listWords($ftrows, 'Top-20 words by frequency:');

		// Finding top-20 on count for this phash:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.baseword, index_words.metaphone, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.$phash.
					' AND index_words.wid = index_rel.wid',
				'',
				'index_rel.count DESC',
				'20'
			);
	$content.= $this->listWords($ftrows, 'Top-20 words by count:');

	$content.='<h3>Section records for this phash</h3>';

		// Finding sections for this record:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_section',
				'index_section.phash = '.$phash,
				'',
				'',
				''
			);
	$content.= t3lib_div::view_array($ftrows);

		// Add go-back link above and below the details:
	$content = $this->linkList().$content.$this->linkList();

	return $content;
}
735
736 /**
737 * Create table with list of words from $ftrows
738 *
739 * @param array Array of records selected from index_rel/index_words
740 * @param string Header string to show before table.
741 * @param boolean If set, the stopWord checkboxes will be shown in the word list. Only for admins. (because it is a global setting, not per-site).
742 * @param array The page record from which to load the keywords, if any.
743 * @return string HTML table
744 */
function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='') {

		// Tolerate a failed query (no array): render the table without word rows.
	if (!is_array($ftrows)) {
		$ftrows = array();
	}

		// Prepare keywords: the page keywords become array keys for fast lookup. Without a page record no keyword column is shown.
	$keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : '';
	$showKeywords = is_array($keywords);

		// Render list, header row first:
	$trows = '';
	$trows.= '
		<tr class="tableheader bgColor5">
			'.($stopWordBoxes ? '<td>'.htmlspecialchars('Stopword:').'</td>' : '').'
			<td>'.htmlspecialchars('Word:').'</td>
			<td>'.htmlspecialchars('Count:').'</td>
			<td>'.htmlspecialchars('First:').'</td>
			<td>'.htmlspecialchars('Frequency:').'</td>
			<td>'.htmlspecialchars('Flags:').'</td>
			'.($showKeywords ? '<td>'.htmlspecialchars('Page keyword:').'</td>' : '').'
		</tr>
	';
	foreach($ftrows as $row) {
			// "is_stopword" is not part of every result set passed in (the top-20 queries do not select it):
		$isStopWord = !empty($row['is_stopword']);
		$isKeyword = $showKeywords && isset($keywords[$row['baseword']]);

			// Values written into HTML attributes are escaped; the browser decodes them again on submit, so the posted field names are unchanged:
		$widAttr = htmlspecialchars($row['wid']);
		$basewordAttr = htmlspecialchars($row['baseword']);

			// Note the leading space in ' checked="checked"': without it the attribute is glued to the preceding value="1".
		$trows.= '
			<tr class="'.($isStopWord ? 'bgColor' : 'bgColor4').'">
				'.($stopWordBoxes ? '<td align="center"'.($isStopWord ? ' style="background-color:red;"' : '').'><input type="hidden" name="stopWord['.$widAttr.']" value="0" /><input type="checkbox" name="stopWord['.$widAttr.']" value="1"'.($isStopWord ? ' checked="checked"' : '').' /></td>' : '').'
				<td>'.$this->linkWordDetails($this->utf8_to_currentCharset(htmlspecialchars($row['baseword'])), $row['wid']).'</td>
				<td>'.htmlspecialchars($row['count']).'</td>
				<td>'.htmlspecialchars($row['first']).'</td>
				<td>'.htmlspecialchars($row['freq']).'</td>
				<td>'.htmlspecialchars($this->flagsMsg($row['flags'])).'</td>
				'.($showKeywords ? '<td align="center"'.($isKeyword ? ' class="bgColor2"' : '').'><input type="hidden" name="pageKeyword['.$basewordAttr.']" value="0" /><input type="checkbox" name="pageKeyword['.$basewordAttr.']" value="1"'.($isKeyword ? ' checked="checked"' : '').' /></td>' : '').'
			</tr>
		';
	}

		// Compile header, table and (if enabled) the submit buttons for stop-words and page keywords:
	return '<h4>'.htmlspecialchars($header).'</h4>'.
				'
				<table border="0" cellspacing="1" cellpadding="2" class="c-list">
				'.$trows.'
				</table>'.
				($stopWordBoxes ? '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" />' : '').
				($showKeywords ? '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" /><input type="hidden" name="pageKeyword_pageUid" value="'.intval($page['uid']).'" />'.
						'<br/>Current keywords are: <em>'.htmlspecialchars(implode(', ',array_keys($keywords))).'</em>' : '');
}
786
787 /**
788 * Displays table of metaphone groups larger than 1
789 *
790 * @param array Result from word selection (index_rel/index_words)
791 * @param string Header string
792 * @return string HTML table
793 */
function listMetaphoneStat($ftrows,$header) {

		// Collect the table rows in an array, starting with the header row:
	$rowList = array();
	$rowList[] = '
		<tr class="tableheader bgColor5">
			<td>'.htmlspecialchars('Metaphone:').'</td>
			<td>'.htmlspecialchars('Hash:').'</td>
			<td>'.htmlspecialchars('Count:').'</td>
			<td>'.htmlspecialchars('Words:').'</td>
		</tr>
	';

	foreach($ftrows as $metaphone => $words) {
			// Only metaphone values shared by more than one word are listed:
		if (count($words)<=1) {
			continue;
		}

			// The first word of the group is passed to the indexer to get the metaphone string shown as link text:
		$metaphoneLink = $this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone);
		$wordList = $this->utf8_to_currentCharset(htmlspecialchars(implode(', ',$words)));

		$rowList[] = '
			<tr class="bgColor4">
				<td>'.$metaphoneLink.'</td>
				<td>'.htmlspecialchars($metaphone).'</td>
				<td>'.htmlspecialchars(count($words)).'</td>
				<td style="white-space: normal;">'.$wordList.'</td>
			</tr>
		';
	}

	$table = '<table border="0" cellspacing="1" cellpadding="2" class="c-list">
				'.implode('',$rowList).'
				</table>';

	return '<h4>'.htmlspecialchars($header).'</h4>'.$table;
}
823
824 /**
825 * Wraps input string in a link that will display details for the word, e.g. which other pages have the word, metaphone associations etc.
826 *
827 * @param string String to wrap, possibly a title or so.
828 * @param integer wid value to show details for
829 * @return string Wrapped string
830 */
function linkWordDetails($string,$wid) {
		// Link to this script with the word id set and any "phash" parameter blanked:
	$urlParameters = array(
		'wid' => $wid,
		'phash' => ''
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($urlParameters));

	return '<a href="'.$href.'">'.$string.'</a>';
}
834
835
836 /**
837 * Wraps input string in a link to see more details for metaphone value
838 *
839 * @param string String to wrap
840 * @param integer Metaphone value
841 * @return string Wrapped string
842 */
function linkMetaPhoneDetails($string,$metaphone) {
		// Link to this script with the metaphone value set and the "wid" / "phash" parameters blanked:
	$urlParameters = array(
		'metaphone' => $metaphone,
		'wid' => '',
		'phash' => ''
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($urlParameters));

	return '<a href="'.$href.'">'.$string.'</a>';
}
846
847 /**
848 * Creates message for flag value
849 *
850 * @param integer Flags integer
851 * @return string Message string
852 */
function flagsMsg($flags) {

		// No message for flag values that are not positive. An empty string is returned explicitly
		// (the original returned nothing, i.e. NULL), so callers always receive a string.
	if (!($flags > 0)) {
		return '';
	}

		// Each bit tested adds the name of the markup the flag stands for:
	$msg = '';
	if ($flags & 128) {	// pow(2,7)
		$msg.= '<title>';
	}
	if ($flags & 64) {	// pow(2,6)
		$msg.= '<meta/keywords>';
	}
	if ($flags & 32) {	// pow(2,5)
		$msg.= '<meta/description>';
	}

		// The raw flag value is always appended in parentheses:
	return $msg.' ('.$flags.')';
}
862
863
864
865
866
867
868
869
870
871
872 /*******************************
873 *
874 * Details display, words / metaphone
875 *
876 *******************************/
877
878 /**
879 * Show details for words
880 *
881 * @param integer Word ID (wid)
882 * @return string HTML content
883 */
function showDetailsForWord($wid) {

		// Select references to this word (phash and section records of every element containing it), highest frequency first:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_phash.*, index_section.*, index_rel.*',
				'index_rel, index_section, index_phash',
				'index_rel.wid = '.intval($wid).
					' AND index_rel.phash = index_section.phash'.
					' AND index_section.phash = index_phash.phash',
				'',
				'index_rel.freq DESC',
				''
			);

		// Headers. The content variable is assigned here; the original appended to an undefined variable:
	$content = '
		<tr class="tableheader bgColor5">
			<td>phash</td>
			<td>page_id</td>
			<td>data_filename</td>
			<td>count</td>
			<td>first</td>
			<td>freq</td>
			<td>flags</td>
		</tr>';

		// One row per reference; the phash value links to the details of that phash row:
	if (is_array($ftrows)) {
		foreach($ftrows as $wDat) {
			$content.='
				<tr class="bgColor4">
					<td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td>
					<td>'.htmlspecialchars($wDat['page_id']).'</td>
					<td>'.htmlspecialchars($wDat['data_filename']).'</td>
					<td>'.htmlspecialchars($wDat['count']).'</td>
					<td>'.htmlspecialchars($wDat['first']).'</td>
					<td>'.htmlspecialchars($wDat['freq']).'</td>
					<td>'.htmlspecialchars($wDat['flags']).'</td>
				</tr>';
		}
	}

		// Compile table:
	$content = '
		<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
			$content.'
		</table>';

		// Add go-back link:
	$content = $content.$this->linkList();

	return $content;
}
936
/**
 * Show details for metaphone value
 *
 * Lists all words from index_words registered with the given metaphone hash and whether each
 * of them is a stopword. The first word found is used to display the raw metaphone string and
 * to verify that it still hashes to the requested value.
 *
 * @param	integer		Metaphone integer hash
 * @return	string		HTML content (empty string if no words were found)
 */
function showDetailsForMetaphone($metaphone) {
	$content = '';

		// Select all words having this metaphone value, ordered by base word:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'index_words.*',
		'index_words',
		'index_words.metaphone = '.intval($metaphone),
		'',
		'index_words.baseword',
		''
	);

	if (is_array($ftrows) && count($ftrows)) {

			// Header showing the raw metaphone value. It is kept outside the table markup
			// since a heading is not valid as a direct child of <table>.
		$header = '<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>';

			// Table header row:
		$tableRows = '
			<tr class="tableheader bgColor5">
				<td>Word</td>
				<td>Is stopword?</td>
			</tr>';

			// One row per word; the word links to its word detail view:
		foreach($ftrows as $wDat) {
			$tableRows.='
				<tr class="bgColor4">
					<td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td>
					<td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td>
				</tr>';
		}

			// Compile header and table:
		$content = $header.'
			<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
				$tableRows.'
			</table>';

			// Sanity check: the hash computed from the first word must equal the requested value.
		if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone) {
			$content.='ERROR: Metaphone string and hash did not match for some reason!?';
		}

			// Add go-back link:
		$content.= $this->linkList();
	}

	return $content;
}
989
990
991
992
993
994
995
996
997
998
999
1000
1001 /*******************************
1002 *
1003 * Helper functions
1004 *
1005 *******************************/
1006
/**
 * Creates a garbage bin icon wrapped in a link that passes the given phash list back to this
 * script in the "deletePhash" parameter.
 *
 * @param	string		List of phash integers
 * @param	string		Title text for the garbage bin icon.
 * @return	string		HTML img-tag with link around.
 */
function printRemoveIndexed($phash,$alt) {
	$href = htmlspecialchars(t3lib_div::linkThisScript(array('deletePhash'=>$phash)));
	$icon = '<img src="'.$GLOBALS['BACK_PATH'].'gfx/garbage.gif" width="11" hspace="1" vspace="2" height="12" border="0" title="'.htmlspecialchars($alt).'" alt="" />';

	return '<a href="'.$href.'">'.$icon.'</a>';
}
1019
/**
 * Button for re-indexing of documents.
 * A button is only produced for rows whose "item_type" is non-empty and not the string "0".
 *
 * @param	array		phash table result row (uses "item_type", "phash" and "page_id").
 * @param	string		Title attribute text for icon
 * @return	string		HTML content; Icon wrapped in link. Nothing (NULL) is returned when no button applies.
 */
function printReindex($resultRow,$alt) {
	if (!$resultRow['item_type'] || $resultRow['item_type']==='0') {
		return;
	}

	$linkParams = array(
		'reindex' => $resultRow['phash'],
		'reindex_id' => $resultRow['page_id']
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($linkParams));
	$icon = '<img src="'.$GLOBALS['BACK_PATH'].'gfx/refresh_n.gif" width="14" hspace="1" vspace="2" height="14" border="0" title="'.htmlspecialchars($alt).'" alt="" />';

	return '<a href="'.$href.'">'.$icon.'</a>';
}
1034
/**
 * Wraps the input string in a link to this script with the "phash" parameter set,
 * used for showing the details of that phash value.
 *
 * @param	string		String to wrap, possibly a title or so (inserted into the link unchanged).
 * @param	integer		phash value to show details for
 * @return	string		Wrapped string
 */
function linkDetails($string,$phash) {
	$href = htmlspecialchars(t3lib_div::linkThisScript(array('phash'=>$phash)));

	return '<a href="'.$href.'">'.$string.'</a>';
}
1045
/**
 * Creates the "Back to list." link, pointing to index.php with the id of the parent object.
 *
 * @return	string		Link back to list, surrounded by line breaks
 */
function linkList() {
	$listUrl = 'index.php?id='.$this->pObj->id;

	return '<br/><a href="'.$listUrl.'">Back to list.</a><br/>';
}
1054
/**
 * Wraps the input string in a link to index.php for the given page id, with the
 * module settings "depth" set to 0 and "type" set to 1.
 *
 * @param	string		String to wrap, possibly a title or so (inserted into the link unchanged).
 * @param	integer		Page id to link to
 * @return	string		Wrapped string
 */
function showPageDetails($string,$id) {
	$url = 'index.php?id='.$id.'&SET[depth]=0&SET[type]=1';

	return '<a href="'.htmlspecialchars($url).'">'.$string.'</a>';
}
1065
/**
 * Prints the gr_lists attached to a indexed entry.
 * The "gr_list" value of each record is put on its own line (separated by <br/>) and the
 * result is passed through $GLOBALS['TBE_TEMPLATE']->dfw().
 *
 * @param	array		Array of index_grlist records
 * @return	string		HTML code (empty string if there are no records).
 */
function printExtraGrListRows($extraGrListRows) {
		// Nothing to print for an empty list or a non-array value:
	if (!is_array($extraGrListRows) || !count($extraGrListRows)) {
		return '';
	}

		// Collect the gr_list values (foreach instead of each(), which no longer exists in PHP 8):
	$lines = array();
	foreach($extraGrListRows as $r) {
		$lines[] = $r['gr_list'];
	}

	return '<br/>'.$GLOBALS['TBE_TEMPLATE']->dfw(implode('<br/>',$lines));
}
1082
/**
 * Print path for indexing
 *
 * Collects the values of the fields rl0, rl1 and rl2 as levels 0-2, stopping at the first
 * empty one. Only when all three are set, the fields configured in
 * $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['addRootLineFields'] (field name => level)
 * are added as well. The values are joined with "/" in order of level.
 *
 * @param	array		Result row with content from index_section
 * @return	string		Rootline information
 */
function printRootlineInfo($row) {
	$uidCollection = array();

		// Fixed levels; a higher level is only considered when all lower ones are set.
	$allFixedLevelsSet = TRUE;
	foreach(array('rl0','rl1','rl2') as $level => $fieldName) {
		if (!$row[$fieldName]) {
			$allFixedLevelsSet = FALSE;
			break;
		}
		$uidCollection[$level] = $row[$fieldName];
	}

		// Additional levels:
	if ($allFixedLevelsSet && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) {
		foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) {
			if ($row[$fieldName]) {
				$uidCollection[$rootLineLevel] = $row[$fieldName];
			}
		}
	}

		// Return root line.
	ksort($uidCollection);
	return implode('/',$uidCollection);
}
1115
/**
 * Return icon for file extension
 *
 * The item type "0" uses the pages icon of the extension; for other types the icon is
 * requested from the external parser object registered for that type. The generated <img> tag
 * is cached per item type in $this->iconFileNameCache.
 *
 * NOTE: The cached tag includes the title text, so the $alt value of the first call for an
 * item type is reused for all later calls with that type.
 *
 * @param	string		File extension / item type
 * @param	string		Title attribute value in icon.
 * @return	string		<img> tag for icon (empty string if no icon could be found)
 */
function makeItemTypeIcon($it,$alt='') {
	if (!isset($this->iconFileNameCache[$it])) {

			// Find the icon file reference for the item type (stays empty for unknown types):
		$icon = '';
		if ($it==='0') {
			$icon = 'EXT:indexed_search/pi/res/pages.gif';
		} elseif (!empty($this->external_parsers[$it])) {
			$icon = $this->external_parsers[$it]->getIcon($it);
		}

			// Build the image tag if the file can be resolved and read as an image:
		$imgTag = '';
		$fullPath = $icon ? t3lib_div::getFileAbsFileName($icon) : '';
		if ($fullPath) {
			$info = @getimagesize($fullPath);
			if (is_array($info)) {
				$iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site));
				$imgTag = '<img src="'.$iconPath.'" '.$info[3].' title="'.htmlspecialchars($alt).'" alt="" />';
			}
		}

			// Always store a result so the cache entry exists when it is read below:
		$this->iconFileNameCache[$it] = $imgTag;
	}
	return $this->iconFileNameCache[$it];
}
1141
/**
 * Converts the input string from utf-8 to the backend charset.
 * The backend charset is read from $LANG->charSet; if it is utf-8 the string is returned as is,
 * otherwise it is passed to $LANG->csConvObj->utf8_decode().
 *
 * @param	string		String to convert (utf-8)
 * @return	string		Converted string (backend charset if different from utf-8)
 */
function utf8_to_currentCharset($string) {
	global $LANG;

	if ($LANG->charSet == 'utf-8') {
		return $string;
	}

	return $LANG->csConvObj->utf8_decode($string, $LANG->charSet, TRUE);
}
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167 /********************************
1168 *
1169 * Reindexing
1170 *
1171 *******************************/
1172
/**
 * Re-indexing files/records attached to a page.
 *
 * Looks up the index_phash / index_section row for the phash value and page id. If the row has
 * a non-empty "item_type" other than "0", its "data_filename" is indexed again (as external URL
 * when "externalUrl" is set, otherwise as regular document) and the indexer's log and hash
 * arrays are shown. The "Back to list" link is always appended.
 *
 * @param	integer		Phash value
 * @param	integer		The page uid for the section record (file/url could appear more than one place you know...)
 * @return	string		HTML content
 */
function reindexPhash($phash, $pageId) {
	$content = '';

		// Look up the indexed item (first matching row):
	list($resultRow) = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'ISEC.*, IP.*',
		'index_phash IP, index_section ISEC',
		'IP.phash = ISEC.phash
			AND IP.phash = '.intval($phash).'
			AND ISEC.page_id = '.intval($pageId)
	);

	if (is_array($resultRow) && $resultRow['item_type'] && $resultRow['item_type']!=='0') {

			// Set up the indexer for the page the item is attached to:
		$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
		$rootLineUids = $this->getUidRootLineForClosestTemplate($pageId);
		$indexerObj->backend_initIndexer($pageId, 0, 0, '', $rootLineUids);

			// Index as URL or local file:
		$fileName = $resultRow['data_filename'];
		if ($resultRow['externalUrl']) {
			$indexerObj->indexExternalUrl($fileName);
		} else {
			$indexerObj->indexRegularDocument($fileName, TRUE);
		}

			// The phash calculated during re-indexing is expected to equal the stored one:
		$newPhash = $indexerObj->file_phash_arr['phash'];
		if ($newPhash != $resultRow['phash']) {
			$content.= 'ERROR: phash ('.$newPhash.') did NOT match '.$resultRow['phash'].' for strange reasons!';
		}

			// Output log and hash arrays from the indexer:
		$content.= '<h4>Log for re-indexing of "'.htmlspecialchars($fileName).'":</h4>'.
					t3lib_div::view_array($indexerObj->internal_log);
		$content.= '<h4>Hash-array, page:</h4>'.
					t3lib_div::view_array($indexerObj->hash);
		$content.= '<h4>Hash-array, file:</h4>'.
					t3lib_div::view_array($indexerObj->file_phash_arr);
	}

		// Link back to list.
	$content.= $this->linkList();

	return $content;
}
1226
/**
 * Get rootline for closest TypoScript template root.
 * Algorithm same as used in Web > Template, Object browser
 *
 * @param	integer		The page id to traverse rootline back from
 * @return	array		Array with the uid of each entry in the template object's rootline, using the same keys.
 */
function getUidRootLineForClosestTemplate($id) {

		// Template parser object:
	$tmpl = t3lib_div::makeInstance('t3lib_tsparser_ext');
	$tmpl->tt_track = 0;	// Do not log time-performance information
	$tmpl->init();

		// Get the rootline of the page and run the templates found in it.
		// This generates the constants/config + hierarchy info for the template.
	$sys_page = t3lib_div::makeInstance('t3lib_pageSelect');
	$pageRootLine = $sys_page->getRootLine($id);
	$tmpl->runThroughTemplates($pageRootLine,0);

		// Reduce the rootline kept by the template object to the uid values:
	$rootline_uids = array();
	foreach($tmpl->rootLine as $level => $pageRecord) {
		$rootline_uids[$level] = $pageRecord['uid'];
	}

	return $rootline_uids;
}
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264 /********************************
1265 *
1266 * Indexing of configurations
1267 *
1268 *******************************/
1269
/**
 * Runs the indexing configurations (index_config records) found on the current page.
 * Only records which are not hidden and whose starttime has passed are selected.
 *
 * The "type" field of a configuration decides what is indexed:
 *   1: The records of the table "table2index" are passed to the indexer's
 *      backend_indexAsTYPO3Page(). The first field in "fieldlist" is used as title,
 *      the remaining fields as content.
 *   2: The files found in "filepath" (filtered by "extensions", down to "depth") are indexed
 *      as regular documents.
 *   3: The URL in "externalUrl" is indexed recursively down to "depth".
 *
 * @return	void
 */
function extraIndexing() {

		// Select index configurations on this page
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'*',
		'index_config',
		'pid = '.intval($this->pObj->id).
			' AND hidden=0'.
			' AND starttime<'.time()
	);

		// Nothing to do if the query did not return a list of rows:
	if (!is_array($ftrows)) {
		return;
	}

	$rl = $this->getUidRootLineForClosestTemplate($this->pObj->id);

	foreach($ftrows as $cfgRow) {
		switch($cfgRow['type']) {
			case 1:
				if ($cfgRow['table2index'] && isset($GLOBALS['TCA'][$cfgRow['table2index']])) {

						// Init:
					$pid = intval($cfgRow['alternative_source_pid']) ? intval($cfgRow['alternative_source_pid']) : $this->pObj->id;
					$fieldList = t3lib_div::trimExplode(',',$cfgRow['fieldlist'],1);

						// Select
					$recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'*',
						$cfgRow['table2index'],
						'pid = '.intval($pid)
					);

						// Traverse:
					if (is_array($recs)) {
						foreach($recs as $r) {
								// (Re)-Indexing a row from a table:
							$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
							parse_str(str_replace('###UID###',$r['uid'],$cfgRow['get_params']),$GETparams);
							$indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl, $GETparams, $cfgRow['chashcalc'] ? TRUE : FALSE);
							$indexerObj->backend_setFreeIndexUid($cfgRow['uid']);

								// Title and content are reset for every record; otherwise the content
								// of all previously indexed records would be carried over into this one.
							$theTitle = '';
							$theContent = '';
							foreach($fieldList as $k => $v) {
								if (!$k) {
									$theTitle = $r[$v];
								} else {
									$theContent.= $r[$v].' ';
								}
							}

							$indexerObj->backend_indexAsTYPO3Page(
								$theTitle,
								'',
								'',
								$theContent,
								$GLOBALS['LANG']->charSet,
								$r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['tstamp']],
								$r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['crdate']],
								$r['uid']
							);
						}
					}
					debug($recs);
				}
			break;
			case 2:
					// Relative paths are resolved to absolute ones:
				$readpath = $cfgRow['filepath'];
				if (!t3lib_div::isAbsPath($readpath)) {
					$readpath = t3lib_div::getFileAbsFileName($readpath);
				}
				debug($readpath,'$readpath');

				if (t3lib_div::isAllowedAbsPath($readpath)) {
					$extList = implode(',',t3lib_div::trimExplode(',',$cfgRow['extensions'],1));
					$fileArr = array();
					$files = t3lib_div::getAllFilesAndFoldersInPath($fileArr,$readpath,$extList,0,$cfgRow['depth']);
					$files = t3lib_div::removePrefixPathFromList($files,PATH_site);
					debug($files);

					if (is_array($files)) {
						foreach($files as $path) {
								// (Re)-Indexing file on page.
							$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
							$indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl);
							$indexerObj->backend_setFreeIndexUid($cfgRow['uid']);
							$indexerObj->hash['phash'] = -1;	// EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

							$indexerObj->indexRegularDocument($path, TRUE);

								// Debug output, labelled with the path of the file just indexed:
							debug($indexerObj->internal_log,$path);
							debug($indexerObj->file_phash_arr,'file_phash_arr');
							debug($indexerObj->hash,'hash');
						}
					}
				}
			break;
			case 3:
				if ($cfgRow['externalUrl']) {
					$this->indexExtUrlRecursively($cfgRow['externalUrl'], $cfgRow['depth'], $this->pObj->id, $rl, $cfgRow['uid']);
				}
			break;
		}
	}
}
1372
/**
 * Indexing URL recursively
 * Still needs some work; eg. parameters to type, language, MP var is not passed yet...
 *
 * The URL itself is indexed first. If $depth is greater than zero, the hyperlinks extracted
 * from the fetched content are indexed with $depth reduced by one. Links without a scheme are
 * prefixed with scheme, host and port of $url.
 *
 * NOTE: Such links are resolved against the root of the host, not against the directory of
 * $url. The traversal of links stops after the first link whose key in the link list is
 * greater than 3.
 *
 * @param	string		URL, http://....
 * @param	integer		Depth of recursion. 0 (zero) = only input URL
 * @param	integer		Page id to relate indexing to.
 * @param	array		Rootline array to relate indexing to
 * @param	integer		Configuration UID
 * @return	void
 */
function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid) {

		// Index external URL:
	$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
	$indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
	$indexerObj->backend_setFreeIndexUid($cfgUid);

	$indexerObj->indexExternalUrl($url);

		// Recursion:
	if ($depth>0) {

			// Base (scheme://host[:port]) used for links which have no scheme of their own.
			// parse_url() may return FALSE for malformed URLs, hence the array check.
		$url_qParts = parse_url($url);
		if (!is_array($url_qParts)) {
			$url_qParts = array();
		}
		$baseUrl = (isset($url_qParts['scheme']) ? $url_qParts['scheme'] : '').'://'.
					(isset($url_qParts['host']) ? $url_qParts['host'] : '');
		if (!empty($url_qParts['port'])) {
			$baseUrl.= ':'.$url_qParts['port'];
		}

		$list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);

			// Traverse links:
		if (is_array($list)) {
			foreach($list as $count => $linkInfo) {

					// Decode entities:
				$linkSource = t3lib_div::htmlspecialchars_decode($linkInfo['href']);

					// Prefix links without scheme. Leading slashes of the link are removed
					// first so the result does not contain a double slash after the host.
				$qParts = parse_url($linkSource);
				if (!is_array($qParts) || empty($qParts['scheme'])) {
					$linkSource = $baseUrl.'/'.ltrim($linkSource,'/');
				}

				$this->indexExtUrlRecursively($linkSource, $depth-1, $pageId, $rl, $cfgUid);

					// Temporary limit until we know how to handle hundreds of URLs with limited parsetime in PHP...
				if ($count>3)	break;
			}
		}
	}
}
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428 /********************************
1429 *
1430 * SQL functions
1431 *
1432 *******************************/
1433
/**
 * Removes ALL data regarding a certain list of indexed phash-row
 *
 * For every phash value greater than zero the rows in index_phash, index_rel, index_section,
 * index_grlist, index_fulltext and index_debug are deleted. Before that, the cache_pages
 * entries of the pages found in index_section for the phash are deleted if requested.
 *
 * @param	string		List of phash integers
 * @param	boolean		If set, page cache is cleared as well.
 * @return	void
 */
function removeIndexedPhashRow($phashList,$clearPageCache=1) {

		// Tables holding registrations for a phash value:
	$indexTables = explode(',','index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug');

	foreach(t3lib_div::trimExplode(',',$phashList,1) as $listItem) {
		$phash = intval($listItem);
		if ($phash<=0) {
			continue;
		}

			// Clearing page cache:
		if ($clearPageCache) {
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.$phash);
			if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
				$pageIds = array();
				while($sectionRow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
					$pageIds[] = $sectionRow['page_id'];
				}
				$pageIdList = implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($pageIds));
				$GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.$pageIdList.')');
			}
		}

			// Removing old registrations for all tables.
			// Did not remove any index_section records for external files where phash_t3 points to this hash!
		foreach($indexTables as $table) {
			$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.$phash);
		}
	}
}
1470
/**
 * Returns an array with gr_list records for a phash
 * The first record whose "gr_list" value equals the given string is left out; any further
 * records with the same value are kept.
 *
 * @param	integer		phash integer to look up on
 * @param	string		gr_list string to filter OUT of the result (first occurence)
 * @return	array		Array of records from index_grlist table
 */
function getGrListEntriesForPhash($phash,$gr_list) {
	$remaining = array();
	$firstMatchSkipped = FALSE;

	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash));
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
			// Leave out the first record matching the gr_list string:
		if (!$firstMatchSkipped && !strcmp($row['gr_list'],$gr_list)) {
			$firstMatchSkipped = TRUE;
			continue;
		}
		$remaining[] = $row;
	}

	return $remaining;
}
1491
/**
 * Setting / Unsetting stopwords
 * The index_words table is only updated if the current backend user is an admin.
 *
 * @param	array		Array of stop-words WIDs with 0/1 to set / unset
 * @return	void
 */
function processStopWords($stopWords) {

	if ($GLOBALS['BE_USER']->isAdmin() && is_array($stopWords)) {
			// Traverse words
		foreach($stopWords as $wid => $state) {
			$fieldArray = array(
				'is_stopword' => $state ? 1 : 0
			);
				// The word id is an array key from the input and is forced to an integer
				// before it becomes part of the WHERE clause.
			$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.intval($wid), $fieldArray);
		}
	}
}
1510
/**
 * Setting / Unsetting keywords in page header
 *
 * The comma separated "keywords" field of the page record is merged with the input: keywords
 * with a true value are added, keywords with a false value are removed. The resulting list is
 * written back to the page through t3lib_TCEmain.
 *
 * @param	array		Page keywords as keys in array with value 0 or 1 for set or unset.
 * @param	integer		The page uid of the header where the keywords are to be set.
 * @return	void
 */
function processPageKeywords($pageKeywords, $pageUid) {

		// Get pages current keywords (as array keys)
	$pageRec = t3lib_BEfunc::getRecord('pages', $pageUid);
	$keywords = array_flip(t3lib_div::trimExplode(',', $pageRec['keywords'], 1));

		// Merge keywords:
	foreach($pageKeywords as $key => $v) {
		if (!$v) {
			unset($keywords[$key]);
			continue;
		}
		$keywords[$key] = 1;
	}

		// Compile new list:
	$data = array(
		'pages' => array(
			$pageUid => array(
				'keywords' => implode(', ',array_keys($keywords))
			)
		)
	);

		// Store the new list in the page record:
	$tce = t3lib_div::makeInstance('t3lib_TCEmain');
	$tce->stripslashes_values = 0;
	$tce->start($data,array());
	$tce->process_datamap();
}
1542 }
1543
1544
1545
// Include the extension class file registered for this script in $TYPO3_CONF_VARS, if any:
if (defined('TYPO3_MODE')) {
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']) {
		include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']);
	}
}
1549
1550 ?>