* Closed Indexed Search before 3.8.0 launch: Despite my hopes I had to significantly...
authorKasper Skårhøj <kasper@typo3.org>
Mon, 9 May 2005 16:09:12 +0000 (16:09 +0000)
committerKasper Skårhøj <kasper@typo3.org>
Mon, 9 May 2005 16:09:12 +0000 (16:09 +0000)
git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@715 709f56b5-9817-0410-a4d7-c38de5d9e867

ChangeLog
typo3/sysext/cms/tslib/class.tslib_content.php
typo3/sysext/indexed_search/class.doublemetaphone.php
typo3/sysext/indexed_search/class.indexer.php
typo3/sysext/indexed_search/doc/TODO.txt
typo3/sysext/indexed_search/ext_tables.php
typo3/sysext/indexed_search/ext_typoscript_setup.txt
typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php
typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php

index fc059df..d720c78 100755 (executable)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2005-05-09  Kasper Skårhøj,,,  <kasper@typo3.com>
+
+       * Closed Indexed Search before 3.8.0 launch: Despite my hopes I had to significantly lower my ambitions for this release; I couldn't find time to complete the overhaul but had to just patch up any immediate problems and make it work for the 3.8.0 release. The TODO list inside is updated with the remaining tasks, which are due for 4.0.0 late summer. If "Indexing Configurations" supporting indexing of external files, URLs and individual records have been mentioned earlier in this changelog or on the feature list, I can inform you that the incomplete implementation has been disabled and postponed for 4.0.
+
 2005-05-04  Kasper Skårhøj,,,  <kasper@typo3.com>
 
        * Added Hindi language
index 5979e50..f46e928 100755 (executable)
@@ -5117,10 +5117,7 @@ class tslib_cObj {
                }
                if (is_array($urlParameters))   {
                        if (count($urlParameters))      {
-                               reset($urlParameters);
-                               while(list($k,$v)=each($urlParameters)) {
-                                       $conf['additionalParams'].='&'.$k.'='.rawurlencode($v);
-                               }
+                               $conf['additionalParams'].= t3lib_div::implodeArrayForUrl('',$urlParameters);
                        }
                } else {
                        $conf['additionalParams'].=$urlParameters;
index 856218f..13a290c 100755 (executable)
@@ -54,7 +54,7 @@ class user_DoubleMetaPhone
 //  methods
 
                // TYPO3 specific API to this class. BEGIN
-       function metaphone($string)     {
+       function metaphone($string,$sys_language_uid=0) {
                $res = $this->DoubleMetaPhone($string);
                #debug(array($string,$res['primary']));
                return $res['primary'];
index bc9db36..d516bbb 100755 (executable)
@@ -443,6 +443,7 @@ class tx_indexedsearch_indexer {
                        // Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
                if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
                        $this->metaphoneObj = &t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']);
+                       $this->metaphoneObj->pObj = &$this;
                }
 
                        // Init charset class:
@@ -1219,7 +1220,7 @@ class tx_indexedsearch_indexer {
        function metaphone($word,$retRaw=FALSE) {
 
                if (is_object($this->metaphoneObj))     {
-                       $tmp = $this->metaphoneObj->metaphone($word);
+                       $tmp = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']);
                } else {
                        $tmp = metaphone($word);
                }
index 2d347c4..f8e2539 100755 (executable)
@@ -1,51 +1,37 @@
-Visning af indhold i backend module skal konverteres fra utf-8 til lokalt charset.
-
+***************
+TODO / projects:
+*****************
 
+Bugs / Issues:
+- The checkbox "No Search" in the page header is only respected by indexed_search during indexing! (A page will not be indexed when "No Search" is set). However, when searching, results are not filtered based on this flag - so if a page is indexed before the "No Search" flag is set, it will still be found in search results. Changing this is hard because the getTreeList() function that fetches all page ids cannot take a where-clause to filter it out but must have hardcoded support. Alternatively the pages table must be joined into the search result so we can select on the field. A solution is still not agreed upon.
+- For tt_news with access restricted records: don't show the title of page since it can reveal information
+       - SOLUTIONS: Maybe just hide search results where "resume" is normally just not shown?
+- When there is a page where *content* is access restricted (eg. from a plugin) while the page itself is not, TYPO3 will still display it as a search result (not the description of course, but the title will be revealed); there should be a flag that the plugin can set so the indexer knows that the page as a whole should be indexed as if it was completely access restricted. Or maybe _all_ search results which are NOT indexed under "0,-1" should be hidden? (Reported by Lars Houmark <lars@houmark.com>)
+- It seems that the handling of external media / languages is buggy. See mail from Gert Thiel <GertThiel@gmx.net>, 24/2 2005
 
+Errors encountered after spidering, maybe check:
+- testsite: "message appears" - showed external media which was NOT indexed!?
+- typo3site_live: Warning: phash-row "114682730" didn't have a representation in the index_section table! on references page!
 
-<diverse>
 Search test:
-- "message appears" - viste external media som IKKE var indekseret!?
 - external media respect privacy of pages?
 - external media on multiple pages with DIFFERENT languages?
-- checkResume: Should it also check for gr_list "0,-1"?
-- case-sensitivity?
-- Warning: phash-row "114682730" didn't have a representation in the index_section table! on references page! (typo3site_live)
-Example på alternativ søgnings SQL
-XHTML i frontend?
-
-Backend modules:
-- Proper skinning? / getLL? / XHTML
-
-Implement stop-word setting in: ""Top-20 words by count:" and a list seperate from that (in main module?)
-- Display filters:
-       - The checkbox "No Search" in the page header is only respected by indexed_search during indexing! (A page will not be indexed when "No Search" is set). However when searching results are not filtered based on this flag - so if a page is indexed before the no search flag is set it will be found in search results. To change this is hard because the getTreeList() function that fetches all page ids cannot take a where-clause to filter it out but must have hardcoded support. Alternatively the pages table must be joined into the search result so we can select on the field. A solution is still not agreed upon.
-       - For tt_news with access restricted records: don't show the title of page since it can reveal information
-               SOLUTIONS: Maybe just hide search results where "resume" is normally just not shown?
-</diverse>
 
-Unittest for t3lib_cs converting Euc/shift_jis
+Templating / Display in plugin:
+- Templating
+       - with new Template API?
+       - Still need to put a group together.
+- Support for FE display of results in extra levels (beyond levels 1,2 which are hardcoded)?
+- Configurable language parameter (hardcoded to "L" now)
 
-- Søgning i tabeller (reg. tabeller!)
+Indexing configurations (temporarily disabled):
        - Alternative presentationer af når records er indexerede.
        - incl. meta-data?
-       - Tabelvælger som en del af sektionsvælgeren
-- Support for visning i extra niveaer?
-
-
-
-
-Test kaniner (indexed search / caching?):
-       - 3DS
-       - TYPO3.org copy
-       - Metropol
-       - FI
-       - Link Factory
-       - Brunata
-
-**************
-getLL with XML-support?
-
+       - Table selector as a part of the section selector in frontend
+       - Record-indexing: support languageField in records
+       Config in backend through flexforms:
+               - baseUrl for external files?
+               - language setting for files and external URLs?
 
 CLI:
 - Removal of old indexes
@@ -57,12 +43,27 @@ CLI:
                - For files: read files from directories, compare mtime with records;
                - For URLs: Forced
                - For records: read records
-                       - All new files are indexed, all old are removed, all changed are re-indexed
+                       - All new entries are indexed, all old are removed, all changed are re-indexed
 
+Backend modules:
+- Much nicer detail display
+- Proper skinning? / getLL? / XHTML
+- The Tools>Indexing module could need some shining up and more useful features (Someone else does this?)
 
-***************
-TODO / projects:
-*****************
+Ideas:
+- (Jan Slusarczyk <janslu@grupaiis.pl>, 26/11 2004): Searchterms matching exact keywords on pages shows a special result/shortcut on top of result page?
+- Implement that extended chars are translated: ü => u, ç => c, etc. Thus "Français" will be found when "Francais" is searched for.
+
+Hook development:
+- Example of search-SQL hook
+
+Testing indexing crawler for:
+       - 3DS
+       - TYPO3.org copy
+       - Metropol
+       - FI
+       - Link Factory
+       - Brunata
 
 Documentation:
 - Configuration possibilities (piVars, TypoScript, Hooks etc)
@@ -71,15 +72,3 @@ Documentation:
        - utf-8 internally.
        - Updates on tables structure
 
-Statistics module:
-- Someone write a statistics display module for the search operation! (Displaying content from index_stat_search and index_stat_word)
-       Olivier Dobberkau / dkd is on this.
-
-Various:
-- The Tools>Indexing module could need some shining up and more useful features (Someone else does this?)
-
-Templating / Display in plugin:
-- Templating
-       - with new Template API?
-       - Still need to put a group together.
-
index ea84c6a..6b0b4eb 100755 (executable)
@@ -24,6 +24,7 @@ if (TYPO3_MODE=="BE") {
 
 t3lib_extMgm::allowTableOnStandardPages('index_config');
 
+/*
 $TCA['index_config'] = Array (
     'ctrl' => Array (
         'title' => 'LLL:EXT:indexed_search/locallang_db.php:index_config',
@@ -44,5 +45,5 @@ $TCA['index_config'] = Array (
         'fe_admin_fieldList' => 'hidden, starttime, title, description, type, depth, table2index, alternative_source_pid, get_params, chashcalc, filepath, extensions',
     )
 );
-
+*/
 ?>
\ No newline at end of file
index f6e430f..474c83a 100755 (executable)
@@ -57,10 +57,11 @@ plugin.tx_indexedsearch {
   }
 */
   tableParams {
-    secHead = border=0 cellpadding=0 cellspacing=0 width="100%"
-    searchBox =  border=0 cellpadding=0 cellspacing=0
-       searchRes =  border=0 cellpadding=0 cellspacing=0 width="100%"
+    secHead = border="0" cellpadding="0" cellspacing="0" width="100%"
+    searchBox =  border="0" cellpadding="0" cellspacing="0"
+       searchRes =  border="0" cellpadding="0" cellspacing="0" width="100%"
   }
+  forwardSearchWordsInResultLink = 0
 
    # Setting default values for piVars (please see the source code for the form-field names which you can preset values for here)
   _DEFAULT_PI_VARS.extResume=1
index af8ad8d..3296ef7 100755 (executable)
@@ -135,7 +135,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                                0 => 'Overview',
                                1 => 'Technical Details',
                                2 => 'Words and content',
-                               3 => 'Indexing'
+//                             3 => 'Indexing'
                        )
                );
     }
@@ -405,7 +405,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                        $arr = unserialize($row['cHashParams']);
                        $page = $arr['key'] ? ' ['.$arr['key'].']' : '';
                } else $page = '';
-               $elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd($row['item_title'], 20).$page) : '<em>[No Title]</em>',$row['phash']);
+               $elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']);
                $cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');
 
                switch($this->pObj->MOD_SETTINGS['type'])       {
@@ -482,7 +482,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                                                        'phash = '.intval($row['phash'])
                                                );
                                $lines[] = '<td style="white-space: normal;">'.
-                                                       t3lib_div::fixed_lgd($this->utf8_to_currentCharset(htmlspecialchars($ftrows[0]['fulltextdata'])),3000).
+                                                       htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($ftrows[0]['fulltextdata']),3000)).
                                                        '<hr/><em>Size: '.strlen($ftrows[0]['fulltextdata']).'</em>'.
                                                        '</td>';
 
@@ -502,7 +502,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                                if (is_array($ftrows))  {
                                        $indexed_words = array_keys($ftrows);
                                        sort($indexed_words);
-                                       $wordList = $this->utf8_to_currentCharset(htmlspecialchars(implode(' ',$indexed_words)));
+                                       $wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words)));
                                        $wordList.='<hr/><em>Count: '.count($indexed_words).'</em>';
                                }
 
@@ -522,7 +522,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                                        // Remove-indexing-link:
                                $lines[] = '<td>'.$cmdLinks.'</td>';
 
-                               $lines[] = '<td style="white-space: normal;">'.$this->utf8_to_currentCharset(htmlspecialchars($row['item_description'])).'...</td>';
+                               $lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>';
                                $lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
                                $lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
                        break;
@@ -694,24 +694,26 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                                                'index_words.baseword, index_words.metaphone, index_rel.*',
                                                'index_rel, index_words',
                                                'index_rel.phash = '.intval($phash).
-                                                       ' AND index_words.wid = index_rel.wid',
+                                                       ' AND index_words.wid = index_rel.wid
+                                                        AND index_words.is_stopword=0',
                                                '',
                                                'index_rel.freq DESC',
                                                '20'
                                        );
-                       $content.= $this->listWords($ftrows, 'Top-20 words by frequency:');
+                       $content.= $this->listWords($ftrows, 'Top-20 words by frequency:', 2);
 
                                // Finding top-20 on count for this phash:
                        $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
                                                'index_words.baseword, index_words.metaphone, index_rel.*',
                                                'index_rel, index_words',
                                                'index_rel.phash = '.intval($phash).
-                                                       ' AND index_words.wid = index_rel.wid',
+                                                       ' AND index_words.wid = index_rel.wid
+                                                        AND index_words.is_stopword=0',
                                                '',
                                                'index_rel.count DESC',
                                                '20'
                                        );
-                       $content.= $this->listWords($ftrows, 'Top-20 words by count:');
+                       $content.= $this->listWords($ftrows, 'Top-20 words by count:', 2);
 
 
                        $content.='<h3>Section records for this phash</h3>';
@@ -763,10 +765,11 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                        </tr>
                ';
                foreach($ftrows as $row)        {
+                       $hiddenField = $stopWordBoxes!=2 ? '<input type="hidden" name="stopWord['.$row['wid'].']" value="0" />' : '';
                        $trows.= '
                                <tr class="'.($row['is_stopword'] ? 'bgColor' : 'bgColor4').'">
-                                       '.($stopWordBoxes ? '<td align="center"'.($row['is_stopword'] ? ' style="background-color:red;"' : '').'><input type="hidden" name="stopWord['.$row['wid'].']" value="0" /><input type="checkbox" name="stopWord['.$row['wid'].']" value="1"'.($row['is_stopword']?'checked="checked"':'').' /></td>' : '').'
-                                       <td>'.$this->linkWordDetails($this->utf8_to_currentCharset(htmlspecialchars($row['baseword'])), $row['wid']).'</td>
+                                       '.($stopWordBoxes ? '<td align="center"'.($row['is_stopword'] ? ' style="background-color:red;"' : '').'>'.$hiddenField.'<input type="checkbox" name="stopWord['.$row['wid'].']" value="1"'.($row['is_stopword']?'checked="checked"':'').' /></td>' : '').'
+                                       <td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td>
                                        <td>'.htmlspecialchars($row['count']).'</td>
                                        <td>'.htmlspecialchars($row['first']).'</td>
                                        <td>'.htmlspecialchars($row['freq']).'</td>
@@ -811,7 +814,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                                                <td>'.$this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone).'</td>
                                                <td>'.htmlspecialchars($metaphone).'</td>
                                                <td>'.htmlspecialchars(count($words)).'</td>
-                                               <td style="white-space: normal;">'.$this->utf8_to_currentCharset(htmlspecialchars(implode(', ',$words))).'</td>
+                                               <td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset(implode(', ',$words))).'</td>
                                        </tr>
                                ';
                        }
@@ -1149,7 +1152,6 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
         */
        function utf8_to_currentCharset($string)        {
                global $LANG;
-
                if ($LANG->charSet != 'utf-8')  {
                        $string = $LANG->csConvObj->utf8_decode($string, $LANG->charSet, TRUE);
                }
index 7c51dfd..f261476 100755 (executable)
@@ -1459,7 +1459,17 @@ class tx_indexedsearch extends tslib_pibase {
                                $title = $this->linkPage($row['page_id'],htmlspecialchars($this->makeTitle($row)),$copy_row);
                        }
                } else {        // Else the page:
-                       $title = $this->linkPage($row['data_page_id'],htmlspecialchars($this->makeTitle($row)),$row);
+
+                               // Prepare search words for markup in content:
+                       if ($this->conf['forwardSearchWordsInResultLink'])      {
+                               $markUpSwParams = array('no_cache' => 1);
+                               foreach($this->sWArr as $d)     {
+                                       $markUpSwParams['sword_list'][] = $d['sword'];
+                               }
+                       } else {
+                               $markUpSwParams = array();
+                       }
+                       $title = $this->linkPage($row['data_page_id'],htmlspecialchars($this->makeTitle($row)),$row,$markUpSwParams);
                }
 
                $tmplContent = array();
@@ -1802,7 +1812,11 @@ class tx_indexedsearch extends tslib_pibase {
                        $tmplArray['path'] = '<a href="'.htmlspecialchars($row['data_filename']).'">'.htmlspecialchars($row['data_filename']).'</a>';
                } else {
                        $pathStr = htmlspecialchars($this->getPathFromPageId($pathId,$pathMP));
-                       $tmplArray['path'] = $this->linkPage($pathId,htmlspecialchars($pathStr),array('data_page_mp'=>$pathMP));
+                       $tmplArray['path'] = $this->linkPage($pathId,htmlspecialchars($pathStr),array(
+                               'data_page_type' => $row['data_page_type'],
+                               'data_page_mp' => $pathMP,
+                               'sys_language_uid' => $row['sys_language_uid'],
+                       ));
                }
 
                return $tmplArray;
@@ -1889,14 +1903,18 @@ class tx_indexedsearch extends tslib_pibase {
         * @param       array           Result row
         * @return      string          <A> tag wrapped title string.
         */
-       function linkPage($id,$str,$row=array())        {
+       function linkPage($id,$str,$row=array(),$markUpSwParams=array())        {
 
                        // Parameters for link:
-               $urlParameters = unserialize($row['cHashParams']);
+               $urlParameters = (array)unserialize($row['cHashParams']);
 
                        // Add &type and &MP variable:
                if ($row['data_page_type']) $urlParameters['type'] = $row['data_page_type'];
                if ($row['data_page_mp']) $urlParameters['MP'] = $row['data_page_mp'];
+               if ($row['sys_language_uid']) $urlParameters['L'] = $row['sys_language_uid'];
+
+                       // markup-GET vars:
+               $urlParameters = array_merge($urlParameters, $markUpSwParams);
 
                        // This will make sure that the path is retrieved if it hasn't been already. Used only for the sake of the domain_record thing...
                if (!is_array($this->domain_records[$id]))      {
@@ -1912,10 +1930,7 @@ class tx_indexedsearch extends tslib_pibase {
                        $addParams = '';
                        if (is_array($urlParameters))   {
                                if (count($urlParameters))      {
-                                       reset($urlParameters);
-                                       while(list($k,$v)=each($urlParameters)) {
-                                               $addParams.= '&'.$k.'='.rawurlencode($v);
-                                       }
+                                       $addParams.= t3lib_div::implodeArrayForUrl('',$urlParameters);
                                }
                        }