[FEATURE] Indexed Search: add MySQL fulltext support
author Benjamin Mack <benni@typo3.org>
Thu, 17 Nov 2011 15:22:26 +0000 (16:22 +0100)
committer Steffen Ritter <info@rs-websystems.de>
Thu, 17 Nov 2011 15:52:32 +0000 (16:52 +0100)
Makes some minor modifications to restructure the pi code and
adds some hooks that allow other search engines to be used.
Additionally, a new extension "indexed_search_mysql" is added
to the core; it enables MySQL fulltext search for Indexed Search.
Thanks to Michael Stucki for all the work!

Change-Id: I7e739baa6d35b6678d008f19737f86c3bdaa6dd5
Resolves: #28613
Reviewed-on: http://review.typo3.org/6657
Reviewed-by: Steffen Ritter
Tested-by: Steffen Ritter
19 files changed:
typo3/sysext/indexed_search/class.doublemetaphone.php
typo3/sysext/indexed_search/class.indexer.php
typo3/sysext/indexed_search/class.tx_indexedsearch_util.php [new file with mode: 0644]
typo3/sysext/indexed_search/doc/README.txt
typo3/sysext/indexed_search/ext_autoload.php [new file with mode: 0644]
typo3/sysext/indexed_search/ext_conf_template.txt
typo3/sysext/indexed_search/ext_emconf.php
typo3/sysext/indexed_search/ext_localconf.php
typo3/sysext/indexed_search/ext_tables.php
typo3/sysext/indexed_search/ext_tables.sql
typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php
typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php
typo3/sysext/indexed_search/pi/considerations.txt
typo3/sysext/indexed_search_mysql/ChangeLog [new file with mode: 0644]
typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php [new file with mode: 0644]
typo3/sysext/indexed_search_mysql/ext_emconf.php [new file with mode: 0644]
typo3/sysext/indexed_search_mysql/ext_icon.gif [new file with mode: 0755]
typo3/sysext/indexed_search_mysql/ext_localconf.php [new file with mode: 0644]
typo3/sysext/indexed_search_mysql/ext_tables.sql [new file with mode: 0644]

index f38ad07..c5c8dea 100755 (executable)
@@ -36,9 +36,9 @@
 
 
 // TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
-// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file:
-// TYPO3:                      $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
-// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example.
+// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you can enable it in the extension configuration
+// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php)
+
 
 class user_DoubleMetaPhone
 {
index 95e950a..34e0a17 100755 (executable)
@@ -90,6 +90,10 @@ class tx_indexedsearch_indexer {
        var $freqRange = 32000;
        var $freqMax = 0.1;
 
+       var $enableMetaphoneSearch = FALSE;
+       var $storeMetaphoneInfoAsWords;
+       var $metaphoneContent = '';
+
                // Objects:
        /**
         * Charset class object
@@ -112,7 +116,7 @@ class tx_indexedsearch_indexer {
         */
        var $lexerObj;
 
-
+       var $flagBitMask;
 
        /**
         * Parent Object (TSFE) Initialization
@@ -351,6 +355,11 @@ class tx_indexedsearch_indexer {
                $this->maxExternalFiles = t3lib_utility_Math::forceIntegerInRange($this->indexerConfig['maxExternalFiles'],0,1000,5);
                $this->flagBitMask = t3lib_utility_Math::forceIntegerInRange($this->indexerConfig['flagBitMask'],0,255);
 
+                       // Workaround: If the extension configuration was not updated yet, the value is not existing
+               $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? TRUE : FALSE) : TRUE;
+
+               $this->storeMetaphoneInfoAsWords = tx_indexedsearch_util::isTableUsed('index_words') ? FALSE : ($this->enableMetaphoneSearch ? TRUE : FALSE);
+
                        // Initialize external document parsers:
                        // Example configuration, see ext_localconf.php of this file!
                if ($this->conf['index_externals'])     {
@@ -367,7 +376,8 @@ class tx_indexedsearch_indexer {
 
                        // Initialize metaphone hook:
                        // Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
-               if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
+                       // Make sure that the hook is loaded _after_ indexed_search as this may overwrite the hook depending on the configuration.
+               if ($this->enableMetaphoneSearch && $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
                        $this->metaphoneObj = t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']);
                        $this->metaphoneObj->pObj = $this;
                }
@@ -449,7 +459,7 @@ class tx_indexedsearch_indexer {
                        $this->log_pull();
 
                                // Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!)
-                       $this->content_md5h = $this->md5inthash(implode($this->contentParts,''));
+                       $this->content_md5h = tx_indexedsearch_util::md5inthash(implode('', $this->contentParts));
 
                                // This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash.
                                // If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more.
@@ -479,8 +489,10 @@ class tx_indexedsearch_indexer {
 
                                                // Check words and submit to word list if not there
                                $this->log_push('Check word list and submit words','');
-                                       $this->checkWordList($indexArr);
-                                       $this->submitWords($indexArr,$this->hash['phash']);
+                                       if (tx_indexedsearch_util::isTableUsed('index_words')) {
+                                               $this->checkWordList($indexArr);
+                                               $this->submitWords($indexArr, $this->hash['phash']);
+                                       }
                                $this->log_pull();
 
                                                // Set parsetime
@@ -524,9 +536,14 @@ class tx_indexedsearch_indexer {
                $contentArr['title'] = trim(isset($titleParts[1]) ? $titleParts[1] : $titleParts[0]);
 
                        // get keywords and description metatags
-               if($this->conf['index_metatags']) {
-                       for($i=0;$this->embracingTags($headPart,'meta',$dummy,$headPart,$meta[$i]);$i++) { /*nothing*/ }
-                       for($i=0;isset($meta[$i]);$i++) {
+               if ($this->conf['index_metatags']) {
+                       $meta = array();
+                       $i = 0;
+                       while ($this->embracingTags($headPart,'meta',$dummy,$headPart, $meta[$i])) {
+                               $i++;
+                       }
+                               // TODO The code below stops at first unset tag. Is that correct?
+                       for ($i = 0; isset($meta[$i]); $i++) {
                                $meta[$i] = t3lib_div::get_tag_attributes($meta[$i]);
                                if (stristr($meta[$i]['name'], 'keywords')) {
                                        $contentArr['keywords'] .= ',' . $this->addSpacesToKeywordList($meta[$i]['content']);
@@ -1079,7 +1096,7 @@ class tx_indexedsearch_indexer {
 
                                                        if (is_array($contentParts))    {
                                                                        // Calculating a hash over what is to be the actual content. (see indexTypo3PageContent())
-                                                               $content_md5h = $this->md5inthash(implode($contentParts,''));
+                                                               $content_md5h = tx_indexedsearch_util::md5inthash(implode($contentParts,''));
 
                                                                if ($this->checkExternalDocContentHash($phash_arr['phash_grouping'], $content_md5h) || $force)  {
 
@@ -1105,8 +1122,10 @@ class tx_indexedsearch_indexer {
 
                                                                                // Check words and submit to word list if not there
                                                                        $this->log_push('Check word list and submit words','');
-                                                                               $this->checkWordList($indexArr);
-                                                                               $this->submitWords($indexArr,$phash_arr['phash']);
+                                                                               if (tx_indexedsearch_util::isTableUsed('index_words')) {
+                                                                                       $this->checkWordList($indexArr);
+                                                                                       $this->submitWords($indexArr, $phash_arr['phash']);
+                                                                               }
                                                                        $this->log_pull();
 
                                                                                // Set parsetime
@@ -1137,14 +1156,15 @@ class tx_indexedsearch_indexer {
         * @param       string          Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be splitted.)
         * @return      array           Standard content array (title, description, keywords, body keys)
         */
-       function readFileContent($ext,$absFile,$cPKey)  {
+       function readFileContent($fileExtension, $absoluteFileName, $sectionPointer) {
+               $contentArray = null;
 
                        // Consult relevant external document parser:
-               if (is_object($this->external_parsers[$ext]))   {
-                       $contentArr = $this->external_parsers[$ext]->readFileContent($ext,$absFile,$cPKey);
+               if (is_object($this->external_parsers[$fileExtension])) {
+                       $contentArray = $this->external_parsers[$fileExtension]->readFileContent($fileExtension, $absoluteFileName, $sectionPointer);
                }
 
-               return $contentArr;
+               return $contentArray;
        }
 
        /**
@@ -1280,7 +1300,7 @@ class tx_indexedsearch_indexer {
                $this->analyzeHeaderinfo($indexArr,$content,'description',5);
                $this->analyzeBody($indexArr,$content);
 
-               return ($indexArr);
+               return $indexArr;
        }
 
        /**
@@ -1294,11 +1314,29 @@ class tx_indexedsearch_indexer {
         */
        function analyzeHeaderinfo(&$retArr,$content,$key,$offset) {
                foreach ($content[$key] as $val) {
-                       $val = substr($val,0,60);       // Max 60 - because the baseword varchar IS 60. This MUST be the same.
+                       $val = substr($val, 0, 60);     // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same.
+
+                       if (!isset($retArr[$val])) {
+                                       // Word ID (wid)
+                               $retArr[$val]['hash'] = tx_indexedsearch_util::md5inthash($val);
+
+                                       // Metaphone value is also only 60 chars long
+                               $metaphone = $this->enableMetaphoneSearch
+                                               ? substr($this->metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60)
+                                               : '';
+                               $retArr[$val]['metaphone'] = $metaphone;
+                       }
+
+                               // Build metaphone fulltext string (can be used for fulltext indexing)
+                       if ($this->storeMetaphoneInfoAsWords) {
+                               $this->metaphoneContent .= ' ' . $retArr[$val]['metaphone'];
+                       }
+
+                               // Priority used for flagBitMask feature (see extension configuration)
                        $retArr[$val]['cmp'] = $retArr[$val]['cmp']|pow(2,$offset);
-                       $retArr[$val]['count'] = $retArr[$val]['count']+1;
-                       $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
-                       $retArr[$val]['metaphone'] = $this->metaphone($val);
+
+                               // Increase number of occurrences
+                       $retArr[$val]['count']++;
                        $this->wordcount++;
                }
        }
@@ -1311,14 +1349,30 @@ class tx_indexedsearch_indexer {
         * @return      void
         */
        function analyzeBody(&$retArr,$content) {
-               foreach($content['body'] as $key => $val)       {
-                       $val = substr($val,0,60);       // Max 60 - because the baseword varchar IS 60. This MUST be the same.
-                       if(!isset($retArr[$val])) {
+               foreach ($content['body'] as $key => $val) {
+                       $val = substr($val, 0, 60);     // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same.
+
+                       if (!isset($retArr[$val])) {
+                                       // First occurrence (used for ranking results)
                                $retArr[$val]['first'] = $key;
-                               $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
-                               $retArr[$val]['metaphone'] = $this->metaphone($val);
+
+                                       // Word ID (wid)
+                               $retArr[$val]['hash'] = tx_indexedsearch_util::md5inthash($val);
+
+                                       // Metaphone value is also only 60 chars long
+                               $metaphone = $this->enableMetaphoneSearch
+                                               ? substr($this->metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60)
+                                               : '';
+                               $retArr[$val]['metaphone'] = $metaphone;
+                       }
+
+                               // Build metaphone fulltext string (can be used for fulltext indexing)
+                       if ($this->storeMetaphoneInfoAsWords) {
+                               $this->metaphoneContent .= ' ' . $retArr[$val]['metaphone'];
                        }
-                       $retArr[$val]['count'] = $retArr[$val]['count']+1;
+
+                               // Increase number of occurrences
+                       $retArr[$val]['count']++;
                        $this->wordcount++;
                }
        }
@@ -1330,24 +1384,25 @@ class tx_indexedsearch_indexer {
         * @param       boolean         If set, returns the raw metaphone value (not hashed)
         * @return      mixed           Metaphone hash integer (or raw value, string)
         */
-       function metaphone($word,$retRaw=FALSE) {
+       function metaphone($word, $returnRawMetaphoneValue=FALSE) {
 
                if (is_object($this->metaphoneObj))     {
-                       $tmp = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']);
+                       $metaphoneRawValue = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']);
                } else {
-                       $tmp = metaphone($word);
+                               // Use native PHP function instead of advanced doubleMetaphone class
+                       $metaphoneRawValue = metaphone($word);
                }
 
-                       // Return raw value?
-               if ($retRaw)    return $tmp;
-
-                       // Otherwise create hash and return integer
-               if ($tmp == '') {
-                       $ret = 0;
+               if ($returnRawMetaphoneValue) {
+                       $result = $metaphoneRawValue;
+               } elseif (strlen($metaphoneRawValue)) {
+                               // Create hash and return integer
+                       $result = tx_indexedsearch_util::md5inthash($metaphoneRawValue);
                } else {
-                       $ret = hexdec(substr(md5($tmp), 0, 7));
+                       $result = 0;
                }
-               return $ret;
+
+               return $result;
        }
 
 
@@ -1407,7 +1462,9 @@ class tx_indexedsearch_indexer {
                        'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
                );
 
-               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
+               }
 
                        // PROCESSING index_section
                $this->submit_section($this->hash['phash'],$this->hash['phash']);
@@ -1418,12 +1475,15 @@ class tx_indexedsearch_indexer {
                        // PROCESSING index_fulltext
                $fields = array(
                        'phash' => $this->hash['phash'],
-                       'fulltextdata' => implode(' ', $this->contentParts)
+                       'fulltextdata' => implode(' ', $this->contentParts),
+                       'metaphonedata' => $this->metaphoneContent
                );
                if ($this->indexerConfig['fullTextDataLength']>0)       {
                        $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
                }
-               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
+               if (tx_indexedsearch_util::isTableUsed('index_fulltext')) {
+                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
+               }
 
                        // PROCESSING index_debug
                if ($this->indexerConfig['debugMode'])  {
@@ -1438,7 +1498,9 @@ class tx_indexedsearch_indexer {
                                                'lexer' => $this->lexerObj->debugString,
                                        ))
                        );
-                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
+                       if (tx_indexedsearch_util::isTableUsed('index_debug')) {
+                               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
+                       }
                }
        }
 
@@ -1456,10 +1518,12 @@ class tx_indexedsearch_indexer {
                $fields = array(
                        'phash' => $hash,
                        'phash_x' => $phash_x,
-                       'hash_gr_list' => $this->md5inthash($this->conf['gr_list']),
+                       'hash_gr_list' => tx_indexedsearch_util::md5inthash($this->conf['gr_list']),
                        'gr_list' => $this->conf['gr_list']
                );
-               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields);
+               if (tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields);
+               }
        }
 
        /**
@@ -1479,7 +1543,9 @@ class tx_indexedsearch_indexer {
 
                $this->getRootLineFields($fields);
 
-               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields);
+               if (tx_indexedsearch_util::isTableUsed('index_section')) {
+                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields);
+               }
        }
 
        /**
@@ -1490,12 +1556,16 @@ class tx_indexedsearch_indexer {
         */
        function removeOldIndexedPages($phash)  {
                        // Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here.
-               $tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug');
-               foreach($tableArr as $table)    {
-                       $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
+               $tableArray = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug');
+               foreach ($tableArray as $table) {
+                       if (tx_indexedsearch_util::isTableUsed($table)) {
+                               $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash=' . intval($phash));
+                       }
                }
                        // Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file).
-               $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
+               if (tx_indexedsearch_util::isTableUsed('index_section')) {
+                       $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3=' . intval($phash));
+               }
        }
 
 
@@ -1559,22 +1629,27 @@ class tx_indexedsearch_indexer {
                        'tstamp' => $GLOBALS['EXEC_TIME'],
                        'crdate' => $GLOBALS['EXEC_TIME'],
                        'gr_list' => $this->conf['gr_list'],
-                       'externalUrl' => $fileParts['scheme'] ? 1 : 0,
-                       'recordUid' => intval($this->conf['recordUid']),
-                       'freeIndexUid' => intval($this->conf['freeIndexUid']),
-                       'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
+                       'externalUrl' => $fileParts['scheme'] ? 1 : 0,
+                       'recordUid' => intval($this->conf['recordUid']),
+                       'freeIndexUid' => intval($this->conf['freeIndexUid']),
+                       'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
                );
-               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
+               }
 
                        // PROCESSING index_fulltext
                $fields = array(
                        'phash' => $hash['phash'],
-                       'fulltextdata' => implode(' ', $contentParts)
+                       'fulltextdata' => implode(' ', $contentParts),
+                       'metaphonedata' => $this->metaphoneContent
                );
                if ($this->indexerConfig['fullTextDataLength']>0)       {
                        $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
                }
-               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
+               if (tx_indexedsearch_util::isTableUsed('index_fulltext')) {
+                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
+               }
 
                        // PROCESSING index_debug
                if ($this->indexerConfig['debugMode'])  {
@@ -1587,7 +1662,9 @@ class tx_indexedsearch_indexer {
                                                'lexer' => $this->lexerObj->debugString,
                                        ))
                        );
-                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
+                       if (tx_indexedsearch_util::isTableUsed('index_debug')) {
+                               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
+                       }
                }
        }
 
@@ -1599,15 +1676,11 @@ class tx_indexedsearch_indexer {
         */
        function submitFile_grlist($hash)       {
                        // Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one.
-               $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows(
-                       'phash',
-                       'index_grlist',
-                       'phash=' . intval($hash) .
-                               ' AND (hash_gr_list=' . $this->md5inthash($this->defaultGrList) .
-                               ' OR hash_gr_list=' . $this->md5inthash($this->conf['gr_list']) . ')'
-               );
-               if (!$count) {
-                       $this->submit_grlist($hash,$hash);
+               if (tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                       $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('phash', 'index_grlist', 'phash=' . intval($hash) . ' AND (hash_gr_list=' . tx_indexedsearch_util::md5inthash($this->defaultGrList) . ' OR hash_gr_list=' . tx_indexedsearch_util::md5inthash($this->conf['gr_list']) . ')');
+                       if ($count == 0) {
+                               $this->submit_grlist($hash, $hash);
+                       }
                }
        }
 
@@ -1617,11 +1690,13 @@ class tx_indexedsearch_indexer {
         * @param       integer         phash value of file
         * @return      void
         */
-       function submitFile_section($hash)      {
-                       // Testing if there is a section
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']));
-               if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))  {
-                       $this->submit_section($hash,$this->hash['phash']);
+       function submitFile_section($hash) {
+                       // Testing if there is already a section
+               if (tx_indexedsearch_util::isTableUsed('index_section')) {
+                       $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('phash', 'index_section', 'phash=' . intval($hash) . ' AND page_id=' . intval($this->conf['id']));
+                       if ($count == 0) {
+                               $this->submit_section($hash,$this->hash['phash']);
+                       }
                }
        }
 
@@ -1632,11 +1707,12 @@ class tx_indexedsearch_indexer {
         * @return      void
         */
        function removeOldIndexedFiles($phash)  {
-
                        // Removing old registrations for tables.
-               $tableArr = explode(',','index_phash,index_grlist,index_fulltext,index_debug');
-               foreach($tableArr as $table)    {
-                       $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
+               $tableArray = explode(',','index_phash,index_grlist,index_fulltext,index_debug');
+               foreach ($tableArray as $table) {
+                       if (tx_indexedsearch_util::isTableUsed($table)) {
+                               $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash=' . intval($phash));
+                       }
                }
        }
 
@@ -1668,34 +1744,53 @@ class tx_indexedsearch_indexer {
         * @return      integer         Result integer: Generally: <0 = No indexing, >0 = Do indexing (see $this->reasons): -2) Min age was NOT exceeded and so indexing cannot occur.  -1) mtime matched so no need to reindex page. 0) N/A   1) Max age exceeded, page must be indexed again.   2) mtime of indexed page doesn't match mtime given for current content and we must index page.  3) No mtime was set, so we will index...  4) No indexed page found, so of course we will index.
         */
        function checkMtimeTstamp($mtime,$phash)        {
-
-                       // Select indexed page:
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
-               $out = 0;
+               if (!tx_indexedsearch_util::isTableUsed('index_phash')) {
+                               // Not indexed (not in index_phash)
+                       $result = 4;
+               }
+               else {
+                       $row = $GLOBALS['TYPO3_DB']->exec_SELECTgetSingleRow('item_mtime,tstamp', 'index_phash', 'phash=' . intval($phash));
 
                        // If there was an indexing of the page...:
-               if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
-                       if ($this->tstamp_maxAge && ($row['tstamp'] + $this->tstamp_maxAge) < $GLOBALS['EXEC_TIME']) {  // If max age is exceeded, index the page
-                               $out = 1;               // The configured max-age was exceeded for the document and thus it's indexed.
-                       } else {
-                               if (!$this->tstamp_minAge || ($row['tstamp'] + $this->tstamp_minAge) < $GLOBALS['EXEC_TIME']) { // if minAge is not set or if minAge is exceeded, consider at mtime
-                                       if ($mtime)     {               // It mtime is set, then it's tested. If not, the page must clearly be indexed.
-                                               if ($row['item_mtime'] != $mtime)       {       // And if mtime is different from the index_phash mtime, it's about time to re-index.
-                                                       $out = 2;               // The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.
-                                               } else {
-                                                       $out = -1;              // mtime matched the document, so no changes detected and no content updated
-                                                       if ($this->tstamp_maxAge)       {
-                                                               $this->log_setTSlogMessage('mtime matched, timestamp NOT updated because a maxAge is set (' . ($row['tstamp'] + $this->tstamp_maxAge - $GLOBALS['EXEC_TIME']) . ' seconds to expire time).', 1);
+                       if ($row) {
+                               if ($this->tstamp_maxAge && ($row['tstamp'] + $this->tstamp_maxAge) < $GLOBALS['EXEC_TIME']) {
+                                               // If max age is exceeded, index the page
+                                               // The configured max-age was exceeded for the document and thus it's indexed.
+                                       $result = 1;
+                               } else {
+                                       if (!$this->tstamp_minAge || ($row['tstamp'] + $this->tstamp_minAge) < $GLOBALS['EXEC_TIME']) {
+                                                       // if minAge is not set or if minAge is exceeded, consider at mtime
+                                               if ($mtime)     {
+                                                               // If mtime is set, then it's tested. If not, the page must clearly be indexed.
+                                                       if ($row['item_mtime'] != $mtime) {
+                                                                       // And if mtime is different from the index_phash mtime, it's about time to re-index.
+                                                                       // The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.
+                                                               $result = 2;
                                                        } else {
-                                                               $this->updateTstamp($phash);    // Update the timestatmp
-                                                               $this->log_setTSlogMessage('mtime matched, timestamp updated.',1);
+                                                                       // mtime matched the document, so no changes detected and no content updated
+                                                               $result = -1;
+                                                               if ($this->tstamp_maxAge)       {
+                                                                       $this->log_setTSlogMessage('mtime matched, timestamp NOT updated because a maxAge is set (' . ($row['tstamp'] + $this->tstamp_maxAge - $GLOBALS['EXEC_TIME']) . ' seconds to expire time).', 1);
+                                                               } else {
+                                                                       $this->updateTstamp($phash);
+                                                                       $this->log_setTSlogMessage('mtime matched, timestamp updated.',1);
+                                                               }
                                                        }
+                                               } else {
+                                                               // The minimum age was exceed, but mtime was not set, so the page was indexed.
+                                                       $result = 3;
                                                }
-                                       } else {$out = 3;       }       // The minimum age was exceed, but mtime was not set, so the page was indexed.
-                               } else {$out = -2;}                     // The minimum age was not exceeded
+                                       } else {
+                                                       // The minimum age was not exceeded
+                                               $result = -2;
+                                       }
+                               }
+                       } else {
+                                       // Page has never been indexed (is not represented in the index_phash table).
+                               $result = 4;
                        }
-               } else {$out = 4;}      // Page has never been indexed (is not represented in the index_phash table).
-               return $out;
+               }
+               return $result;
        }
 
        /**
@@ -1705,11 +1800,15 @@ class tx_indexedsearch_indexer {
         */
        function checkContentHash()     {
                        // With this query the page will only be indexed if it's content is different from the same "phash_grouping" -page.
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h));
-               if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
-                       return $row;
+               $result = TRUE;
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $row = $GLOBALS['TYPO3_DB']->exec_SELECTgetSingleRow('phash', 'index_phash', 'phash_grouping='.intval($this->hash['phash_grouping']).' AND contentHash='.intval($this->content_md5h));
+                       if ($row) {
+                               $result = $row;
+                       }
                }
-               return 1;
+
+               return $result;
        }
 
        /**
@@ -1721,25 +1820,28 @@ class tx_indexedsearch_indexer {
         * @return      boolean         Returns TRUE if the document needs to be indexed (that is, there was no result)
         */
        function checkExternalDocContentHash($hashGr,$content_md5h)     {
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h));
-               if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
-                       return 0;
+               $result = TRUE;
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'index_phash', 'phash_grouping=' . intval($hashGr) . ' AND contentHash=' . intval($content_md5h));
+                       $result = ($count == 0);
                }
-               return 1;
+
+               return $result;
        }
 
        /**
         * Checks if a grlist record has been set for the phash value input (looking at the "real" phash of the current content, not the linked-to phash of the common search result page)
         *
         * @param       integer         Phash integer to test.
-        * @return      void
+        * @return      boolean
         */
        function is_grlist_set($phash_x)        {
-               return $GLOBALS['TYPO3_DB']->exec_SELECTcountRows(
-                       'phash_x',
-                       'index_grlist',
-                       'phash_x=' . intval($phash_x)
-               );
+               $result = FALSE;
+               if (tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                       $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('phash_x', 'index_grlist', 'phash_x=' . intval($phash_x));
+                       $result = ($count > 0);
+               }
+               return $result;
        }
 
        /**
@@ -1750,11 +1852,13 @@ class tx_indexedsearch_indexer {
         * @return      void
         * @see submit_grlist()
         */
-       function update_grlist($phash,$phash_x) {
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']));
-               if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))  {
-                       $this->submit_grlist($phash,$phash_x);
-                       $this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
+       function update_grlist($phash, $phash_x) {
+               if (tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                       $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('phash', 'index_grlist', 'phash=' . intval($phash) . ' AND hash_gr_list=' . tx_indexedsearch_util::md5inthash($this->conf['gr_list']));
+                       if ($count == 0) {
+                               $this->submit_grlist($phash, $phash_x);
+                               $this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
+                       }
                }
        }
 
@@ -1765,13 +1869,16 @@ class tx_indexedsearch_indexer {
         * @param       integer         If set, update the mtime field to this value.
         * @return      void
         */
-       function updateTstamp($phash,$mtime=0)  {
-               $updateFields = array(
-                       'tstamp' => $GLOBALS['EXEC_TIME']
-               );
-               if ($mtime)     { $updateFields['item_mtime'] = intval($mtime); }
-
-               $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
+       function updateTstamp($phash, $mtime = 0) {
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $updateFields = array(
+                               'tstamp' => $GLOBALS['EXEC_TIME']
+                       );
+                       if ($mtime)     {
+                               $updateFields['item_mtime'] = intval($mtime);
+                       }
+                       $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash=' . intval($phash), $updateFields);
+               }
        }
 
        /**
@@ -1780,12 +1887,13 @@ class tx_indexedsearch_indexer {
         * @param       integer         phash value
         * @return      void
         */
-       function updateSetId($phash)    {
-               $updateFields = array(
-                       'freeIndexSetId' => intval($this->conf['freeIndexSetId'])
-               );
-
-               $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
+       function updateSetId($phash) {
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $updateFields = array(
+                               'freeIndexSetId' => intval($this->conf['freeIndexSetId'])
+                       );
+                       $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash=' . intval($phash), $updateFields);
+               }
        }
 
        /**
@@ -1795,12 +1903,13 @@ class tx_indexedsearch_indexer {
         * @param       integer         Parsetime value to set.
         * @return      void
         */
-       function updateParsetime($phash,$parsetime)     {
-               $updateFields = array(
-                       'parsetime' => intval($parsetime)
-               );
-
-               $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
+       function updateParsetime($phash, $parsetime) {
+               if (tx_indexedsearch_util::isTableUsed('index_phash')) {
+                       $updateFields = array(
+                               'parsetime' => intval($parsetime)
+                       );
+                       $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash=' . intval($phash), $updateFields);
+               }
        }
 
        /**
@@ -1808,12 +1917,12 @@ class tx_indexedsearch_indexer {
         *
         * @return      void
         */
-       function updateRootline()       {
-
-               $updateFields = array();
-               $this->getRootLineFields($updateFields);
-
-               $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields);
+       function updateRootline() {
+               if (tx_indexedsearch_util::isTableUsed('index_section')) {
+                       $updateFields = array();
+                       $this->getRootLineFields($updateFields);
+                       $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id=' . intval($this->conf['id']), $updateFields);
+               }
        }
 
        /**
@@ -1823,15 +1932,14 @@ class tx_indexedsearch_indexer {
         * @param       array           Field array, passed by reference
         * @return      void
         */
-       function getRootLineFields(&$fieldArr)  {
-
-               $fieldArr['rl0'] = intval($this->conf['rootline_uids'][0]);
-               $fieldArr['rl1'] = intval($this->conf['rootline_uids'][1]);
-               $fieldArr['rl2'] = intval($this->conf['rootline_uids'][2]);
+       function getRootLineFields(array &$fieldArray) {
+               $fieldArray['rl0'] = intval($this->conf['rootline_uids'][0]);
+               $fieldArray['rl1'] = intval($this->conf['rootline_uids'][1]);
+               $fieldArray['rl2'] = intval($this->conf['rootline_uids'][2]);
 
                if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields']))    {
                        foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel)  {
-                               $fieldArr[$fieldName] = intval($this->conf['rootline_uids'][$rootLineLevel]);
+                               $fieldArray[$fieldName] = intval($this->conf['rootline_uids'][$rootLineLevel]);
                        }
                }
        }
@@ -1843,14 +1951,17 @@ class tx_indexedsearch_indexer {
         * @return      void
         */
        function removeLoginpagesWithContentHash()      {
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', '
+               if (tx_indexedsearch_util::isTableUsed('index_phash') && tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('A.phash', 'index_phash A,index_grlist B', '
                                        A.phash=B.phash
-                                       AND A.phash_grouping='.intval($this->hash['phash_grouping']).'
-                                       AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).'
+                                       AND A.phash_grouping='.intval($this->hash['phash_grouping']) . '
+                                       AND B.hash_gr_list!='.tx_indexedsearch_util::md5inthash($this->defaultGrList) . '
                                        AND A.contentHash='.intval($this->content_md5h));
-               while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))       {
-                       $this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1);
-                       $this->removeOldIndexedPages($row['phash']);
+                       while ($res && FALSE !== ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))) {
+                               $this->log_setTSlogMessage('The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash=\'' . $row['phash'] . '\' are now removed.', 1);
+                               $this->removeOldIndexedPages($row['phash']);
+                       }
+                       $GLOBALS['TYPO3_DB']->sql_free_result($res);
                }
        }
 
@@ -1860,9 +1971,7 @@ class tx_indexedsearch_indexer {
         * @return      void
         */
        function includeCrawlerClass()  {
-               global $TYPO3_CONF_VARS;
-
-               require_once(t3lib_extMgm::extPath('crawler').'class.tx_crawler_lib.php');
+               t3lib_div::requireOnce(t3lib_extMgm::extPath('crawler') . 'class.tx_crawler_lib.php');
        }
 
 
@@ -1883,34 +1992,38 @@ class tx_indexedsearch_indexer {
        /**
         * Adds new words to db
         *
-        * @param       array           Word List array (where each word has information about position etc).
-        * @return      void
+        * @param array $wordListArray Word List array (where each word has information about position etc).
+        * @return void
         */
-       function checkWordList($wl) {
-               $phashArr = array();
-               foreach ($wl as $key => $value) {
-                       $phashArr[] = $wl[$key]['hash'];
-               }
-               if (count($phashArr))   {
-                       $cwl = implode(',',$phashArr);
-                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')');
-
-                       if($GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) {
-                               $this->log_setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']->sql_num_rows($res)),1);
-                               while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
-                                       unset($wl[$row['baseword']]);
+       function checkWordList($wordListArray) {
+               if (tx_indexedsearch_util::isTableUsed('index_words')) {
+                       if (count($wordListArray))      {
+                               $phashArray = array();
+                               foreach ($wordListArray as $value) {
+                                       $phashArray[] = intval($value['hash']);
                                }
-
-                               foreach ($wl as $key => $val) {
-                                       $insertFields = array(
-                                               'wid' => $val['hash'],
-                                               'baseword' => $key,
-                                               'metaphone' => $val['metaphone']
-                                       );
-                                               // A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 60 chars (the baseword varchar is 60 characters...) this is not a problem.
-                                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
+                               $cwl = implode(',', $phashArray);
+                               $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('baseword', 'index_words', 'wid IN (' . $cwl . ')');
+                               if ($count != count($wordListArray)) {
+                                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN (' . $cwl . ')');
+                                       $this->log_setTSlogMessage('Inserting words: ' . (count($wordListArray) - $count), 1);
+                                       while (FALSE != ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))) {
+                                               unset($wordListArray[$row['baseword']]);
+                                       }
+                                       $GLOBALS['TYPO3_DB']->sql_free_result($res);
+
+                                       foreach ($wordListArray as $key => $val) {
+                                               $insertFields = array(
+                                                       'wid' => $val['hash'],
+                                                       'baseword' => $key,
+                                                       'metaphone' => $val['metaphone']
+                                               );
+                                                       // A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wordListArray are NOT longer than 60 chars (the baseword varchar is 60 characters...) this is not a problem.
+                                               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
+                                       }
                                }
                        }
+
                }
        }
 
@@ -1921,20 +2034,22 @@ class tx_indexedsearch_indexer {
         * @param       integer         phash value
         * @return      void
         */
-       function submitWords($wl,$phash) {
-               $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));
-
-               foreach($wl as $val)    {
-                       $insertFields = array(
-                               'phash' => $phash,
-                               'wid' => $val['hash'],
-                               'count' => $val['count'],
-                               'first' => $val['first'],
-                               'freq' => $this->freqMap(($val['count']/$this->wordcount)),
-                               'flags' => ($val['cmp'] & $this->flagBitMask)
-                       );
-
-                       $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
+       function submitWords($wordList, $phash) {
+               if (tx_indexedsearch_util::isTableUsed('index_rel')) {
+                       $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash=' . intval($phash));
+
+                       foreach ($wordList as $val) {
+                               $insertFields = array(
+                                       'phash' => $phash,
+                                       'wid' => $val['hash'],
+                                       'count' => $val['count'],
+                                       'first' => $val['first'],
+                                       'freq' => $this->freqMap(($val['count']/$this->wordcount)),
+                                       'flags' => ($val['cmp'] & $this->flagBitMask)
+                               );
+
+                               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
+                       }
                }
        }
 
@@ -1947,14 +2062,13 @@ class tx_indexedsearch_indexer {
         */
        function freqMap($freq) {
                $mapFactor = $this->freqMax*100*$this->freqRange;
-               if($freq<1) {
+               if ($freq < 1) {
                        $newFreq = $freq*$mapFactor;
-                       $newFreq = $newFreq>$this->freqRange?$this->freqRange:$newFreq;
+                       $newFreq = $newFreq>$this->freqRange ? $this->freqRange : $newFreq;
                } else {
                        $newFreq = $freq/$mapFactor;
                }
                return $newFreq;
-
        }
 
 
@@ -1990,11 +2104,11 @@ class tx_indexedsearch_indexer {
                );
 
                        // Set grouping hash (Identifies a "page" combined of id, type, language, mountpoint and cHash parameters):
-               $this->hash['phash_grouping'] = $this->md5inthash(serialize($hArray));
+               $this->hash['phash_grouping'] = tx_indexedsearch_util::md5inthash(serialize($hArray));
 
                        // Add gr_list and set plain phash (Subdivision where special page composition based on login is taken into account as well. It is expected that such pages are normally similar regardless of the login.)
                $hArray['gr_list'] = (string)$this->conf['gr_list'];
-               $this->hash['phash'] = $this->md5inthash(serialize($hArray));
+               $this->hash['phash'] = tx_indexedsearch_util::md5inthash(serialize($hArray));
        }
 
        /**
@@ -2012,36 +2126,28 @@ class tx_indexedsearch_indexer {
                );
 
                        // Set grouping hash:
-               $hash['phash_grouping'] = $this->md5inthash(serialize($hArray));
+               $hash['phash_grouping'] = tx_indexedsearch_util::md5inthash(serialize($hArray));
 
                        // Add subinfo
                $hArray['subinfo'] = $subinfo;
-               $hash['phash'] = $this->md5inthash(serialize($hArray));
+               $hash['phash'] = tx_indexedsearch_util::md5inthash(serialize($hArray));
 
                return $hash;
        }
 
        /**
-        * md5 integer hash
-        * Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they do not interfere with UNSIGNED integers or PHP-versions which has varying output from the hexdec function.
+        * Calculates md5 integer hash. This is kept for the compatibility with
+        * previous versions. Delegates actual call to tx_indexedsearch_util.
         *
-        * @param       string          String to hash
-        * @return      integer         Integer intepretation of the md5 hash of input string.
+        * @param string $stringToHash String to hash
+        * @return int Integer interpretation of the md5 hash of input string.
+        * @deprecated Use tx_indexedsearch_util::md5inthash() instead.
         */
-       function md5inthash($str)       {
-               return hexdec(substr(md5($str),0,7));
+       function md5inthash($stringToHash) {
+               t3lib_div::logDeprecatedFunction();
+               return tx_indexedsearch_util::md5inthash($stringToHash);
        }
 
-
-
-
-
-
-
-
-
-
-
        /*********************************
         *
         * Internal logging functions
diff --git a/typo3/sysext/indexed_search/class.tx_indexedsearch_util.php b/typo3/sysext/indexed_search/class.tx_indexedsearch_util.php
new file mode 100644 (file)
index 0000000..f48e74d
--- /dev/null
@@ -0,0 +1,64 @@
+<?php
+/***************************************************************
+*  Copyright notice
+*
+*  (c) 2011 Dmitry Dulepov (dmitry@typo3.com)
+*  All rights reserved
+*
+*  This script is part of the TYPO3 project. The TYPO3 project is
+*  free software; you can redistribute it and/or modify
+*  it under the terms of the GNU General Public License as published by
+*  the Free Software Foundation; either version 2 of the License, or
+*  (at your option) any later version.
+*
+*  The GNU General Public License can be found at
+*  http://www.gnu.org/copyleft/gpl.html.
+*  A copy is found in the textfile GPL.txt and important notices to the license
+*  from the author is found in LICENSE.txt distributed with these scripts.
+*
+*
+*  This script is distributed in the hope that it will be useful,
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*  GNU General Public License for more details.
+*
+*  This copyright notice MUST APPEAR in all copies of the script!
+***************************************************************/
+
+/**
+ * Class with common methods used across various classes in the indexed search.
+ * Implementation is provided by various people from the TYPO3 community.
+ *
+ * This class is final because it contains only static methods.
+ *
+ * @author Dmitry Dulepov <dmitry@typo3.com>
+ * @package TYPO3
+ * @subpackage tx_indexedsearch
+ */
+final class tx_indexedsearch_util {
+
+       /**
+        * Check if the table provided is configured for usage. This becomes
+        * necessary for extensions that provide additional database functionality
+        * like indexed_search_mysql.
+        *
+        * @param string $tableName Table name to check
+        * @return boolean True if the given table is used
+        */
+       static function isTableUsed($tableName) {
+               $tableList = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables'];
+               return t3lib_div::inList($tableList, $tableName);
+       }
+
+       /**
+        * md5 integer hash
+        * Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they do not interfere with UNSIGNED integers or PHP versions which have varying output from the hexdec function.
+        *
+        * @param string $stringToHash String to hash
+        * @return int Integer interpretation of the md5 hash of input string.
+        */
+       static function md5inthash($stringToHash) {
+               return hexdec(substr(md5($stringToHash), 0, 7));
+       }
+
+}
index 677f90d..3f113c1 100644 (file)
@@ -1,2 +1,23 @@
 A full documentation manual for the indexed search extension can be found in the extension "doc_indexed_search" in the TER.
 See http://typo3.org/documentation/document-library/extension-manuals/doc_indexed_search/current/view/
+
+This is a list of all tables which are used by this extension:
+
+index_phash
+- Page information
+
+index_fulltext
+- Fulltext data
+
+index_rel
+- Relations between index_phash and index_words
+
+index_words
+- baseword table
+
+index_section
+- section index (= first 3 levels of the rootline for this document)
+
+index_grlist
+- group list information
+- indicates which gr_list has access to which phash
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search/ext_autoload.php b/typo3/sysext/indexed_search/ext_autoload.php
new file mode 100644 (file)
index 0000000..fc4dc18
--- /dev/null
@@ -0,0 +1,9 @@
+<?php
+
+$extensionPath = t3lib_extMgm::extPath('indexed_search');
+return array(
+       'tx_indexedsearch_indexer' => $extensionPath . 'class.indexer.php',
+       'tx_indexedsearch_util' => $extensionPath . 'class.tx_indexedsearch_util.php',
+);
+
+?>
\ No newline at end of file
index 694bdc8..b0af701 100755 (executable)
@@ -28,6 +28,9 @@ fullTextDataLength = 0
   # cat=basic; type=boolean; label=Disable Indexing in Frontend: By default pages are indexed during viewing of pages in the frontend. You can disable this features so indexing of pages is only initiated through the backend page crawler.
 disableFrontendIndexing = 0
 
+  # cat=basic; type=int; label=Enable metaphone search (sounds like). 0=disabled, 1=use internal metaphone parser, 2=use advanced doubleMetaphone parser.
+enableMetaphoneSearch = 1
+
   # cat=basic; type=int; label=Min TTL (hours) for indexed page: The time in hours that must pass before an indexed page can be indexed again regardless of changes on the page.
 minAge = 24
 
@@ -40,7 +43,7 @@ maxExternalFiles = 5
   # cat=basic; type=boolean; label=Use "crawler" extension to index external files: When external files are found on a page they are added to the "crawler" extensions queue and indexed via the cronscript running the crawler. This eliminates problems with for example many PDF files on a page. Requires a proper configuration of the "crawler" extension.
 useCrawlerForExternalFiles = 0
 
-  # cat=basic; type=int; label=Bitmask for Flags (Advanced): By this value (0-255) you can filter the importance of <title> (128), <keywords> (64) and <description> (32) content from HTML documents. By default none of these will have any importance over the other. Setting the value to eg. 192 means that title-tag content and meta-keywords will be flagged (and rate higher in search results)
+  # cat=basic; type=int; label=Bitmask for Flags (Advanced): By this value (0-255) you can filter the importance of <title> (128), <keywords> (64) and <description> (32) content from HTML documents. By setting this to 0, none of these fields will have any importance over the other. The default value 192 means that title-tag content and meta-keywords will be flagged (and rated higher in search results)
 flagBitMask = 192
 
   # cat=basic; type=string; label=Ignore Extensions: List of file extensions that the external parser will ignore (despite having support for them). Comma list.
index 182457a..5e9be9a 100755 (executable)
@@ -3,7 +3,7 @@
 ########################################################################
 # Extension Manager/Repository config file for ext "indexed_search".
 #
-# Auto generated 25-10-2011 13:11
+# Auto generated 24-08-2010 14:45
 #
 # Manual updates:
 # Only the data in the array - everything else is removed by next
@@ -32,13 +32,13 @@ $EM_CONF[$_EXTKEY] = array(
        'author_company' => 'Curby Soft Multimedia',
        'CGLcompliance' => '',
        'CGLcompliance_note' => '',
-       'version' => '2.13.0',
-       '_md5_values_when_last_written' => 'a:56:{s:9:"ChangeLog";s:4:"7479";s:17:"class.crawler.php";s:4:"ec8d";s:25:"class.doublemetaphone.php";s:4:"f691";s:25:"class.external_parser.php";s:4:"ad1e";s:17:"class.indexer.php";s:4:"d169";s:15:"class.lexer.php";s:4:"c88d";s:21:"ext_conf_template.txt";s:4:"0c64";s:12:"ext_icon.gif";s:4:"4cbf";s:17:"ext_localconf.php";s:4:"4c42";s:14:"ext_tables.php";s:4:"3a84";s:14:"ext_tables.sql";s:4:"7084";s:28:"ext_typoscript_editorcfg.txt";s:4:"0a34";s:24:"ext_typoscript_setup.txt";s:4:"9b5f";s:13:"locallang.xlf";s:4:"94af";s:26:"locallang_csh_indexcfg.xlf";s:4:"5d7c";s:16:"locallang_db.xlf";s:4:"0a5e";s:7:"tca.php";s:4:"f24a";s:12:"cli/conf.php";s:4:"19fe";s:14:"doc/README.txt";s:4:"a737";s:12:"doc/TODO.txt";s:4:"c804";s:29:"example/class.crawlerhook.php";s:4:"6662";s:24:"example/class.pihook.php";s:4:"f7dd";s:46:"hooks/class.tx_indexedsearch_tslib_fe_hook.php";s:4:"1bb8";s:13:"mod/clear.gif";s:4:"cc11";s:12:"mod/conf.php";s:4:"7a2f";s:13:"mod/index.php";s:4:"378e";s:15:"mod/isearch.gif";s:4:"4cbf";s:21:"mod/locallang_mod.xlf";s:4:"e903";s:21:"mod/mod_template.html";s:4:"a7f2";s:44:"modfunc1/class.tx_indexedsearch_modfunc1.php";s:4:"1963";s:22:"modfunc1/locallang.xlf";s:4:"0abc";s:44:"modfunc2/class.tx_indexedsearch_modfunc2.php";s:4:"edce";s:22:"modfunc2/locallang.xlf";s:4:"6e83";s:29:"pi/class.tx_indexedsearch.php";s:4:"467f";s:21:"pi/considerations.txt";s:4:"e3df";s:22:"pi/indexed_search.tmpl";s:4:"4b28";s:16:"pi/locallang.xlf";s:4:"c0a4";s:20:"pi/template_css.tmpl";s:4:"5251";s:14:"pi/res/csv.gif";s:4:"6a23";s:14:"pi/res/doc.gif";s:4:"2ec9";s:15:"pi/res/html.gif";s:4:"5647";s:14:"pi/res/jpg.gif";s:4:"e8df";s:17:"pi/res/locked.gif";s:4:"c212";s:16:"pi/res/pages.gif";s:4:"1405";s:14:"pi/res/pdf.gif";s:4:"9451";s:14:"pi/res/pps.gif";s:4:"926b";s:14:"pi/res/ppt.gif";s:4:"ada5";s:14:"pi/res/rtf.gif";s:4:"f660";s:14:"pi/res/sxc.gif";s:4:"00a6";s:14:"pi/res/sxi.gif";s:4:"4223";s:14:"pi/res/sxw.gif";s:4:"4a8f";s:14:"pi/res/tif.gif";s
:4:"533b";s:14:"pi/res/txt.gif";s:4:"0004";s:14:"pi/res/xls.gif";s:4:"f106";s:14:"pi/res/xml.gif";s:4:"c32d";s:38:"tests/tx_indexedsearch_indexerTest.php";s:4:"3bb1";}',
+       'version' => '2.12.0',
+       '_md5_values_when_last_written' => 'a:56:{s:9:"ChangeLog";s:4:"7479";s:17:"class.crawler.php";s:4:"fabe";s:25:"class.doublemetaphone.php";s:4:"28e4";s:25:"class.external_parser.php";s:4:"b3f9";s:17:"class.indexer.php";s:4:"bf71";s:15:"class.lexer.php";s:4:"72fd";s:21:"ext_conf_template.txt";s:4:"0c64";s:12:"ext_icon.gif";s:4:"4cbf";s:17:"ext_localconf.php";s:4:"4c42";s:14:"ext_tables.php";s:4:"3a84";s:14:"ext_tables.sql";s:4:"7084";s:28:"ext_typoscript_editorcfg.txt";s:4:"0a34";s:24:"ext_typoscript_setup.txt";s:4:"9b5f";s:13:"locallang.xml";s:4:"cd0c";s:26:"locallang_csh_indexcfg.xml";s:4:"f4f3";s:16:"locallang_db.xml";s:4:"f142";s:7:"tca.php";s:4:"f24a";s:12:"cli/conf.php";s:4:"19fe";s:14:"doc/README.txt";s:4:"a737";s:12:"doc/TODO.txt";s:4:"c804";s:29:"example/class.crawlerhook.php";s:4:"626a";s:24:"example/class.pihook.php";s:4:"bf31";s:46:"hooks/class.tx_indexedsearch_tslib_fe_hook.php";s:4:"c27f";s:13:"mod/clear.gif";s:4:"cc11";s:12:"mod/conf.php";s:4:"9062";s:13:"mod/index.php";s:4:"c619";s:15:"mod/isearch.gif";s:4:"4cbf";s:21:"mod/locallang_mod.xml";s:4:"1624";s:21:"mod/mod_template.html";s:4:"a7f2";s:44:"modfunc1/class.tx_indexedsearch_modfunc1.php";s:4:"e6d8";s:22:"modfunc1/locallang.xml";s:4:"4806";s:44:"modfunc2/class.tx_indexedsearch_modfunc2.php";s:4:"81bb";s:22:"modfunc2/locallang.xml";s:4:"a889";s:29:"pi/class.tx_indexedsearch.php";s:4:"d0cf";s:21:"pi/considerations.txt";s:4:"e3df";s:22:"pi/indexed_search.tmpl";s:4:"4b28";s:16:"pi/locallang.xml";s:4:"4f34";s:20:"pi/template_css.tmpl";s:4:"0df0";s:14:"pi/res/csv.gif";s:4:"e413";s:14:"pi/res/doc.gif";s:4:"0975";s:15:"pi/res/html.gif";s:4:"5647";s:14:"pi/res/jpg.gif";s:4:"23ac";s:17:"pi/res/locked.gif";s:4:"c212";s:16:"pi/res/pages.gif";s:4:"1923";s:14:"pi/res/pdf.gif";s:4:"9451";s:14:"pi/res/pps.gif";s:4:"926b";s:14:"pi/res/ppt.gif";s:4:"ada5";s:14:"pi/res/rtf.gif";s:4:"f660";s:14:"pi/res/sxc.gif";s:4:"00a6";s:14:"pi/res/sxi.gif";s:4:"ef83";s:14:"pi/res/sxw.gif";s:4:"4a8f";s:14:"pi/res/tif.gif";s
:4:"533b";s:14:"pi/res/txt.gif";s:4:"c576";s:14:"pi/res/xls.gif";s:4:"4a22";s:14:"pi/res/xml.gif";s:4:"2e7b";s:38:"tests/tx_indexedsearch_indexerTest.php";s:4:"22ed";}',
        'constraints' => array(
                'depends' => array(
                        'cms' => '',
-                       'php' => '5.3.0-0.0.0',
-                       'typo3' => '4.6.0-0.0.0',
+                       'php' => '5.1.0-0.0.0',
+                       'typo3' => '4.4.0-0.0.0',
                ),
                'conflicts' => array(
                ),
index 189534d..982069e 100755 (executable)
@@ -43,6 +43,19 @@ $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] = array(
        'tif' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
 );
 
+$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
+
+  // unserializing the configuration so we can use it here:
+$_EXTCONF = unserialize($_EXTCONF);
+
+       // Use the advanced doubleMetaphone parser instead of the internal one (only if enableMetaphoneSearch is set to 2; the default value 1 uses the internal parser)
+if (isset($_EXTCONF['enableMetaphoneSearch']) && intval($_EXTCONF['enableMetaphoneSearch'])==2) {
+       $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
+}
+
+
+
+
 
        // EXAMPLE configuration of hooks:
 /*
index b101880..5f74258 100755 (executable)
@@ -57,4 +57,4 @@ if (t3lib_extMgm::isLoaded('crawler'))        {
        $TCA['index_config']['types']['tx_myext_example1'] = $TCA['index_config']['types']['0'];
 */
 
-?>
\ No newline at end of file
+?>
index a267695..02d6506 100755 (executable)
@@ -38,6 +38,7 @@ CREATE TABLE index_phash (
 CREATE TABLE index_fulltext (
   phash int(11) DEFAULT '0' NOT NULL,
   fulltextdata mediumtext,
+  metaphonedata mediumtext NOT NULL,
   PRIMARY KEY (phash)
 ) ENGINE=InnoDB;
 
index 44261c4..2bb060b 100755 (executable)
@@ -44,6 +44,8 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
        var $allPhashListed = array();          // phash values accumulations for link to clear all
        var $external_parsers = array();        // External content parsers - objects set here with file extensions as keys.
        var $iconFileNameCache = array();       // File extensions - icon map/cache.
+       var $indexerConfig = array();   // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']
+       var $enableMetaphoneSearch = FALSE;
 
        /**
         * Indexer object
@@ -89,6 +91,12 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                        // Return if no page id:
                if ($this->pObj->id<=0)         return;
 
+                       // Indexer configuration from Extension Manager interface:
+               $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
+
+                       // Workaround: If the extension configuration was not updated yet, the value is not existing
+               $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? TRUE : FALSE) : TRUE;
+
                        // Initialize max-list items
                $this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;
 
@@ -141,7 +149,7 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                        $theOutput.=$this->pObj->doc->spacer(5);
                        $theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);
 
-               } elseif (t3lib_div::_GET('metaphone')) {
+               } elseif ($this->enableMetaphoneSearch && t3lib_div::_GET('metaphone')) {
                                // Show title / function menu:
                        $theOutput.=$this->pObj->doc->spacer(5);
                        $theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);
@@ -619,12 +627,14 @@ class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
                        $showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();
                        $content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);
 
-                               // Group metaphone hash:
-                       $metaphone = array();
-                       foreach($ftrows as $row)        {
-                               $metaphone[$row['metaphone']][] = $row['baseword'];
+                       if ($this->enableMetaphoneSearch) {
+                                       // Group metaphone hash:
+                               $metaphone = array();
+                               foreach ($ftrows as $row) {
+                                       $metaphone[$row['metaphone']][] = $row['baseword'];
+                               }
+                               $content .= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');
                        }
-                       $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');
 
                                // Finding top-20 on frequency for this phash:
                        $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
index c112e8f..d19a244 100755 (executable)
@@ -77,10 +77,15 @@ class tx_indexedsearch extends tslib_pibase {
        var $cache_rl = array();                // Caching of root line data
        var $fe_groups_required = array();      // Required fe_groups memberships for display of a result.
        var $domain_records = array();          // Domain records (?)
-       var $wSelClauses = array();             // Select clauses for individual words
        var $resultSections = array();          // Page tree sections for search result.
        var $external_parsers = array();        // External parser objects
        var $iconFileNameCache = array();       // Storage of icons....
+       var $templateCode;                      // Will hold the content of $conf['templateFile']
+       var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results';
+       var $indexerConfig = array();    // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']
+       var $enableMetaphoneSearch = FALSE;
+       var $storeMetaphoneInfoAsWords;
+
 
        /**
         * Lexer object
@@ -89,14 +94,8 @@ class tx_indexedsearch extends tslib_pibase {
         */
        var $lexerObj;
 
-       /**
-        * Indexer object
-        *
-        * @var tx_indexedsearch_indexer
-        */
-       var $indexerObj;
-       var $templateCode;                      // Will hold the content of $conf['templateFile']
-       var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results';
+       const WILDCARD_LEFT  = 1;
+       const WILDCARD_RIGHT = 2;
 
 
        /**
@@ -113,12 +112,8 @@ class tx_indexedsearch extends tslib_pibase {
                $this->pi_loadLL();
                $this->pi_setPiVarDefaults();
 
-                       // Initialize the indexer-class - just to use a few function (for making hashes)
-               $this->indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
-
                        // Initialize:
                $this->initialize();
-
                        // Do search:
                        // If there were any search words entered...
                if (is_array($this->sWArr))     {
@@ -130,8 +125,8 @@ class tx_indexedsearch extends tslib_pibase {
                        $this->printRules().
                        $content;
 
-        return $this->pi_wrapInBaseClass($content);
-    }
+               return $this->pi_wrapInBaseClass($content);
+       }
 
        /**
         * Initialize internal variables, especially selector box values for the search form and search words
@@ -141,6 +136,11 @@ class tx_indexedsearch extends tslib_pibase {
        function initialize()   {
                global $TYPO3_CONF_VARS;
 
+                       // Indexer configuration from Extension Manager interface (workaround: if it was not updated yet, enableMetaphoneSearch is missing and defaults to enabled):
+               $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
+               $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? TRUE : FALSE) : TRUE;
+               $this->storeMetaphoneInfoAsWords = tx_indexedsearch_util::isTableUsed('index_words') ? FALSE : TRUE;
+
                        // Initialize external document parsers for icon display and other soft operations
                if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']))        {
                        foreach ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef)   {
@@ -232,6 +232,11 @@ class tx_indexedsearch extends tslib_pibase {
                        )
                );
 
+                       // Remove this option if metaphone search is disabled
+               if (!$this->enableMetaphoneSearch) {
+                       unset ($this->optValues['type']['10']);
+               }
+
                        // Free Index Uid:
                if ($this->conf['search.']['defaultFreeIndexUidList'])  {
                        $uidList = t3lib_div::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']);
@@ -275,7 +280,7 @@ class tx_indexedsearch extends tslib_pibase {
                        // This selects the first and secondary menus for the "sections" selector - so we can search in sections and sub sections.
                if ($this->conf['show.']['L1sections']) {
                        $firstLevelMenu = $this->getMenu($this->wholeSiteIdList);
-                       foreach ($firstLevelMenu as $kk => $mR) {
+                       foreach ($firstLevelMenu as $optionName => $mR) {
                                if (!$mR['nav_hide']) {
                                        $this->optValues['sections']['rl1_'.$mR['uid']] = trim($this->pi_getLL('opt_RL1').' '.$mR['title']);
                                        if ($this->conf['show.']['L2sections']) {
@@ -287,7 +292,7 @@ class tx_indexedsearch extends tslib_pibase {
                                                }
                                                $this->optValues['sections']['rl2_'.implode(',',array_keys($secondLevelMenu))] = $this->pi_getLL('opt_RL2ALL');
                                        }
-                               } else unset($firstLevelMenu[$kk]);
+                               } else unset($firstLevelMenu[$optionName]);
                        }
                        $this->optValues['sections']['rl1_'.implode(',',array_keys($firstLevelMenu))] = $this->pi_getLL('opt_RL1ALL');
                }
@@ -302,10 +307,11 @@ class tx_indexedsearch extends tslib_pibase {
                $this->templateCode = $this->cObj->fileResource($this->conf['templateFile']);
 
                        // Add search languages:
-               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'sys_language', '1=1'.$this->cObj->enableFields('sys_language'));
-               while($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))        {
-                       $this->optValues['lang'][$lR['uid']] = $lR['title'];
+               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'sys_language', '1=1' . $this->cObj->enableFields('sys_language'));
+               while (FALSE !== ($data = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))) {
+                       $this->optValues['lang'][$data['uid']] = $data['title'];
                }
+               $GLOBALS['TYPO3_DB']->sql_free_result($res);
 
                        // Calling hook for modification of initialized content
                if ($hookObj = $this->hookRequest('initialize_postProc')) {
@@ -314,24 +320,25 @@ class tx_indexedsearch extends tslib_pibase {
 
                        // Default values set:
                        // Setting first values in optValues as default values IF there is not corresponding piVar value set already.
-               foreach ($this->optValues as $kk => $vv)        {
-                       if (!isset($this->piVars[$kk])) {
-                               reset($vv);
-                               $this->piVars[$kk] = key($vv);
+               foreach ($this->optValues as $optionName => $optionValue)       {
+                       if (!isset($this->piVars[$optionName])) {
+                               reset($optionValue);
+                               $this->piVars[$optionName] = key($optionValue);
                        }
                }
 
                        // Blind selectors:
                if (is_array($this->conf['blind.']))    {
-                       foreach ($this->conf['blind.'] as $kk => $vv)   {
-                               if (is_array($vv))      {
-                                       foreach ($vv as $kkk => $vvv)   {
-                                               if (!is_array($vvv) && $vvv && is_array($this->optValues[substr($kk,0,-1)]))    {
-                                                       unset($this->optValues[substr($kk,0,-1)][$kkk]);
+                       foreach ($this->conf['blind.'] as $optionName => $optionValue) {
+                               if (is_array($optionValue))     {
+                                       foreach ($optionValue as $optionValueSubKey => $optionValueSubValue) {
+                                               if (!is_array($optionValueSubValue) && $optionValueSubValue && is_array($this->optValues[substr($optionName, 0, -1)])) {
+                                                       unset($this->optValues[substr($optionName, 0, -1)][$optionValueSubKey]);
                                                }
                                        }
-                               } elseif ($vv) {        // If value is not set, unset the option array.
-                                       unset($this->optValues[$kk]);
+                               } elseif ($optionValue) {
+                                               // If the blind value is set, unset the whole option array
+                                       unset($this->optValues[$optionName]);
                                }
                        }
                }
@@ -341,11 +348,11 @@ class tx_indexedsearch extends tslib_pibase {
        }
 
        /**
-        * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holds the SQL operator (eg. AND, OR)
+        * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holding the SQL operator (eg. AND, OR)
         *
         * Only words with 2 or more characters are accepted
         * Max 200 chars total
-        * Space is used to split words, "" can be used search for a whole string (not indexed search then)
+        * Space is used to split words, "" can be used to search for a whole string
         * AND, OR and NOT are prefix words, overruling the default operator
         * +/|/- equals AND, OR and NOT as operators.
         * All search words are converted to lowercase.
@@ -363,12 +370,14 @@ class tx_indexedsearch extends tslib_pibase {
                $inSW = $GLOBALS['TSFE']->csConvObj->utf8_encode($inSW, $GLOBALS['TSFE']->metaCharset);
                $inSW = $GLOBALS['TSFE']->csConvObj->entities_to_utf8($inSW,TRUE);
 
+               $sWordArray = FALSE;
                if ($hookObj = $this->hookRequest('getSearchWords')) {
-                       return $hookObj->getSearchWords_splitSWords($inSW, $defOp);
+                       $sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp);
                } else {
 
-                       if ($this->piVars['type']==20)  {
-                               return array(array('sword'=>trim($inSW), 'oper'=>'AND'));
+                       if ($this->piVars['type'] == 20) {
+                               // type = Sentence
+                               $sWordArray = array(array('sword' => trim($inSW), 'oper' => 'AND'));
                        } else {
                                $search = t3lib_div::makeInstance('tslib_search');
                                $search->default_operator = $defOp==1 ? 'OR' : 'AND';
@@ -376,10 +385,12 @@ class tx_indexedsearch extends tslib_pibase {
                                $search->register_and_explode_search_string($inSW);
 
                                if (is_array($search->sword_array))     {
-                                       return $this->procSearchWordsByLexer($search->sword_array);
+                                       $sWordArray = $this->procSearchWordsByLexer($search->sword_array);
                                }
                        }
                }
+
+               return $sWordArray;
        }
 
        /**
@@ -487,77 +498,111 @@ class tx_indexedsearch extends tslib_pibase {
        /**
         * Get search result rows / data from database. Returned as data in array.
         *
-        * @param       array           Search word array
-        * @param       integer         Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
-        * @return      array           False if no result, otherwise an array with keys for first row, result rows and total number of results found.
+        * @param array $searchWordArray Search word array
+        * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
+        * @return array|boolean FALSE if no result, otherwise an array with keys for first row, result rows and total number of results found.
         */
-       function getResultRows($sWArr,$freeIndexUid=-1) {
+       function getResultRows($searchWordArray, $freeIndexUid = -1) {
 
-                       // Getting SQL result pointer:
-                       $GLOBALS['TT']->push('Searching result');
-               $res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid);
-                       $GLOBALS['TT']->pull();
+                       // Getting SQL result pointer. This fetches ALL results (1,000,000 if found)
+               $GLOBALS['TT']->push('Searching result');
+               if ($hookObj = $this->hookRequest('getResultRows_SQLpointer')) {
+                       $res = $hookObj->getResultRows_SQLpointer($searchWordArray, $freeIndexUid);     // A registered hook object supplies the result pointer
+               } else {
+                       $res = $this->getResultRows_SQLpointer($searchWordArray, $freeIndexUid);        // Default: built-in query of this plugin
+               }
+               $GLOBALS['TT']->pull();
 
                        // Organize and process result:
-               if ($res)       {
-
-                       $count = $GLOBALS['TYPO3_DB']->sql_num_rows($res);      // Total search-result count
-                       $pointer = t3lib_utility_Math::forceIntegerInRange($this->piVars['pointer'], 0, floor($count/$this->piVars['results']));        // The pointer is set to the result page that is currently being viewed
+               $result = FALSE;
+               if ($res) {
+                       $totalSearchResultCount = $GLOBALS['TYPO3_DB']->sql_num_rows($res);     // Total search-result count
+                       $currentPageNumber = t3lib_utility_Math::forceIntegerInRange($this->piVars['pointer'], 0, floor($totalSearchResultCount/$this->piVars['results']));     // The pointer is set to the result page that is currently being viewed
 
                                // Initialize result accumulation variables:
-                       $c = 0; // Result pointer: Counts up the position in the current search-result
-                       $grouping_phashes = array();    // Used to filter out duplicates.
-                       $grouping_chashes = array();    // Used to filter out duplicates BASED ON cHash.
+                       $positionInSearchResults = 0;
+                       $groupingPhashes = array();     // Used for filtering out duplicates
+                       $groupingChashes = array();     // Used for filtering out duplicates BASED ON cHash
                        $firstRow = array();    // Will hold the first row in result - used to calculate relative hit-ratings.
                        $resultRows = array();  // Will hold the results rows for display.
 
-                       $exactCount = $this->conf['search.']['exactCount'];     // Continue counting and checking of results even if we are sure they are not displayed in this request. This will slow down your page rendering, but it allows precise search result counters.
+                               // Should we continue counting and checking of results even if
+                               // we are sure they are not displayed in this request?
+                               // This will slow down your page rendering, but it allows
+                               // precise search result counters.
+                       $calculateExactCount = (bool)$this->conf['search.']['exactCount'];
+
+                       $lastResultNumberOnPreviousPage = $currentPageNumber * $this->piVars['results'];
+                       $firstResultNumberOnNextPage = ($currentPageNumber + 1)*$this->piVars['results'];
+                       $lastResultNumberToAnalyze = $currentPageNumber * $this->piVars['results'] + $this->piVars['results'];  // Last result of the current page; a larger bound would collect rows of the following page when search.exactCount is enabled
 
                                // Now, traverse result and put the rows to be displayed into an array
                                // Each row should contain the fields from 'ISEC.*, IP.*' combined + artificial fields "show_resume" (boolean) and "result_number" (counter)
-                       while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))       {
+                       while (FALSE !== ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))) {
+
+                               if (!$this->checkExistance($row)) {
+                                       // Check if the record is still available or if it has been deleted meanwhile.
+                                       // Currently this works for files only, since extending it to content elements would cause a lot of overhead...
+                                       // Otherwise, skip the row.
+                                       $totalSearchResultCount--;
+                                       continue;
+                               }
 
                                        // Set first row:
-                               if (!$c)        {
+                               if ($positionInSearchResults === 0) {
                                        $firstRow = $row;
                                }
 
                                $row['show_resume'] = $this->checkResume($row); // Tells whether we can link directly to a document or not (depends on possible right problems)
 
-                               $phashGr = !in_array($row['phash_grouping'], $grouping_phashes);
-                               $chashGr = !in_array($row['contentHash'].'.'.$row['data_page_id'], $grouping_chashes);
+                               $phashGr = !in_array($row['phash_grouping'], $groupingPhashes);
+                               $chashGr = !in_array($row['contentHash'] . '.' . $row['data_page_id'], $groupingChashes);
                                if ($phashGr && $chashGr)       {
                                        if ($row['show_resume'] || $this->conf['show.']['forbiddenRecords'])    {       // Only if the resume may be shown are we going to filter out duplicates...
                                                if (!$this->multiplePagesType($row['item_type']))       {       // Only on documents which are not multiple pages documents
-                                                       $grouping_phashes[] = $row['phash_grouping'];
+                                                       $groupingPhashes[] = $row['phash_grouping'];
                                                }
-                                               $grouping_chashes[] = $row['contentHash'].'.'.$row['data_page_id'];
+                                               $groupingChashes[] = $row['contentHash'] . '.' . $row['data_page_id'];
 
-                                               $c++;   // Increase the result pointer
+                                               $positionInSearchResults++;
 
-                                                       // All rows for display is put into resultRows[]
-                                               if ($c > $pointer * $this->piVars['results'] && $c <= ($pointer * $this->piVars['results'] + $this->piVars['results'])) {
-                                                       $row['result_number'] = $c;
+                                                       // Check if we are inside result range for current page
+                                               if ($positionInSearchResults > $lastResultNumberOnPreviousPage && $positionInSearchResults <= $lastResultNumberToAnalyze) {
+                                                               // Collect results to display
+                                                       $row['result_number'] = $positionInSearchResults;
                                                        $resultRows[] = $row;
-                                                               // This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above).
-                                                       if (!$exactCount && (($c+1) > ($pointer+1)*$this->piVars['results']))   { break; }
+                                                               // This may lead to a problem: If the result
+                                                               // check is not stopped here, the search will
+                                                               // take longer. However the result counter
+                                                               // will not filter out grouped cHashes/pHashes
+                                                               // that were not processed yet. You can change
+                                                               // this behavior using the "search.exactCount"
+                                                               // property (see above).
+                                                       $nextResultPosition = $positionInSearchResults + 1;
+                                                       if (!$calculateExactCount && $nextResultPosition > $firstResultNumberOnNextPage) {
+                                                               break;
+                                                       }
                                                }
                                        } else {
-                                               $count--;       // Skip this row if the user cannot view it (missing permission)
+                                                       // Skip this row if the user cannot view it (missing permission)
+                                               $totalSearchResultCount--;
                                        }
                                } else {
-                                       $count--;       // For each time a phash_grouping document is found (which is thus not displayed) the search-result count is reduced, so that it matches the number of rows displayed.
+                                               // For each time a phash_grouping document is found
+                                               // (which is thus not displayed) the search-result count
+                                               // is reduced, so that it matches the number of rows displayed.
+                                       $totalSearchResultCount--;
                                }
                        }
+                       $GLOBALS['TYPO3_DB']->sql_free_result($res);
 
-                       return array(
-                                               'resultRows' => $resultRows,
-                                               'firstRow' => $firstRow,
-                                               'count' => $count
-                                       );
-               } else {        // No results found:
-                       return FALSE;
+                       $result = array(
+                               'resultRows' => $resultRows,
+                               'firstRow' => $firstRow,
+                               'count' => $totalSearchResultCount
+                       );
                }
+               return $result;
        }
 
        /**
@@ -750,7 +795,6 @@ class tx_indexedsearch extends tslib_pibase {
                        // Initialize variables:
                $c=0;
                $totalHashList = array();       // This array accumulates the phash-values
-               $this->wSelClauses = array();
 
                        // Traverse searchwords; for each, select all phash integers and merge/diff/intersect them with previous word (based on operator)
                foreach ($sWArr as $k => $v)    {
@@ -761,49 +805,39 @@ class tx_indexedsearch extends tslib_pibase {
 
                        $GLOBALS['TT']->push('SearchWord "'.$sWord.'" - $theType='.$theType);
 
-                       $res = '';
-                       $wSel='';
-
                                // Perform search for word:
                        switch($theType)        {
                                case '1':       // Part of word
-                                       $wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'";
-                                       $res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
+                                       $res = $this->searchWord($sWord, self::WILDCARD_LEFT | self::WILDCARD_RIGHT);
                                break;
                                case '2':       // First part of word
-                                       $wSel = "IW.baseword LIKE '".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'";
-                                       $res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
+                                       $res = $this->searchWord($sWord, self::WILDCARD_RIGHT);
                                break;
                                case '3':       // Last part of word
-                                       $wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."'";
-                                       $res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
+                                       $res = $this->searchWord($sWord, self::WILDCARD_LEFT);
                                break;
                                case '10':      // Sounds like
-                                       $wSel = 'IW.metaphone = '.$this->indexerObj->metaphone($sWord);
-                                       $res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
+
+                                       /**
+                                        * Indexer object
+                                        *
+                                        * @var tx_indexedsearch_indexer
+                                        */
+                                               // Initialize the indexer-class
+                                       $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
+                                         // Perform metaphone search
+                                       $res = $this->searchMetaphone($indexerObj->metaphone($sWord, $this->storeMetaphoneInfoAsWords));
+                                       unset($indexerObj);
                                break;
                                case '20':      // Sentence
-                                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
-                                                               'ISEC.phash',
-                                                               'index_section ISEC, index_fulltext IFT',
-                                                               'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND
-                                                                       ISEC.phash = IFT.phash
-                                                                       '.$this->sectionTableWhere(),
-                                                               'ISEC.phash'
-                                                       );
-                                       $wSel = '1=1';
-
-                                       if ($this->piVars['type']==20)  $this->piVars['order'] = 'mtime';               // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instaed. It is not required, but otherwise some hits may be left out.
+                                       $res = $this->searchSentence($sWord);
+                                       $this->piVars['order'] = 'mtime';  // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instead. It is not required, but otherwise some hits may be left out.
                                break;
                                default:        // Distinct word
-                                       $wSel = 'IW.wid = '.$hash = $this->indexerObj->md5inthash($sWord);
-                                       $res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
+                                       $res = $this->searchDistinct($sWord);
                                break;
                        }
 
-                               // Accumulate the word-select clauses
-                       $this->wSelClauses[] = $wSel;
-
                                // If there was a query to do, then select all phash-integers which resulted from this.
                        if ($res)       {
 
@@ -862,28 +896,86 @@ class tx_indexedsearch extends tslib_pibase {
        }
 
        /**
+        * Search for a word
+        *
+        * @param string $sWord Word to search for
+        * @param integer $mode Bit-field which can contain WILDCARD_LEFT and/or WILDCARD_RIGHT
+        * @return pointer SQL result pointer
+        */
+       function searchWord($sWord, $mode) {
+                       // The wildcard flags are class constants (callers pass self::WILDCARD_*), so they must be resolved via self::
+               $wildcard_left = ($mode & self::WILDCARD_LEFT) ? '%' : '';
+               $wildcard_right = ($mode & self::WILDCARD_RIGHT) ? '%' : '';
+
+               $wSel = 'IW.baseword LIKE \'' . $wildcard_left . $GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words') . $wildcard_right . '\'';
+               return $this->execPHashListQuery($wSel, ' AND is_stopword=0');
+       }
+
+       /**
+        * Search for one distinct word
+        *
+        * @param string $sWord Word to search for
+        * @return pointer SQL result pointer
+        */
+       function searchDistinct($sWord) {
+                       // IW.wid is matched against the md5-based integer hash of the word
+               $wordHash = tx_indexedsearch_util::md5inthash($sWord);
+               return $this->execPHashListQuery('IW.wid=' . $wordHash, ' AND is_stopword=0');
+       }
+
+       /**
+        * Search for a sentence
+        *
+        * @param string $sSentence Sentence to search for
+        * @return pointer SQL result pointer
+        */
+       function searchSentence($sSentence) {
+                       // Substring match of the quoted sentence against IFT.fulltextdata
+               $quotedSentence = $GLOBALS['TYPO3_DB']->quoteStr($sSentence, 'index_fulltext');
+               $where = 'IFT.fulltextdata LIKE \'%' . $quotedSentence . '%\' AND
+                               ISEC.phash = IFT.phash
+                       ' . $this->sectionTableWhere();
+               $from = 'index_section ISEC, index_fulltext IFT';
+
+                       // Fourth argument is the GROUP BY clause
+               return $GLOBALS['TYPO3_DB']->exec_SELECTquery('ISEC.phash', $from, $where, 'ISEC.phash');
+       }
+
+       /**
+        * Search for a metaphone word
+        *
+        * @param string $sWord Metaphone value of the search word; it is concatenated into the WHERE clause as-is
+        * @return pointer SQL result pointer
+        */
+       function searchMetaphone($sWord) {
+                       // Return the pointer: the caller stores it in $res and only collects phashes when $res is set
+               return $this->execPHashListQuery('IW.metaphone=' . $sWord, ' AND is_stopword=0');
+       }
+
+
+       /**
         * Returns AND statement for selection of section in database. (rootlevel 0-2 + page_id)
         *
         * @return      string          AND clause for selection of section in database.
         */
        function sectionTableWhere()    {
-               $out = $this->wholeSiteIdList<0 ? '' : 'AND ISEC.rl0 IN ('.$this->wholeSiteIdList.')';
+               $out = ($this->wholeSiteIdList < 0) ? '' : ' AND ISEC.rl0 IN (' . $this->wholeSiteIdList . ')';
 
                $match = '';
                if (substr($this->piVars['sections'],0,4)=='rl1_')      {
                        $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],4)));
-                       $out.= 'AND ISEC.rl1 IN ('.$list.')';
+                       $out.= ' AND ISEC.rl1 IN (' . $list . ')';
                        $match = TRUE;
                } elseif (substr($this->piVars['sections'],0,4)=='rl2_')        {
                        $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],4)));
-                       $out.= 'AND ISEC.rl2 IN ('.$list.')';
+                       $out.= ' AND ISEC.rl2 IN (' . $list . ')';
                        $match = TRUE;
                } elseif (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields']))      {
                                // Traversing user configured fields to see if any of those are used to limit search to a section:
                        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) {
                                if (substr($this->piVars['sections'],0,strlen($fieldName)+1)==$fieldName.'_')   {
                                        $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],strlen($fieldName)+1)));
-                                       $out.= 'AND ISEC.'.$fieldName.' IN ('.$list.')';
+                                       $out.= ' AND ISEC.' . $fieldName . ' IN (' . $list . ')';
                                        $match = TRUE;
                                        break;
                                }
@@ -917,16 +1009,16 @@ class tx_indexedsearch extends tslib_pibase {
 
                switch((string)$this->piVars['media'])  {
                        case '0':               // '0' => 'Kun TYPO3 sider',
-                               $out = 'AND IP.item_type=' . $GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');
+                               $out = ' AND IP.item_type=' . $GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');
                                break;
                        case '-2':              // All external documents
-                               $out = 'AND IP.item_type!=' . $GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');
+                               $out = ' AND IP.item_type!=' . $GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');
                                break;
                        case '-1':      // All content
                                $out = '';
                                break;
                        default:
-                               $out = 'AND IP.item_type=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($this->piVars['media'], 'index_phash');
+                               $out = ' AND IP.item_type=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($this->piVars['media'], 'index_phash');
                }
 
                return $out;
@@ -1016,44 +1108,14 @@ class tx_indexedsearch extends tslib_pibase {
                        foreach ($siteIdNumbers as $rootId) {
                                $id_list[] = $this->cObj->getTreeList($rootId,9999,0,0,'','').$rootId;
                        }
-                       $page_where = 'ISEC.page_id IN ('.implode(',',$id_list).')';
+                       $page_where = ' ISEC.page_id IN (' . implode(',', $id_list) . ')';
                } else {        // Disable everything... (select all)
                        $page_where = ' 1=1 ';
                }
 
-
                        // If any of the ranking sortings are selected, we must make a join with the word/rel-table again, because we need to calculate ranking based on all search-words found.
-               if (substr($this->piVars['order'],0,5)=='rank_')        {
-                               /*
-                                        OK there were some fancy calculations promoted by Graeme Merrall:
-
-                                       "However, regarding relevance you probably want to look at something like
-                                       Salton's formula which is a good easy way to measure relevance.
-                                       Oracle Intermedia uses this and it's pretty simple:
-                                       Score can be between 0 and 100, but the top-scoring document in the query
-                                       will not necessarily have a score of 100 -- scoring is relative, not
-                                       absolute. This means that scores are not comparable across indexes, or even
-                                       across different queries on the same index. Score for each document is
-                                       computed using the standard Salton formula:
-
-                                           3f(1+log(N/n))
-
-                                       Where f is the frequency of the search term in the document, N is the total
-                                       number of rows in the table, and n is the number of rows which contain the
-                                       search term. This is converted into an integer in the range 0 - 100.
-
-                                       There's a good doc on it at
-                                       http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/
-                                       although it may be a little complex for what you require so just pick the
-                                       relevant parts out.
-                                       "
-
-                                       However I chose not to go with this for several reasons.
-                                       I do not claim that my ways of calculating importance here is the best.
-                                       ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.)
-                               */
-
-                       switch($this->piVars['order'])  {
+               if (substr($this->piVars['order'],0,5)=='rank_') {
+                       switch($this->piVars['order']) {
                                case 'rank_flag':       // This gives priority to word-position (max-value) so that words in title, keywords, description counts more than in content.
                                                                        // The ordering is refined with the frequency sum as well.
                                        $grsel = 'MAX(IR.flags) AS order_val1, SUM(IR.freq) AS order_val2';
@@ -1073,10 +1135,7 @@ class tx_indexedsearch extends tslib_pibase {
                                break;
                        }
 
-                               // So, words are imploded into an OR statement (no "sentence search" should be done here - may deselect results)
-                       $wordSel='('.implode(' OR ',$this->wSelClauses).') AND ';
-
-                       return $GLOBALS['TYPO3_DB']->exec_SELECTquery(
+                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
                                                'ISEC.*, IP.*, '
                                                .$grsel,
                                                'index_words IW,
@@ -1084,15 +1143,14 @@ class tx_indexedsearch extends tslib_pibase {
                                                        index_section ISEC,
                                                        index_phash IP'.
                                                        $page_join,
-                                               $wordSel.'
-                                                       IP.phash IN ('.$list.') '.
+                                                       'IP.phash IN ('.$list.') '.
                                                        $this->mediaTypeWhere().' '.
                                                        $this->languageWhere().
                                                        $freeIndexUidClause.'
                                                        AND IW.wid=IR.wid
                                                        AND ISEC.phash = IR.phash
                                                        AND IP.phash = IR.phash
-                                                       AND     '.$page_where,
+                                                       AND '.$page_where,
                                                'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId',
                                                $orderBy
                                        );
@@ -1111,7 +1169,7 @@ class tx_indexedsearch extends tslib_pibase {
                                break;
                        }
 
-                       return $GLOBALS['TYPO3_DB']->exec_SELECTquery(
+                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
                                                'ISEC.*, IP.*',
                                                'index_phash IP,index_section ISEC'.$page_join,
                                                'IP.phash IN ('.$list.') '.
@@ -1124,6 +1182,7 @@ class tx_indexedsearch extends tslib_pibase {
                                                $orderBy
                                        );
                }
+               return $res;
        }
 
        /**
@@ -1148,8 +1207,13 @@ class tx_indexedsearch extends tslib_pibase {
                                // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing of the documents in this section.
                                // So, selecting for the grlist records belonging to the parent phash-row where the current users gr_list exists will help us to know.
                                // If this is NOT found, there is still a theoretical possibility that another user accessible page would display a link, so maybe the resume of such a document here may be unjustified hidden. But better safe than sorry.
-                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash_t3']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist'));
-                       if ($GLOBALS['TYPO3_DB']->sql_num_rows($res))   {
+                       if (tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash_t3']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist'));
+                       } else {
+                               $res = FALSE;
+                       }
+
+                       if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
                                #debug("Look up for external media '".$row['data_filename']."': phash:".$row['phash_t3'].' YES - ('.$GLOBALS['TSFE']->gr_list.")!");
                                return TRUE;
                        } else {
@@ -1159,8 +1223,13 @@ class tx_indexedsearch extends tslib_pibase {
                } else {        // Ordinary TYPO3 pages:
                        if (strcmp($row['gr_list'],$GLOBALS['TSFE']->gr_list))  {
                                        // Selecting for the grlist records belonging to the phash-row where the current users gr_list exists. If it is found it is proof that this user has direct access to the phash-rows content although he did not himself initiate the indexing...
-                               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist'));
-                               if ($GLOBALS['TYPO3_DB']->sql_num_rows($res))   {
+                               if (tx_indexedsearch_util::isTableUsed('index_grlist')) {
+                                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist'));
+                               } else {
+                                       $res = FALSE;
+                               }
+
+                               if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
                                        #debug('Checking on it ...'.$row['item_title'].'/'.$row['phash'].' - YES ('.$GLOBALS['TSFE']->gr_list.")");
                                        return TRUE;
                                } else {
@@ -1175,6 +1244,25 @@ class tx_indexedsearch extends tslib_pibase {
        }
 
        /**
+        * Check if the record is still available or if it has been deleted meanwhile.
+        * Currently this works for files only, since extending it to page content would cause a lot of overhead.
+        *
+        * @param       array           Result row array
+        * @return      boolean         Returns TRUE if record is still available
+        */
+       function checkExistance($row) {
+                       // Page content (falsy item_type) is always treated as existing
+               if (!$row['item_type']) {
+                       return TRUE;
+               }
+
+                       // External media: the indexed file must still be present on disk
+               $filename = $row['data_filename'];
+
+               return is_file($filename) && file_exists($filename);
+       }
+
+       /**
         * Returns "DESC" or "" depending on the settings of the incoming highest/lowest result order (piVars['desc']
         *
         * @param       boolean         If TRUE, inverse the order which is defined by piVars['desc']
@@ -1222,17 +1310,18 @@ class tx_indexedsearch extends tslib_pibase {
                }
        }
 
-
-
-
-
-
-
-
-
-
-
-
+       /**
+        * Deprecated wrapper: logs the deprecated call and delegates to tx_indexedsearch_util::md5inthash().
+        *
+        * @param string $stringToHash String to hash
+        * @return int Integer interpretation of the md5 hash of input string.
+        * @deprecated
+        * @see tx_indexedsearch_util::md5inthash
+        */
+       function md5inthash($stringToHash) {
+               t3lib_div::logDeprecatedFunction();
+               return tx_indexedsearch_util::md5inthash($stringToHash);
+       }
 
        /***********************************
         *
@@ -1258,20 +1347,7 @@ class tx_indexedsearch extends tslib_pibase {
                $markerArray['###FORM_SUBMIT###'] = $this->pi_getLL('submit_button_label','',1);
 
                        // Adding search field value
-               if (isset($this->piVars['sword']) && $this->piVars['sword'] != '') {
-                       $markerArray['###SWORD_VALUE###'] = htmlspecialchars($this->piVars['sword']);
-                       $markerArray['###PLACEHOLDER###'] = '';
-               } else {
-                               // Add a HTML5 placeholder attribute if the configured doctype allows it
-                       if ($GLOBALS['TSFE']->config['config']['doctype'] === 'html5') {
-                               $markerArray['###SWORD_VALUE###'] = '';
-                               $markerArray['###PLACEHOLDER###'] = 'placeholder="' . $this->pi_getLL('default_search_word_entry') . '"';
-                       } else {
-                               $markerArray['###SWORD_VALUE###'] = $this->pi_getLL('default_search_word_entry');
-                               $markerArray['###PLACEHOLDER###'] = '';
-                       }
-               }
-
+               $markerArray['###SWORD_VALUE###'] = htmlspecialchars($this->piVars['sword']);
 
                        // Additonal keyword => "Add to current search words"
                if ($this->conf['show.']['clearSearchBox'] && $this->conf['show.']['clearSearchBox.']['enableSubSearchCheckBox'])       {
@@ -1905,13 +1981,20 @@ class tx_indexedsearch extends tslib_pibase {
                if ($row['show_resume'])        {
                        if (!$noMarkup) {
                                $markedSW = '';
-                               $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_fulltext', 'phash='.intval($row['phash']));
-                               if ($ftdrow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))      {
-                                               // Cut HTTP references after some length
-                                       $content = preg_replace('/(http:\/\/[^ ]{60})([^ ]+)/i', '$1...', $ftdrow['fulltextdata']);
-                                       $markedSW = $this->markupSWpartsOfString($content);
+                               if (tx_indexedsearch_util::isTableUsed('index_fulltext')) {
+                                       $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_fulltext', 'phash=' . intval($row['phash']));
+                               } else {
+                                       $res = FALSE;
+                               }
+
+                               if ($res) {
+                                       if ($ftdrow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
+                                                       // Cut HTTP references after some length
+                                               $content = preg_replace('/(http:\/\/[^ ]{60})([^ ]+)/i', '$1...', $ftdrow['fulltextdata']);
+                                               $markedSW = $this->markupSWpartsOfString($content);
+                                       }
+                                       $GLOBALS['TYPO3_DB']->sql_free_result($res);
                                }
-                               $GLOBALS['TYPO3_DB']->sql_free_result($res);
                        }
 
                        if (!trim($markedSW))   {
index e389c44..baf65a7 100755 (executable)
@@ -1,3 +1,6 @@
+- Search is always case insensitive. If you need a case sensitive search, use a binary collation for the index_fulltext and index_words tables.
+
+
 MAILS about:
 
 
@@ -316,4 +319,35 @@ Piotr
 *****************************************************************************************************************
 *****************************************************************************************************************
 
+OK there were some fancy calculations promoted by Graeme Merrall:
+
+"However, regarding relevance you probably want to look at something like
+Salton's formula which is a good easy way to measure relevance.
+Oracle Intermedia uses this and it's pretty simple:
+Score can be between 0 and 100, but the top-scoring document in the query
+will not necessarily have a score of 100 -- scoring is relative, not
+absolute. This means that scores are not comparable across indexes, or even
+across different queries on the same index. Score for each document is
+computed using the standard Salton formula:
+
+  3f(1+log(N/n))
+
+Where f is the frequency of the search term in the document, N is the total
+number of rows in the table, and n is the number of rows which contain the
+search term. This is converted into an integer in the range 0 - 100.
+
+There's a good doc on it at
+http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/
+although it may be a little complex for what you require so just pick the
+relevant parts out.
+"
 
+However I chose not to go with this for several reasons.
+I do not claim that my way of calculating importance here is the best.
+ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.)
+
+
+
+
+*****************************************************************************************************************
+*****************************************************************************************************************
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search_mysql/ChangeLog b/typo3/sysext/indexed_search_mysql/ChangeLog
new file mode 100644 (file)
index 0000000..728f393
--- /dev/null
@@ -0,0 +1,3 @@
+2008-03-18  Michael Stucki  <michael@typo3.org>
+
+       * Initial version
diff --git a/typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php b/typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php
new file mode 100644 (file)
index 0000000..fa3c6e3
--- /dev/null
@@ -0,0 +1,219 @@
+<?php
+/***************************************************************
+*  Copyright notice
+*
+*  (c) 2011 Michael Stucki (michael@typo3.org)
+*  All rights reserved
+*
+*  This script is part of the TYPO3 project. The TYPO3 project is
+*  free software; you can redistribute it and/or modify
+*  it under the terms of the GNU General Public License as published by
+*  the Free Software Foundation; either version 2 of the License, or
+*  (at your option) any later version.
+*
+*  The GNU General Public License can be found at
+*  http://www.gnu.org/copyleft/gpl.html.
+*  A copy is found in the textfile GPL.txt and important notices to the license
+*  from the author is found in LICENSE.txt distributed with these scripts.
+*
+*
+*  This script is distributed in the hope that it will be useful,
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*  GNU General Public License for more details.
+*
+*  This copyright notice MUST APPEAR in all copies of the script!
+***************************************************************/
+
+/**
+ * Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries.
+ *
+ * @author     Michael Stucki <michael@typo3.org>
+ * @package TYPO3
+ * @subpackage tx_indexedsearch_mysql
+ */
+class tx_indexedsearch_mysql {
+       /** @var tx_indexedsearch Plugin object this class reads piVars, where-clause helpers and hook objects from */
+       public $pObj;
+
+       const ANY_PART_OF_THE_WORD = '1';
+       const LAST_PART_OF_THE_WORD = '2';
+       const FIRST_PART_OF_THE_WORD = '3';
+       const SOUNDS_LIKE = '10';
+       const SENTENCE = '20';
+
+       /**
+        * Gets a SQL result pointer to traverse for the search records.
+        *
+        * @param array $searchWordsArray Search words
+        * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
+        * @return resource|false Result of the fulltext query, FALSE if no query was executed
+        */
+       public function getResultRows_SQLpointer($searchWordsArray, $freeIndexUid = -1) {
+                       // Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
+               $searchData = $this->getSearchString($searchWordsArray);
+
+                       // Perform SQL Search / collection of result rows array:
+               $resource = FALSE;
+               if ($searchData) {
+                               // Do the search:
+                       $GLOBALS['TT']->push('execFinalQuery');
+                       $resource = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
+                       $GLOBALS['TT']->pull();
+               }
+               return $resource;
+       }
+
+       /**
+        * Builds the search data for a MySQL FULLTEXT query from the given search words.
+        *
+        * @param array $searchWordArray Search word array; each entry provides 'sword' (word or phrase) and 'oper' (operator)
+        * @return array Search data with the keys 'searchBoolean' (bool), 'searchString' (string) and 'fulltextIndex' (string)
+        */
+       public function getSearchString($searchWordArray) {
+                       // Initialize variables:
+               $searchBoolean = FALSE; // Change this to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
+               $fulltextIndex = 'index_fulltext.fulltextdata';
+
+               $naturalSearchString = '';      // This holds the result if the search is natural (doesn't contain any boolean operators)
+               $booleanSearchString = '';      // This holds the result if the search is boolean (contains +/-/| operators)
+
+                       // Search type selected for the whole query
+               $searchType = (string)$this->pObj->piVars['type'];
+
+                       // Traverse searchwords and prefix them with corresponding operator
+               foreach ($searchWordArray as $searchWordData) {
+                               // Making the query for a single search word based on the search-type
+                       $searchWord = $searchWordData['sword'];
+                       $wildcard = '';
+                       $phraseQuote = '';
+
+                               // A search-word with spaces is searched as a sentence. Decided per word, so one phrase does not turn the whole query into a phrase.
+                       $wordSearchType = (strpos($searchWord, ' ') !== FALSE) ? self::SENTENCE : $searchType;
+
+                       switch ($wordSearchType) {
+                               case self::ANY_PART_OF_THE_WORD:
+                               case self::LAST_PART_OF_THE_WORD:
+                                       // Both options above are both not possible with fulltext indexing! Therefore, fallback to first-part-of-word search
+                               case self::FIRST_PART_OF_THE_WORD:
+                                               // First part of word
+                                       $wildcard = '*';
+                                               // Part-of-word search requires boolean mode!
+                                       $searchBoolean = TRUE;
+                               break;
+                               case self::SOUNDS_LIKE:
+                                               /** @var tx_indexedsearch_indexer $indexerObj */
+                                       $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');      // Initialize the indexer-class
+                                       $searchWord = $indexerObj->metaphone($searchWord, $indexerObj->storeMetaphoneInfoAsWords);
+                                       unset($indexerObj);
+                                       $fulltextIndex = 'index_fulltext.metaphonedata';
+                               break;
+                               case self::SENTENCE:
+                                       $searchBoolean = TRUE;
+                                               // Remove existing quotes and fix misplaced quotes.
+                                       $searchWord = trim(str_replace('"', ' ', $searchWord));
+                                               // Quote the phrase so its words stay together in the boolean search string
+                                       $phraseQuote = '"';
+                               break;
+                       }
+                       $booleanWord = $phraseQuote . $searchWord . $phraseQuote . $wildcard;
+
+                               // Perform search for word:
+                       switch ($searchWordData['oper']) {
+                               case 'AND NOT':
+                                       $booleanSearchString .= ' -' . $booleanWord;
+                                       $searchBoolean = TRUE;
+                               break;
+                               case 'OR':
+                                       $booleanSearchString .= ' ' . $booleanWord;
+                                       $searchBoolean = TRUE;
+                               break;
+                               default:
+                                       $booleanSearchString .= ' +' . $booleanWord;
+                                       $naturalSearchString .= ' ' . $searchWord;
+                       }
+               }
+
+               if ($searchType == self::SENTENCE) {
+                               // Sentence search was selected for the whole query: match all words as one exact phrase
+                       $searchString = '"' . trim($naturalSearchString) . '"';
+               } elseif ($searchBoolean) {
+                       $searchString = trim($booleanSearchString);
+               } else {
+                       $searchString = trim($naturalSearchString);
+               }
+
+               return array(
+                       'searchBoolean' => $searchBoolean,
+                       'searchString' => $searchString,
+                       'fulltextIndex' => $fulltextIndex
+               );
+       }
+
+       /**
+        * Executes the MATCH ... AGAINST query on the selected fulltext index, restricted by media type, language, indexing configuration and pages.
+        *
+        * @param array $searchData Array with search string, boolean indicator, and fulltext index reference (as built by getSearchString())
+        * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
+        * @return resource Query result
+        */
+       protected function execFinalQuery_fulltext($searchData, $freeIndexUid = -1) {
+
+                       // Extra table for the FROM part; only filled if the "pages" table has to be joined
+               $pageJoin = '';
+
+                       // Indexing configuration clause:
+               $freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);
+
+                       // Calling hook for alternative creation of page ID list
+               if (($hookObj = &$this->pObj->hookRequest('execFinalQuery_idList'))) {
+                       $pageWhere = $hookObj->execFinalQuery_idList('');       // Originally this hook expects a list of page IDs, so since we don't know them yet, just send an empty string. Users of this hook need to adjust their hook to this!
+               } elseif ($this->pObj->join_pages) {    // Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
+                       $pageJoin = ',
+                               pages';
+                       $pageWhere = 'pages.uid = ISEC.page_id
+                               '.$this->pObj->cObj->enableFields('pages').'
+                               AND pages.no_search=0
+                               AND pages.doktype<200
+                       ';
+               } elseif ($this->pObj->wholeSiteIdList >= 0) {  // Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
+                       $siteIdNumbers = t3lib_div::intExplode(',', $this->pObj->wholeSiteIdList);
+                       $idList = array();
+                       foreach ($siteIdNumbers as $rootId) {
+                               $cObj = t3lib_div::makeInstance('tslib_cObj');
+                               /** @var tslib_cObj $cObj */
+                               $idList[] = $cObj->getTreeList($rootId, 9999, 0, 0, '', '') . $rootId;
+                       }
+                       $pageWhere = ' ISEC.page_id IN (' . implode(',', $idList) . ')';
+               } else {
+                               // Disable everything... (select all)
+                       $pageWhere = ' 1=1';
+               }
+
+               $searchBoolean = '';
+               if ($searchData['searchBoolean']) {
+                       $searchBoolean = ' IN BOOLEAN MODE';
+               }
+
+               $resource = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
+                               'index_fulltext.*, ISEC.*, IP.*',
+                               'index_fulltext, index_section ISEC, index_phash IP' . $pageJoin,
+                               'MATCH ('.$searchData['fulltextIndex'].') AGAINST (' . $GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'],'index_fulltext') . $searchBoolean . ') '.
+                                       $this->pObj->mediaTypeWhere() . ' ' .
+                                       $this->pObj->languageWhere() .
+                                       $freeIndexUidClause . '
+                                       AND index_fulltext.phash = IP.phash
+                                       AND ISEC.phash = IP.phash
+                                       AND ' . $pageWhere,
+                               'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId'
+                       );
+
+               return $resource;
+       }
+}
+
+if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search_mysql/class.tx_indexedsearch_mysql.php'])) {      // Include the XCLASS file registered for this class file, if any
+       include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search_mysql/class.tx_indexedsearch_mysql.php']);  // Must be the same key as checked above
+}
+
+?>
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search_mysql/ext_emconf.php b/typo3/sysext/indexed_search_mysql/ext_emconf.php
new file mode 100644 (file)
index 0000000..cf33839
--- /dev/null
@@ -0,0 +1,52 @@
+<?php
+
+########################################################################
+# Extension Manager/Repository config file for ext: "indexed_search_mysql"
+#
+# Auto generated 18-03-2008 20:13
+#
+# Manual updates:
+# Only the data in the array - anything else is removed by next write.
+# "version" and "dependencies" must not be touched!
+########################################################################
+
+$EM_CONF[$_EXTKEY] = array(    // Metadata of the extension "indexed_search_mysql"
+       'title' => 'MySQL driver for Indexed Search Engine',
+       'description' => 'MySQL specific driver for Indexed Search Engine. Allows usage of MySQL-only features like FULLTEXT indexes.',
+       'category' => 'misc',
+       'shy' => 0,
+       'dependencies' => 'cms,indexed_search',        // NOTE(review): 'cms' is listed here but has no entry under constraints/depends below - confirm which one is intended
+       'conflicts' => '',
+       'priority' => '',
+       'loadOrder' => '',
+       'module' => '',
+       'state' => 'beta',
+       'internal' => 1,
+       'uploadfolder' => 0,
+       'createDirs' => '',
+       'modify_tables' => '',
+       'clearCacheOnLoad' => 0,
+       'lockType' => '',
+       'author' => 'Michael Stucki',
+       'author_email' => 'michael@typo3.org',
+       'author_company' => '',
+       'CGLcompliance' => '',
+       'CGLcompliance_note' => '',
+       'version' => '2.13.0',
+       '_md5_values_when_last_written' => '',
+       'constraints' => array(
+               'depends' => array(    // NOTE(review): presumably "x.y.z-" means "this version or newer" - confirm
+                       'php' => '5.2.6-',
+                       'typo3' => '4.6.0-',
+                       'indexed_search' => '2.13.0-'
+               ),
+               'conflicts' => array(
+               ),
+               'suggests' => array(
+               ),
+       ),
+       'suggests' => array(
+       ),
+);
+
+?>
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search_mysql/ext_icon.gif b/typo3/sysext/indexed_search_mysql/ext_icon.gif
new file mode 100755 (executable)
index 0000000..8f723c4
Binary files /dev/null and b/typo3/sysext/indexed_search_mysql/ext_icon.gif differ
diff --git a/typo3/sysext/indexed_search_mysql/ext_localconf.php b/typo3/sysext/indexed_search_mysql/ext_localconf.php
new file mode 100644 (file)
index 0000000..0f067a0
--- /dev/null
@@ -0,0 +1,10 @@
+<?php
+if (!defined('TYPO3_MODE')) die('Access denied.');
+
+       // Configure hook to query the fulltext index ($GLOBALS is used so the registration does not depend on the scope this file is included in)
+$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks']['getResultRows_SQLpointer'] = 'EXT:indexed_search_mysql/class.tx_indexedsearch_mysql.php:&tx_indexedsearch_mysql';
+
+       // Use all index_* tables except "index_rel" and "index_words"
+$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
+
+?>
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search_mysql/ext_tables.sql b/typo3/sysext/indexed_search_mysql/ext_tables.sql
new file mode 100644 (file)
index 0000000..7cbcab2
--- /dev/null
@@ -0,0 +1,16 @@
+#
+# Table structure for table 'index_fulltext'
+#
+# Differences compared to original definition in EXT:indexed_search are as follows:
+# - Add new mediumtext field "metaphonedata"
+# - Add new FULLTEXT index "fulltextdata"
+# - Add new FULLTEXT index "metaphonedata"
+# - Change table engine from InnoDB to MyISAM (required for FULLTEXT indexing)
+CREATE TABLE index_fulltext (
+  phash int(11) DEFAULT '0' NOT NULL,
+  fulltextdata mediumtext,
+  metaphonedata mediumtext,
+  PRIMARY KEY (phash),
+  FULLTEXT fulltextdata (fulltextdata),
+  FULLTEXT metaphonedata (metaphonedata)
+) ENGINE=MyISAM;