[TASK] Performance improvement on indexing process 75/20275/8
author Ralf Hettinger <ng@ralfhettinger.de>
Sun, 28 Apr 2013 19:48:54 +0000 (21:48 +0200)
committer Wouter Wolters <typo3@wouterwolters.nl>
Fri, 3 Oct 2014 09:46:20 +0000 (11:46 +0200)
Use exec_INSERTmultipleRows instead of exec_INSERTquery for
submitting indexer results to table index_rel. This may save _a lot_
of I/O load, especially when a page contains many words to be indexed.

Skip words marked as stopWords to prevent flooding of index_rel table.

Resolves: #38742
Resolves: #18599
Releases: master
Change-Id: Ia729e6632124692cabe7d5ebe079db7d6795b809
Reviewed-on: http://review.typo3.org/20275
Reviewed-by: Markus Klein <klein.t3@reelworx.at>
Tested-by: Markus Klein <klein.t3@reelworx.at>
Reviewed-by: Wouter Wolters <typo3@wouterwolters.nl>
Tested-by: Wouter Wolters <typo3@wouterwolters.nl>
typo3/sysext/indexed_search/Classes/Indexer.php

index c964a51..4cd8fb6 100644 (file)
@@ -1993,18 +1993,25 @@ class Indexer {
         */
        public function submitWords($wordList, $phash) {
                if (\TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed('index_rel')) {
+                       $stopWords = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('wid', 'index_words', 'is_stopword != 0', '', '', '', 'wid'); // Stop-word ids; the trailing 'wid' presumably keys the result by wid, which the isset() lookup below relies on - confirm against the DB API.
+
                        $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash=' . (int)$phash);
+                       $fields = array('phash', 'wid', 'count', 'first', 'freq', 'flags'); // Column list for the bulk insert; each $rows entry must list its values in this order.
+                       $rows = array(); // Collected here and written once after the loop instead of one INSERT per word.
                        foreach ($wordList as $val) {
-                               $insertFields = array(
-                                       'phash' => (int)$phash,
-                                       'wid' => (int)$val['hash'],
-                                       'count' => (int)$val['count'],
-                                       'first' => (int)$val['first'],
-                                       'freq' => $this->freqMap($val['count'] / $this->wordcount),
-                                       'flags' => $val['cmp'] & $this->flagBitMask
+                               if (isset($stopWords[$val['hash']])) { // Words flagged as stop words get no index_rel row.
+                                       continue;
+                               }
+                               $rows[] = array(
+                                       (int)$phash, // phash
+                                       (int)$val['hash'], // wid
+                                       (int)$val['count'], // count
+                                       (int)$val['first'], // first
+                                       $this->freqMap($val['count'] / $this->wordcount), // freq: word count relative to $this->wordcount, passed through freqMap()
+                                       $val['cmp'] & $this->flagBitMask // flags: only the bits allowed by flagBitMask
                                );
-                               $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
                        }
+                       $GLOBALS['TYPO3_DB']->exec_INSERTmultipleRows('index_rel', $fields, $rows); // Single multi-row insert. NOTE(review): $rows is empty when $wordList is empty or holds only stop words - confirm exec_INSERTmultipleRows tolerates an empty row list.
                }
        }