Commit d7745053 authored by Benni Mack, committed by Georg Ringer
Browse files

[!!!][TASK] Remove personalized search statistics from indexed search

Indexed search captures search statistics and which words
were searched. However, TYPO3 only evaluates the searched
words, which do not contain any user/IP specific values.

This change removes the personalized tracking information,
as only the word-based statistics are evaluated.

Also the database table "index_stat_search" gets removed.

Resolves: #92993
Releases: master
Change-Id: I7ab5f9d4a23a79b68274db1a709551604f5479fb
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/66996


Tested-by: Anja Leichsenring <aleichsenring@ab-softlab.de>
Tested-by: TYPO3com <noreply@typo3.com>
Tested-by: Georg Ringer <georg.ringer@gmail.com>
Reviewed-by: Anja Leichsenring <aleichsenring@ab-softlab.de>
Reviewed-by: Georg Ringer <georg.ringer@gmail.com>
parent 220d8802
.. include:: ../../Includes.txt
========================================================================
Breaking: #92993 - Generic search statistics from indexed search removed
========================================================================
See :issue:`92993`
Description
===========
When using TYPO3 Core's built-in Frontend Search ("Indexed Search"), search
statistics were written that were never evaluated, but that could contain
user-specific information about logged-in users and their previously used
sessions, which might conflict with privacy policies.
The IP address could be masked via the Indexed Search extension setting
`trackIpInStatistic`, which is now removed, along with the database table
`index_stat_search`.
However, TYPO3 also stores statistics on the searched words (database table
`index_stat_word`). These are evaluated in the TYPO3 Backend and are therefore kept.
Impact
======
Searching within Indexed Search now only tracks the searched words; additional
metadata is no longer recorded.
The database table `index_stat_search` is not available anymore, along with the
extension setting `trackIpInStatistic` for masking the logged IP address, as IP
addresses are no longer tracked.
Affected Installations
======================
TYPO3 installations using Indexed Search that read from the database table
`index_stat_search` or rely on the extension setting `trackIpInStatistic`.
Migration
=========
It is recommended to use a more generic and sophisticated analytics tool like
Matomo or Google Analytics to track searched terms.
.. index:: Database, NotScanned, ext:indexed_search
......@@ -29,7 +29,6 @@ use TYPO3\CMS\Core\Site\SiteFinder;
use TYPO3\CMS\Core\Type\File\ImageInfo;
use TYPO3\CMS\Core\TypoScript\TypoScriptService;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Core\Utility\IpAnonymizationUtility;
use TYPO3\CMS\Core\Utility\MathUtility;
use TYPO3\CMS\Core\Utility\PathUtility;
use TYPO3\CMS\Core\Utility\RootlineUtility;
......@@ -283,20 +282,17 @@ class SearchController extends ActionController
$resultsets = [];
foreach ($indexCfgs as $freeIndexUid) {
// Get result rows
$tstamp1 = IndexedSearchUtility::milliseconds();
if ($hookObj = $this->hookRequest('getResultRows')) {
$resultData = $hookObj->getResultRows($this->searchWords, $freeIndexUid);
} else {
$resultData = $this->searchRepository->doSearch($this->searchWords, $freeIndexUid);
}
// Display search results
$tstamp2 = IndexedSearchUtility::milliseconds();
if ($hookObj = $this->hookRequest('getDisplayResults')) {
$resultsets[$freeIndexUid] = $hookObj->getDisplayResults($this->searchWords, $resultData, $freeIndexUid);
} else {
$resultsets[$freeIndexUid] = $this->getDisplayResults($this->searchWords, $resultData, $freeIndexUid);
}
$tstamp3 = IndexedSearchUtility::milliseconds();
// Create header if we are searching more than one indexing configuration
if (count($indexCfgs) > 1) {
if ($freeIndexUid > 0) {
......@@ -322,7 +318,7 @@ class SearchController extends ActionController
$resultsets[$freeIndexUid]['categoryTitle'] = $categoryTitle;
}
// Write search statistics
$this->writeSearchStat($searchData, $this->searchWords, $resultData['count'], [$tstamp1, $tstamp2, $tstamp3]);
$this->writeSearchStat($this->searchWords ?: []);
}
$this->view->assign('resultsets', $resultsets);
$this->view->assign('searchParams', $searchData);
......@@ -854,58 +850,25 @@ class SearchController extends ActionController
/**
* Write statistics information to database for the search operation if there was at least one search word.
*
* @param array $searchParams search params
* @param array $searchWords Search Word array
* @param int $count Number of hits
* @param array $pt Milliseconds the search took (start time DB query + end time DB query + end time to compile results)
*/
protected function writeSearchStat($searchParams, $searchWords, $count, $pt)
protected function writeSearchStat(array $searchWords): void
{
$searchWord = $this->getSword();
if (empty($searchWord) && empty($searchWords)) {
if (empty($this->getSword()) && empty($searchWords)) {
return;
}
$ipAddress = '';
try {
$ipMask = isset($this->indexerConfig['trackIpInStatistic']) ? (int)$this->indexerConfig['trackIpInStatistic'] : 2;
$ipAddress = IpAnonymizationUtility::anonymizeIp(GeneralUtility::getIndpEnv('REMOTE_ADDR'), $ipMask);
} catch (\Exception $e) {
}
$insertFields = [
'searchstring' => $searchWord,
'searchoptions' => serialize([$searchParams, $searchWords, $pt]),
'feuser_id' => (int)$GLOBALS['TSFE']->fe_user->user['uid'],
// cookie as set or retrieved. If people has cookies disabled this will vary all the time
'cookie' => $GLOBALS['TSFE']->fe_user->id,
// Remote IP address
'IP' => $ipAddress,
// Number of hits on the search
'hits' => (int)$count,
// Time stamp
'tstamp' => $GLOBALS['EXEC_TIME']
];
$connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_search_stat');
$connection->insert(
'index_stat_search',
$insertFields,
['searchoptions' => Connection::PARAM_LOB]
);
$newId = $connection->lastInsertId('index_stat_search');
if ($newId) {
$connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_stat_word');
foreach ($searchWords as $val) {
$insertFields = [
'word' => $val['sword'],
'index_stat_search_id' => $newId,
// Time stamp
'tstamp' => $GLOBALS['EXEC_TIME'],
// search page id for indexed search stats
'pageid' => $GLOBALS['TSFE']->id
];
$connection->insert('index_stat_word', $insertFields);
}
$entries = [];
foreach ($searchWords as $val) {
$entries[] = [
'word' => $val['sword'],
// Time stamp
'tstamp' => $GLOBALS['EXEC_TIME'],
// search page id for indexed search stats
'pageid' => $GLOBALS['TSFE']->id
];
}
$connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_stat_word');
$connection->bulkInsert('index_stat_word', $entries);
}
/**
......
......@@ -60,9 +60,6 @@
<trans-unit id="indexedsearch.config.useMysqlFulltext" resname="indexedsearch.config.useMysqlFulltext">
<source>Use MySQL specific fulltext search - Update database schema in install tool after toggling this flag</source>
</trans-unit>
<trans-unit id="indexedsearch.config.trackIpInStatistic" resname="indexedsearch.config.trackIpInStatistic">
<source>Define the privacy level of the logged IP: 0 for no mask which means no privacy, 1 for masking the host and 2 for masking host and subnet (full privacy)</source>
</trans-unit>
</body>
</file>
</xliff>
......@@ -19,9 +19,6 @@ ppthtml = /usr/bin/
# cat=basic; type=string; label=LLL:EXT:indexed_search/Resources/Private/Language/locallang_em.xlf:indexedsearch.config.unrtf
unrtf = /usr/bin/
# cat=basic; type=int; label=LLL:EXT:indexed_search/Resources/Private/Language/locallang_em.xlf:indexedsearch.config.trackIpInStatistic
trackIpInStatistic = 2
# cat=basic; type=boolean; label=LLL:EXT:indexed_search/Resources/Private/Language/locallang_em.xlf:indexedsearch.config.debugMode
debugMode = 0
......
......@@ -60,10 +60,10 @@ $extConf = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(
if (isset($extConf['useMysqlFulltext']) && (bool)$extConf['useMysqlFulltext']) {
// Use all index_* tables except "index_rel" and "index_words"
$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables'] =
'index_phash,index_fulltext,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
'index_phash,index_fulltext,index_section,index_grlist,index_stat_word,index_debug,index_config';
} else {
$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables'] =
'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_word,index_debug,index_config';
}
// Add search to new content element wizard
......@@ -87,17 +87,3 @@ if (isset($extConf['enableMetaphoneSearch']) && (int)$extConf['enableMetaphoneSe
$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['metaphone'] = \TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility::class;
}
unset($extConf);
if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['scheduler']['tasks'][\TYPO3\CMS\Scheduler\Task\TableGarbageCollectionTask::class]['options']['tables'])) {
$GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['scheduler']['tasks'][\TYPO3\CMS\Scheduler\Task\TableGarbageCollectionTask::class]['options']['tables']['index_stat_search'] = [
'dateField' => 'tstamp',
'expirePeriod' => 90
];
}
if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['scheduler']['tasks'][\TYPO3\CMS\Scheduler\Task\IpAnonymizationTask::class]['options']['tables'])) {
$GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['scheduler']['tasks'][\TYPO3\CMS\Scheduler\Task\IpAnonymizationTask::class]['options']['tables']['index_stat_search'] = [
'dateField' => 'tstamp',
'ipField' => 'IP'
];
}
......@@ -100,21 +100,6 @@ CREATE TABLE index_grlist (
KEY phash_grouping (phash_x,hash_gr_list)
) ENGINE=InnoDB;
#
# Table structure for table 'index_stat_search'
#
CREATE TABLE index_stat_search (
uid int(11) NOT NULL auto_increment,
searchstring varchar(255) DEFAULT '' NOT NULL,
searchoptions blob,
tstamp int(11) DEFAULT '0' NOT NULL,
feuser_id int(11) unsigned DEFAULT '0' NOT NULL,
cookie varchar(32) DEFAULT '' NOT NULL,
IP varchar(255) DEFAULT '' NOT NULL,
hits int(11) DEFAULT '0' NOT NULL,
PRIMARY KEY (uid)
) ENGINE=InnoDB;
#
# Table structure for table 'index_debug'
#
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment