Commit 8d709070 authored by Stefan Bürk's avatar Stefan Bürk Committed by Benni Mack
Browse files

[BUGFIX] Use clustered cache entries per domain for redirects

'ext:redirect' now clusters the redirect cache into dedicated
parts per domain, which reduces the data amount needed to load
on redirect handling in the frontend to the absolute minimum.

On the other hand, when rebuilding a cache this is narrowed down
to the related domain, thus avoiding retrieving and caching all
redirects, which may reduce the processing time in the backend
on changing redirects or auto redirects creation on page updates.

This works around some flaws for some instances and use cases,
but do not harm on smaller instances with lower redirects count.

* read and write redirect cache by domain based identifier
* removed one array level from cached information and reduced
  one array key level in redirect matching code
* cache identifier for domain entries are build using 'sha1()'
* combined with early redirects introduced with #96480 this
  reduces the loaded data even more, as it is done in chunks
* adjusted unit tests to reflect two redirect cache service
  calls, one for the domain and one for wildcard "*".
* rebuilding cache through datahandler hook now rebuilds
  only for the related 'source_host', thus avoiding rebuild
  of all redirects for every change. This may still be further
  improved for multi record operations with the same domain.
  A 'rebuildAll()' fallback stayed as a last defense to cover
  cases where specific source_host cannot be determined.

Resolves: #96617
Releases: main, 11.5
Change-Id: Id335017cf890dca7c57e892a2561c3555348a668
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/73118

Tested-by: Oliver Bartsch's avatarOliver Bartsch <bo@cedev.de>
Tested-by: core-ci's avatarcore-ci <typo3@b13.com>
Tested-by: Benni Mack's avatarBenni Mack <benni@typo3.org>
Reviewed-by: Oliver Bartsch's avatarOliver Bartsch <bo@cedev.de>
Reviewed-by: Stefan Bürk's avatarStefan Bürk <stefan@buerk.tech>
Reviewed-by: Benni Mack's avatarBenni Mack <benni@typo3.org>
parent 49628eda
......@@ -17,6 +17,8 @@ declare(strict_types=1);
namespace TYPO3\CMS\Redirects\Hooks;
use TYPO3\CMS\Core\Database\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\DataHandling\DataHandler;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Redirects\Service\RedirectCacheService;
......@@ -30,13 +32,61 @@ class DataHandlerCacheFlushingHook
/**
* Check if the data handler processed a sys_redirect record, if so, rebuild the redirect index cache
*
* @param array $parameters unused
* @param DataHandler $dataHandler the data handler object
* @todo This hook is called for each record which needs to clear cache, which means this gets called
* for other records than sys_redirects, but also for each sys_redirect record which has been
* modified with this DataHandler call. Even if we can narrow down to rebuild only for specific
* source_hosts, this still means that we eventually rebuild the "same" cache multiple times.
* Find a better way to aggregate them and rebuild only once at the end.
*/
public function rebuildRedirectCacheIfNecessary(array $parameters, DataHandler $dataHandler)
public function rebuildRedirectCacheIfNecessary(array $parameters, DataHandler $dataHandler): void
{
if (isset($dataHandler->datamap['sys_redirect']) || isset($dataHandler->cmdmap['sys_redirect'])) {
GeneralUtility::makeInstance(RedirectCacheService::class)->rebuild();
if (
($parameters['table'] ?? false) !== 'sys_redirect'
|| !($parameters['uid'] ?? false)
|| (
!isset($dataHandler->datamap['sys_redirect'][(int)$parameters['uid']])
&& !isset($dataHandler->cmdmap['sys_redirect'][(int)$parameters['uid']])
)
) {
return;
}
$redirectCacheService = GeneralUtility::makeInstance(RedirectCacheService::class);
$sourceHosts = [];
if (isset($dataHandler->getHistoryRecords()['sys_redirect:' . (int)$parameters['uid']]['oldRecord']['source_host'])) {
$sourceHosts[] = $dataHandler->getHistoryRecords()['sys_redirect:' . (int)$parameters['uid']]['oldRecord']['source_host'];
}
if (isset($dataHandler->getHistoryRecords()['sys_redirect:' . (int)$parameters['uid']]['newRecord']['source_host'])) {
$sourceHosts[] = $dataHandler->getHistoryRecords()['sys_redirect:' . (int)$parameters['uid']]['newRecord']['source_host'];
}
// only do record lookup for delete cmd, otherwise we cannot get old and new source_host,
// thus rebuildAll() should be executed as a safety net anyway.
if ($sourceHosts === [] && isset($dataHandler->cmdmap['sys_redirect'][(int)$parameters['uid']])) {
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_redirect');
$queryBuilder->getRestrictions()->removeAll();
$row = $queryBuilder
->select('source_host')
->from('sys_redirect')
->where(
$queryBuilder->expr()->eq('uid', $queryBuilder->createNamedParameter($parameters['uid'], Connection::PARAM_INT))
)
->executeQuery()
->fetchAssociative();
if (isset($row['source_host'])) {
$sourceHosts[] = $row['source_host'] ?: '*';
}
}
// rebuild only specific source_host redirect caches
if ($sourceHosts !== []) {
foreach (array_unique($sourceHosts) as $sourceHost) {
$redirectCacheService->rebuildForHost($sourceHost);
}
return;
}
// Hopefully we get distinct source_host before. However, rebuild all redirect caches as a safety fallback.
$redirectCacheService->rebuildAll();
}
}
......@@ -45,41 +45,77 @@ class RedirectCacheService
/**
* Fetches all redirects available to the system, grouped by domain and regexp/nonregexp
*/
public function getRedirects(): array
public function getRedirects(string $sourceHost): array
{
$redirects = $this->cache->get('redirects');
$redirects = $this->cache->get($this->buildCacheIdentifier($sourceHost));
// empty array is considered as valid cache, so we need to check for array type here.
if (!is_array($redirects)) {
$redirects = $this->rebuild();
$redirects = $this->rebuildForHost($sourceHost);
}
return $redirects;
}
/**
* Rebuilds the cache for all redirects, grouped by host as well as by regular expressions and respect_query_parameters.
* Does not include deleted redirects, but includes the ones with dynamic starttime/endtime.
* Does not include hidden or deleted redirects, but includes the ones with dynamic starttime/endtime.
*/
public function rebuild(): array
public function rebuildForHost(string $sourceHost): array
{
$redirects = [];
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_redirect');
$queryBuilder->getRestrictions()->removeAll()
->add(GeneralUtility::makeInstance(HiddenRestriction::class))
->add(GeneralUtility::makeInstance(DeletedRestriction::class));
$statement = $queryBuilder
$queryBuilder
->select('*')
->from('sys_redirect')
->executeQuery();
->from('sys_redirect');
if ($sourceHost === '' || $sourceHost === '*') {
$queryBuilder->where(
$queryBuilder->expr()->orX(
$queryBuilder->expr()->eq('source_host', $queryBuilder->createNamedParameter('')),
$queryBuilder->expr()->eq('source_host', $queryBuilder->createNamedParameter('*')),
)
);
} else {
$queryBuilder->where(
$queryBuilder->expr()->in('source_host', $queryBuilder->createNamedParameter($sourceHost))
);
}
$statement = $queryBuilder->executeQuery();
while ($row = $statement->fetchAssociative()) {
$host = $row['source_host'] ?: '*';
if ($row['is_regexp']) {
$redirects[$host]['regexp'][$row['source_path']][$row['uid']] = $row;
$redirects['regexp'][$row['source_path']][$row['uid']] = $row;
} elseif ($row['respect_query_parameters']) {
$redirects[$host]['respect_query_parameters'][$row['source_path']][$row['uid']] = $row;
$redirects['respect_query_parameters'][$row['source_path']][$row['uid']] = $row;
} else {
$redirects[$host]['flat'][rtrim($row['source_path'], '/') . '/'][$row['uid']] = $row;
$redirects['flat'][rtrim($row['source_path'], '/') . '/'][$row['uid']] = $row;
}
}
$this->cache->set('redirects', $redirects);
$this->cache->set($this->buildCacheIdentifier($sourceHost), $redirects);
return $redirects;
}
/**
* Rebuild cache for each distinct redirect source_host.
*/
public function rebuildAll(): void
{
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_redirect');
// remove all restriction, as we need to retrieve the source host even for hidden or deleted redirects.
$queryBuilder->getRestrictions()->removeAll();
$resultSet = $queryBuilder
->select('source_host')
->distinct()
->from('sys_redirect')
->executeQuery();
while ($row = $resultSet->fetchAssociative()) {
$this->rebuildForHost($row['source_host'] ?? '*');
}
}
private function buildCacheIdentifier(string $sourceHost): string
{
return 'redirects_' . sha1($sourceHost);
}
}
......@@ -77,26 +77,26 @@ class RedirectService implements LoggerAwareInterface
*/
public function matchRedirect(string $domain, string $path, string $query = ''): ?array
{
$allRedirects = $this->fetchRedirects();
$path = rawurldecode($path);
// Check if the domain matches, or if there is a
// redirect fitting for any domain
foreach ([$domain, '*'] as $domainName) {
if (empty($allRedirects[$domainName])) {
$redirects = $this->fetchRedirects($domainName);
if (empty($redirects)) {
continue;
}
// check if a flat redirect matches with the Query applied
if (!empty($query)) {
$pathWithQuery = rtrim($path, '/') . '?' . ltrim($query, '?');
if (!empty($allRedirects[$domainName]['respect_query_parameters'][$pathWithQuery])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($allRedirects[$domainName]['respect_query_parameters'][$pathWithQuery])) {
if (!empty($redirects['respect_query_parameters'][$pathWithQuery])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($redirects['respect_query_parameters'][$pathWithQuery])) {
return $matchedRedirect;
}
} else {
$pathWithQueryAndSlash = rtrim($path, '/') . '/?' . ltrim($query, '?');
if (!empty($allRedirects[$domainName]['respect_query_parameters'][$pathWithQueryAndSlash])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($allRedirects[$domainName]['respect_query_parameters'][$pathWithQueryAndSlash])) {
if (!empty($redirects['respect_query_parameters'][$pathWithQueryAndSlash])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($redirects['respect_query_parameters'][$pathWithQueryAndSlash])) {
return $matchedRedirect;
}
}
......@@ -104,15 +104,15 @@ class RedirectService implements LoggerAwareInterface
}
// check if a flat redirect matches
if (!empty($allRedirects[$domainName]['flat'][rtrim($path, '/') . '/'])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($allRedirects[$domainName]['flat'][rtrim($path, '/') . '/'])) {
if (!empty($redirects['flat'][rtrim($path, '/') . '/'])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($redirects['flat'][rtrim($path, '/') . '/'])) {
return $matchedRedirect;
}
}
// check all redirects that are registered as regex
if (!empty($allRedirects[$domainName]['regexp'])) {
$allRegexps = array_keys($allRedirects[$domainName]['regexp']);
if (!empty($redirects['regexp'])) {
$allRegexps = array_keys($redirects['regexp']);
$regExpPath = $path;
if (!empty($query)) {
$regExpPath .= '?' . ltrim($query, '?');
......@@ -120,7 +120,7 @@ class RedirectService implements LoggerAwareInterface
foreach ($allRegexps as $regexp) {
$matchResult = @preg_match((string)$regexp, $regExpPath);
if ($matchResult > 0) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($allRedirects[$domainName]['regexp'][$regexp])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($redirects['regexp'][$regexp])) {
return $matchedRedirect;
}
continue;
......@@ -139,7 +139,7 @@ class RedirectService implements LoggerAwareInterface
if (!empty($query)) {
$matchResult = preg_match((string)$regexp, $path);
if ($matchResult > 0) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($allRedirects[$domainName]['regexp'][$regexp])) {
if ($matchedRedirect = $this->getFirstActiveRedirectFromPossibleRedirects($redirects['regexp'][$regexp])) {
return $matchedRedirect;
}
continue;
......@@ -165,12 +165,12 @@ class RedirectService implements LoggerAwareInterface
}
/**
* Fetches all redirects from the DB and caches them, grouped by the domain
* does NOT take starttime/endtime into account, as it is cached.
* Fetches all redirects from cache, with fallback to rebuild cache from the DB if caches was empty,
* grouped by the domain does NOT take starttime/endtime into account, as it is cached.
*/
protected function fetchRedirects(): array
protected function fetchRedirects(string $sourceHost): array
{
return $this->redirectCacheService->getRedirects();
return $this->redirectCacheService->getRedirects($sourceHost);
}
/**
......
......@@ -103,12 +103,20 @@ class SlugService implements LoggerAwareInterface
*/
protected $httpStatusCode;
public function __construct(Context $context, SiteFinder $siteFinder, PageRepository $pageRepository, LinkService $linkService)
{
protected RedirectCacheService $redirectCacheService;
public function __construct(
Context $context,
SiteFinder $siteFinder,
PageRepository $pageRepository,
LinkService $linkService,
RedirectCacheService $redirectCacheService
) {
$this->context = $context;
$this->siteFinder = $siteFinder;
$this->pageRepository = $pageRepository;
$this->linkService = $linkService;
$this->redirectCacheService = $redirectCacheService;
}
public function rebuildSlugsForSlugChange(int $pageId, string $currentSlug, string $newSlug, CorrelationId $correlationId): void
......@@ -120,15 +128,19 @@ class SlugService implements LoggerAwareInterface
$defaultPageId = (int)$currentPageRecord['sys_language_uid'] > 0 ? (int)$currentPageRecord['l10n_parent'] : $pageId;
$this->initializeSettings($defaultPageId);
if ($this->autoUpdateSlugs || $this->autoCreateRedirects) {
$createdRedirect = null;
$this->createCorrelationIds($pageId, $correlationId);
if ($this->autoCreateRedirects) {
$this->createRedirect($currentSlug, $defaultPageId, (int)$currentPageRecord['sys_language_uid'], (int)$pageId);
$createdRedirect = $this->createRedirect($currentSlug, $defaultPageId, (int)$currentPageRecord['sys_language_uid'], $pageId);
}
if ($this->autoUpdateSlugs) {
$this->checkSubPages($currentPageRecord, $currentSlug, $newSlug);
}
$this->sendNotification();
GeneralUtility::makeInstance(RedirectCacheService::class)->rebuild();
// rebuild caches only for matched source hosts
if ($createdRedirect) {
$this->redirectCacheService->rebuildForHost($createdRedirect['source_host'] ?: '*');
}
}
}
......@@ -156,7 +168,10 @@ class SlugService implements LoggerAwareInterface
$this->correlationIdSlugUpdate = $correlationId->withAspects(self::CORRELATION_ID_IDENTIFIER, 'slug');
}
protected function createRedirect(string $originalSlug, int $pageId, int $languageId, int $pid): void
/**
* @return array The created redirect record
*/
protected function createRedirect(string $originalSlug, int $pageId, int $languageId, int $pid): array
{
$siteLanguage = $this->site->getLanguageById($languageId);
$basePath = rtrim($siteLanguage->getBase()->getPath(), '/');
......@@ -196,6 +211,7 @@ class SlugService implements LoggerAwareInterface
$id = (int)$connection->lastInsertId('sys_redirect');
$record['uid'] = $id;
$this->getRecordHistoryStore()->addRecord('sys_redirect', $id, $record, $this->correlationIdRedirectCreation);
return $record;
}
protected function checkSubPages(array $currentPageRecord, string $oldSlugOfParentPage, string $newSlugOfParentPage): void
......
......@@ -28,6 +28,7 @@ use TYPO3\CMS\Core\Routing\SiteMatcher;
use TYPO3\CMS\Core\Site\SiteFinder;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Core\Utility\StringUtility;
use TYPO3\CMS\Redirects\Service\RedirectCacheService;
use TYPO3\CMS\Redirects\Service\SlugService;
use TYPO3\TestingFramework\Core\Functional\FunctionalTestCase;
......@@ -439,7 +440,8 @@ class SlugServiceTest extends FunctionalTestCase
GeneralUtility::makeInstance(Context::class),
GeneralUtility::makeInstance(SiteFinder::class),
GeneralUtility::makeInstance(PageRepository::class),
GeneralUtility::makeInstance(LinkService::class)
GeneralUtility::makeInstance(LinkService::class),
GeneralUtility::makeInstance(RedirectCacheService::class)
);
$this->subject->setLogger(new NullLogger());
}
......
......@@ -78,7 +78,8 @@ class RedirectServiceTest extends UnitTestCase
*/
public function matchRedirectReturnsNullIfNoRedirectsExist(): void
{
$this->redirectCacheServiceProphecy->getRedirects()->willReturn([]);
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn([]);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'foo');
......@@ -101,17 +102,16 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'flat' => [
$path . '/' => [
1 => $row,
],
'flat' => [
$path . '/' => [
1 => $row,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', rawurlencode($path));
......@@ -160,17 +160,16 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'respect_query_parameters' => [
'index.php?id=123' => [
1 => $row,
],
'respect_query_parameters' => [
'index.php?id=123' => [
1 => $row,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'index.php', 'id=123');
......@@ -192,17 +191,16 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'respect_query_parameters' => [
'index.php/?id=123' => [
1 => $row,
],
'respect_query_parameters' => [
'index.php/?id=123' => [
1 => $row,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'index.php', 'id=123');
......@@ -224,17 +222,16 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'respect_query_parameters' => [
'index.php?id=123&a=b' => [
1 => $row,
],
'respect_query_parameters' => [
'index.php?id=123&a=b' => [
1 => $row,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'index.php', 'id=123&a=b');
......@@ -256,17 +253,16 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'respect_query_parameters' => [
'index.php?id=123&a=b' => [
1 => $row,
],
'respect_query_parameters' => [
'index.php?id=123&a=b' => [
1 => $row,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'index.php', 'id=123&a=a');
......@@ -298,23 +294,22 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'flat' => [
'special/page/' =>
'flat' => [
'special/page/' =>
[
1 => $row1,
],
],
'respect_query_parameters' => [
'special/page?key=998877' => [
1 => $row2,
],
],
'respect_query_parameters' => [
'special/page?key=998877' => [
1 => $row2,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'special/page', 'key=998877');
......@@ -344,20 +339,20 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'flat' => [
'foo/' => [
1 => $row1,
],
'flat' => [
'foo/' => [
1 => $row1,
],
],
'*' => [
'flat' => [
'foo/' => [
2 => $row2,
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn(
[
'flat' => [
'foo/' => [
2 => $row2,
],
],
]
......@@ -382,17 +377,16 @@ class RedirectServiceTest extends UnitTestCase
'starttime' => '0',
'endtime' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'regexp' => [
'/f.*?/' => [
1 => $row,
],
'regexp' => [
'/f.*?/' => [
1 => $row,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'foo');
......@@ -422,18 +416,17 @@ class RedirectServiceTest extends UnitTestCase
'endtime' => '0',
'disabled' => '0',
];
$this->redirectCacheServiceProphecy->getRedirects()->willReturn(
$this->redirectCacheServiceProphecy->getRedirects('example.com')->willReturn(
[
'example.com' => [
'flat' => [
'foo/' => [
1 => $row1,
2 => $row2,
],
'flat' => [
'foo/' => [
1 => $row1,
2 => $row2,
],
],
]
);
$this->redirectCacheServiceProphecy->getRedirects('*')->willReturn([]);
$result = $this->redirectService->matchRedirect('example.com', 'foo');
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment