[BUGFIX] Respect GET parameters when generating canonicalized URLs 11/58611/22
authorRichard Haeser <richard@maxserv.com>
Wed, 10 Oct 2018 21:49:03 +0000 (23:49 +0200)
committerMarkus Klein <markus.klein@typo3.org>
Mon, 10 Dec 2018 21:06:16 +0000 (22:06 +0100)
Add current query string to the generated canonicalized URL to make the
hreflang and canonical work with additional parameters as well.

Parameters that are excluded from cache hash generation and not included
in $GLOBALS['TYPO3_CONF_VARS']['FE']['additionalCanonicalizedUrlParameters'],
are excluded from the canonicalized URL.

Resolves: #86577
Resolves: #86866
Resolves: #86865
Releases: master
Change-Id: Ib574e2f6c7aa869b4ffa798c7122a60f4cf13f13
Reviewed-on: https://review.typo3.org/58611
Tested-by: Joerg Kummer <typo3@enobe.de>
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Joerg Kummer <typo3@enobe.de>
Reviewed-by: Markus Klein <markus.klein@typo3.org>
Tested-by: Markus Klein <markus.klein@typo3.org>
typo3/sysext/core/Configuration/DefaultConfiguration.php
typo3/sysext/core/Configuration/DefaultConfigurationDescription.yaml
typo3/sysext/core/Documentation/Changelog/master/Important-86577-QueryParametersAreNowIncludedInCanonicalizedUrls.rst [new file with mode: 0644]
typo3/sysext/frontend/Classes/DataProcessing/LanguageMenuProcessor.php
typo3/sysext/frontend/Classes/Utility/CanonicalizationUtility.php [new file with mode: 0644]
typo3/sysext/seo/Classes/Canonical/CanonicalGenerator.php
typo3/sysext/seo/Classes/HrefLang/HrefLangGenerator.php

index f12434a..54ec536 100644 (file)
@@ -1304,6 +1304,7 @@ return [
             'excludeAllEmptyParameters' => false,
             'excludedParametersIfEmpty' => []
         ],
+        'additionalCanonicalizedUrlParameters' => [],
         'workspacePreviewLogoutTemplate' => '',
         'versionNumberInFilename' => 'querystring',
         'contentRenderingTemplates' => [], // Array to define the TypoScript parts that define the main content rendering. Extensions like "fluid_styled_content" provide content rendering templates. Other extensions like "felogin" or "indexed search" extend these templates and their TypoScript parts are added directly after the content templates. See EXT:fluid_styled_content/ext_localconf.php and EXT:frontend/Classes/TypoScript/TemplateService.php
index 73c16b3..121e8e8 100644 (file)
@@ -490,6 +490,9 @@ FE:
               excludeAllEmptyParameters:
                   type: bool
                   description: 'If true, all parameters which are relevant for cHash are only considered if they are non-empty.'
+        additionalCanonicalizedUrlParameters:
+            type: array
+            description: The given parameters will be included when calculating the canonicalized URL
         workspacePreviewLogoutTemplate:
             type: text
             description: 'If set, points to an HTML file relative to the TYPO3_site root which will be read and outputted as template for this message. Example: <code>fileadmin/templates/template_workspace_preview_logout.html</code>. Inside you can put the marker %1$s to insert the URL to go back to. Use this in <code>&lt;a href="%1$s"&gt;Go back...&lt;/a&gt;</code> links.'
diff --git a/typo3/sysext/core/Documentation/Changelog/master/Important-86577-QueryParametersAreNowIncludedInCanonicalizedUrls.rst b/typo3/sysext/core/Documentation/Changelog/master/Important-86577-QueryParametersAreNowIncludedInCanonicalizedUrls.rst
new file mode 100644 (file)
index 0000000..59f962e
--- /dev/null
@@ -0,0 +1,44 @@
+.. include:: ../../Includes.txt
+
+===========================================================================
+Important: #86577 - Query parameters are now included in canonicalized URLs
+===========================================================================
+
+See :issue:`86577`
+
+Description
+===========
+
+Canonicalized URLs include all query parameters which are needed to define what content to show
+on a page. These URLs are used for the canonical URL and the hreflang URLs.
+This is especially important for, e.g., detail pages of records, where the query parameters
+are crucial to show the right content.
+
+Possibility to define query parameters to be included in canonicalized URLs
+---------------------------------------------------------------------------
+
+By default only parameters that are needed to calculate the cHash are included in the
+canonicalized URLs. If you want to add your own parameters that should be included in those
+URLs, you can use the newly introduced configuration option
+:php:`$GLOBALS['TYPO3_CONF_VARS']['FE']['additionalCanonicalizedUrlParameters']`. You can add
+your own query parameters by adding them as elements of the array.
+
+An example:
+
+.. code-block:: php
+
+   $GLOBALS['TYPO3_CONF_VARS']['FE']['additionalCanonicalizedUrlParameters'] = [
+       'queryParam1',
+       'queryParam2',
+   ];
+
+This example will add query parameters `queryParam1` and `queryParam2` to the canonicalized
+URLs if they are provided.
+
+.. important::
+
+    Be careful when adding your own parameters. Only add those parameters which will change the
+    content of your page. Otherwise, search engines will most likely flag your pages as
+    duplicate content.
+
+.. index:: ext:seo, ext:frontend, PHP-API
index d2bc970..b0a4f4d 100644 (file)
@@ -23,6 +23,7 @@ use TYPO3\CMS\Frontend\ContentObject\ContentDataProcessor;
 use TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer;
 use TYPO3\CMS\Frontend\ContentObject\DataProcessorInterface;
 use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
+use TYPO3\CMS\Frontend\Utility\CanonicalizationUtility;
 
 /**
  * This menu processor generates a json encoded menu string that will be
@@ -69,7 +70,9 @@ class LanguageMenuProcessor implements DataProcessorInterface
         'if.',
         'languages',
         'languages.',
-        'as'
+        'as',
+        'addQueryString',
+        'addQueryString.'
     ];
 
     /**
@@ -89,6 +92,10 @@ class LanguageMenuProcessor implements DataProcessorInterface
      */
     protected $menuConfig = [
         'special' => 'language',
+        'addQueryString' => 1,
+        'addQueryString.' => [
+            'method' => 'GET'
+        ],
         'wrap' => '[|]'
     ];
 
@@ -322,6 +329,19 @@ class LanguageMenuProcessor implements DataProcessorInterface
                 unset($this->menuConfig[$key]);
             }
         }
+
+        $paramsToExclude = CanonicalizationUtility::getParamsToExcludeForCanonicalizedUrl(
+            (int)$this->getTypoScriptFrontendController()->id,
+            (array)$GLOBALS['TYPO3_CONF_VARS']['FE']['additionalCanonicalizedUrlParameters']
+        );
+
+        $this->menuConfig['addQueryString.']['exclude'] = implode(
+            ',',
+            array_merge(
+                GeneralUtility::trimExplode(',', $this->menuConfig['addQueryString.']['exclude'] ?? '', true),
+                $paramsToExclude
+            )
+        );
     }
 
     /**
diff --git a/typo3/sysext/frontend/Classes/Utility/CanonicalizationUtility.php b/typo3/sysext/frontend/Classes/Utility/CanonicalizationUtility.php
new file mode 100644 (file)
index 0000000..0af6859
--- /dev/null
@@ -0,0 +1,63 @@
+<?php
+declare(strict_types = 1);
+
+namespace TYPO3\CMS\Frontend\Utility;
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+use Psr\Http\Message\ServerRequestInterface;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+use TYPO3\CMS\Core\Utility\HttpUtility;
+use TYPO3\CMS\Frontend\Page\CacheHashCalculator;
+
+class CanonicalizationUtility
+{
+    /**
+     * Get the names of all current GET params that are not needed for a canonicalized URL,
+     * i.e. those that are neither returned by CacheHashCalculator::getRelevantParameters()
+     * nor listed in $additionalCanonicalizedUrlParameters.
+     *
+     * The format of the additionalCanonicalizedUrlParameters is:
+     * $parameters = [
+     *  'foo', 'bar', 'foo[bar]'
+     * ]
+     *
+     * @param int $pageId Id of the page you want to get the excluded params for
+     * @param array $additionalCanonicalizedUrlParameters Which GET-params should stay besides the params used for cHash calculation
+     *
+     * @return array Flat parameter names (e.g. 'foo[bar]') to exclude; keys are not re-indexed
+     */
+    public static function getParamsToExcludeForCanonicalizedUrl(int $pageId, array $additionalCanonicalizedUrlParameters = []): array
+    {
+        $cacheHashCalculator = GeneralUtility::makeInstance(CacheHashCalculator::class);
+
+        // The request global may not be set; read it null-safe and fall back to no query params
+        $request = $GLOBALS['TYPO3_REQUEST'] ?? null;
+        $GET = $request instanceof ServerRequestInterface ? $request->getQueryParams() : [];
+        $GET['id'] = $pageId;
+
+        $queryString = HttpUtility::buildQueryString($GET, '&');
+        $cHashArray = $cacheHashCalculator->getRelevantParameters($queryString);
+
+        // By exploding the earlier imploded array, we get the flat array with URL params
+        $urlParameters = GeneralUtility::explodeUrl2Array($queryString);
+
+        // Compare by parameter name: a value-based array_diff() would keep an irrelevant
+        // parameter whenever its value happens to equal the value of a relevant one.
+        // NOTE(review): assumes getRelevantParameters() keys its result by the same flat names.
+        $paramsToExclude = array_keys(array_diff_key($urlParameters, $cHashArray));
+
+        return array_diff($paramsToExclude, $additionalCanonicalizedUrlParameters);
+    }
+}
index f7a5157..dfbc2ed 100644 (file)
@@ -20,6 +20,7 @@ use TYPO3\CMS\Core\Utility\GeneralUtility;
 use TYPO3\CMS\Extbase\SignalSlot\Dispatcher;
 use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
 use TYPO3\CMS\Frontend\Page\PageRepository;
+use TYPO3\CMS\Frontend\Utility\CanonicalizationUtility;
 
 /**
  * Class to add the canonical tag to the page
@@ -141,13 +142,19 @@ class CanonicalGenerator
     protected function checkDefaultCanonical(): string
     {
         return $this->typoScriptFrontendController->cObj->typoLink_URL([
-            'parameter' => $this->typoScriptFrontendController->page['uid'],
+            'parameter' => $this->typoScriptFrontendController->id . ',' . $this->typoScriptFrontendController->type,
             'forceAbsoluteUrl' => true,
             'addQueryString' => true,
             'addQueryString.' => [
-                'exclude' => 'type,no_cache'
-            ],
-            'useCacheHash' => true,
+                'method' => 'GET',
+                'exclude' => implode(
+                    ',',
+                    CanonicalizationUtility::getParamsToExcludeForCanonicalizedUrl(
+                        (int)$this->typoScriptFrontendController->id,
+                        (array)$GLOBALS['TYPO3_CONF_VARS']['FE']['additionalCanonicalizedUrlParameters']
+                    )
+                )
+            ]
         ]);
     }
 
index 5161a7c..7eea1f5 100644 (file)
@@ -100,10 +100,14 @@ class HrefLangGenerator
     {
         $uri = new Uri($url);
         if (empty($uri->getHost())) {
-            $url = (string)$this->getSiteLanguage()->getBase()->withPath($uri->getPath());
+            $url = $this->getSiteLanguage()->getBase()->withPath($uri->getPath());
+
+            if ($uri->getQuery()) {
+                $url = $url->withQuery($uri->getQuery());
+            }
         }
 
-        return $url;
+        return (string)$url;
     }
 
     /**