[FEATURE] Add support for the "canonical" tag 71/58271/16
authorKevin Appelt <kevin.appelt@icloud.com>
Thu, 13 Sep 2018 20:58:47 +0000 (22:58 +0200)
committerFrank Naegler <frank.naegler@typo3.org>
Thu, 20 Sep 2018 06:57:50 +0000 (08:57 +0200)
TYPO3 will now create the canonical tag out of the box if you have
installed the core extension SEO. The canonical tag is based on the
settings in the page properties. First it will check for the "Show
content from page" setting. If that is not set, it will check if you
defined a canonical link, and if both are not set, it will generate an
absolute URL to the current page.

Resolves: #20051
Releases: master
Change-Id: I1ae01e419a1f822b8659bf71ba99523051a2e77c
Reviewed-on: https://review.typo3.org/58271
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Richard Haeser <richard@maxserv.com>
Tested-by: Richard Haeser <richard@maxserv.com>
Reviewed-by: Frank Naegler <frank.naegler@typo3.org>
Tested-by: Frank Naegler <frank.naegler@typo3.org>
typo3/sysext/core/Documentation/Changelog/master/Feature-20051-SupportTheCanonicalTag.rst [new file with mode: 0644]
typo3/sysext/seo/Classes/Canonical/CanonicalGenerator.php [new file with mode: 0644]
typo3/sysext/seo/Configuration/TCA/Overrides/pages.php
typo3/sysext/seo/Resources/Private/Language/locallang_tca.xlf
typo3/sysext/seo/Tests/Functional/Canonical/CanonicalGeneratorTest.php [new file with mode: 0644]
typo3/sysext/seo/Tests/Functional/Fixtures/pages-canonical.xml [new file with mode: 0644]
typo3/sysext/seo/ext_localconf.php
typo3/sysext/seo/ext_tables.sql

diff --git a/typo3/sysext/core/Documentation/Changelog/master/Feature-20051-SupportTheCanonicalTag.rst b/typo3/sysext/core/Documentation/Changelog/master/Feature-20051-SupportTheCanonicalTag.rst
new file mode 100644 (file)
index 0000000..b61f3e9
--- /dev/null
@@ -0,0 +1,34 @@
+.. include:: ../../Includes.txt
+
+=============================================
+Feature: #20051 - Support the "canonical" tag
+=============================================
+
+See :issue:`20051`
+
+Description
+===========
+
+TYPO3 will finally provide built-in support for the :html:`<link rel="canonical" href="">` tag.
+
+If the core extension "seo" is installed, it will automatically add the canonical link to the page.
+
+The canonical link is basically the same absolute link as the link to the current hreflang and is meant
+to indicate where the original source of the content is. It is a tool to avoid duplicate content
+penalties.
+
+In the page properties, the canonical link can be overwritten per language. The link wizard offers all the
+well known possibilities including external links and link handler configurations.
+
+Should an empty href occur when generating the link to overwrite the canonical (this happens e.g. if the
+selected page is not available in the current language), the fallback to the current hreflang will be activated
+automatically. This ensures that there is no empty canonical.
+
+Impact
+======
+
+If you have other SEO extensions installed that generate canonical links, you have to make sure the other
+extensions don't do that anymore. If both core and an extension are generating a canonical link, it will
+result in 2 canonical links which might cause confusion for search engines.
+
+.. index:: Backend, Database, Frontend, TCA, ext:seo
diff --git a/typo3/sysext/seo/Classes/Canonical/CanonicalGenerator.php b/typo3/sysext/seo/Classes/Canonical/CanonicalGenerator.php
new file mode 100644 (file)
index 0000000..1a11702
--- /dev/null
@@ -0,0 +1,158 @@
+<?php
+declare(strict_types = 1);
+
+namespace TYPO3\CMS\Seo\Canonical;
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+use TYPO3\CMS\Extbase\SignalSlot\Dispatcher;
+use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
+use TYPO3\CMS\Frontend\Page\PageRepository;
+
+/**
+ * Class to add the canonical tag to the page
+ *
+ * @internal
+ */
+class CanonicalGenerator
+{
+    /**
+     * Frontend controller providing the current page record; the generated
+     * tag is appended to its additionalHeaderData
+     *
+     * @var TypoScriptFrontendController
+     */
+    protected $typoScriptFrontendController;
+
+    /**
+     * Repository used to load the page referenced by "content_from_pid"
+     *
+     * @var PageRepository
+     */
+    protected $pageRepository;
+
+    /**
+     * Dispatcher for the "beforeGeneratingCanonical" signal
+     *
+     * @var Dispatcher
+     */
+    protected $signalSlotDispatcher;
+
+    /**
+     * CanonicalGenerator constructor
+     *
+     * Both arguments are optional so the class can be instantiated without
+     * arguments: a missing controller is taken from $GLOBALS['TSFE'], a missing
+     * dispatcher is created via GeneralUtility::makeInstance().
+     *
+     * @param TypoScriptFrontendController|null $typoScriptFrontendController
+     * @param Dispatcher|null $signalSlotDispatcher
+     */
+    public function __construct(?TypoScriptFrontendController $typoScriptFrontendController = null, ?Dispatcher $signalSlotDispatcher = null)
+    {
+        $this->typoScriptFrontendController = $typoScriptFrontendController ?? $this->getTypoScriptFrontendController();
+        $this->signalSlotDispatcher = $signalSlotDispatcher ?? GeneralUtility::makeInstance(Dispatcher::class);
+        $this->pageRepository = GeneralUtility::makeInstance(PageRepository::class);
+        // NOTE(review): the argument is presumably PageRepository's "show hidden" flag - confirm
+        $this->pageRepository->init(false);
+    }
+
+    /**
+     * Builds the canonical link tag for the current page, appends it to the
+     * additionalHeaderData of the frontend controller and returns it.
+     *
+     * The URL is determined in this order, the first non-empty result wins:
+     * 0) value left in $href after the "beforeGeneratingCanonical" signal
+     * 1) target of "content_from_pid"
+     * 2) "canonical_link" of the current page
+     * 3) absolute URL of the current page
+     *
+     * @return string The rendered tag including trailing line feed, or an empty string if no URL could be determined
+     * @throws \TYPO3\CMS\Extbase\SignalSlot\Exception\InvalidSlotException
+     * @throws \TYPO3\CMS\Extbase\SignalSlot\Exception\InvalidSlotReturnException
+     */
+    public function generate(): string
+    {
+        $href = '';
+        // $href is handed over by reference; if it is non-empty afterwards, all checks below are skipped
+        $this->signalSlotDispatcher->dispatch(self::class, 'beforeGeneratingCanonical', [&$href]);
+
+        if (empty($href)) {
+            // 1) Check if page show content from other page
+            $href = $this->checkContentFromPid();
+        }
+        if (empty($href)) {
+            // 2) Check if page has canonical URL set
+            $href = $this->checkForCanonicalLink();
+        }
+        if (empty($href)) {
+            // 3) Fallback, create canonical URL
+            $href = $this->checkDefaultCanonical();
+        }
+
+        if (empty($href)) {
+            // Never emit a tag with an empty href
+            return '';
+        }
+
+        // Second argument "true" lets implodeAttributes() apply htmlspecialchars() to the values
+        $canonical = '<link ' . GeneralUtility::implodeAttributes([
+            'rel' => 'canonical',
+            'href' => $href
+        ], true) . '/>' . LF;
+        $this->typoScriptFrontendController->additionalHeaderData[] = $canonical;
+        return $canonical;
+    }
+
+    /**
+     * Resolves the "canonical_link" field of the current page to an absolute URL.
+     *
+     * @return string The resolved URL, or an empty string if the field is empty
+     */
+    protected function checkForCanonicalLink(): string
+    {
+        if (!empty($this->typoScriptFrontendController->page['canonical_link'])) {
+            return $this->typoScriptFrontendController->cObj->typoLink_URL([
+                'parameter' => $this->typoScriptFrontendController->page['canonical_link'],
+                'forceAbsoluteUrl' => true,
+            ]);
+        }
+        return '';
+    }
+
+    /**
+     * Resolves the canonical URL for a page that shows the content of another
+     * page ("content_from_pid").
+     *
+     * If the referenced page has its own "canonical_link", that link is used,
+     * otherwise the link points to the referenced page itself.
+     *
+     * @return string The resolved absolute URL, or an empty string if "content_from_pid" is empty or not a positive integer
+     */
+    protected function checkContentFromPid(): string
+    {
+        if (!empty($this->typoScriptFrontendController->page['content_from_pid'])) {
+            $parameter = (int)$this->typoScriptFrontendController->page['content_from_pid'];
+            if ($parameter > 0) {
+                // NOTE(review): second argument presumably disables the group access check - confirm against PageRepository::getPage()
+                $targetPage = $this->pageRepository->getPage($parameter, true);
+                if (!empty($targetPage['canonical_link'])) {
+                    $parameter = $targetPage['canonical_link'];
+                }
+                return $this->typoScriptFrontendController->cObj->typoLink_URL([
+                    'parameter' => $parameter,
+                    'forceAbsoluteUrl' => true,
+                ]);
+            }
+        }
+        return '';
+    }
+
+    /**
+     * Builds the fallback canonical: an absolute link to the current page
+     * that carries over the current query string except "type" and "no_cache".
+     *
+     * @return string
+     */
+    protected function checkDefaultCanonical(): string
+    {
+        return $this->typoScriptFrontendController->cObj->typoLink_URL([
+            'parameter' => $this->typoScriptFrontendController->page['uid'],
+            'forceAbsoluteUrl' => true,
+            'addQueryString' => true,
+            'addQueryString.' => [
+                'exclude' => 'type,no_cache'
+            ],
+            'useCacheHash' => true,
+        ]);
+    }
+
+    /**
+     * Returns the global frontend controller, used when none was injected.
+     *
+     * @return TypoScriptFrontendController
+     */
+    protected function getTypoScriptFrontendController(): TypoScriptFrontendController
+    {
+        return $GLOBALS['TSFE'];
+    }
+}
index d797209..c817f2f 100644 (file)
@@ -19,6 +19,10 @@ $tca = [
             'label' => 'LLL:EXT:seo/Resources/Private/Language/locallang_tca.xlf:pages.palettes.twittercards',
             'showitem' => 'twitter_title, --linebreak--, twitter_description, --linebreak--, twitter_image',
         ],
+        'canonical' => [
+            'label' => 'LLL:EXT:seo/Resources/Private/Language/locallang_tca.xlf:pages.palettes.canonical',
+            'showitem' => 'canonical_link',
+        ],
     ],
     'columns' => [
         'seo_title' => [
@@ -162,6 +166,27 @@ $tca = [
                 $GLOBALS['TYPO3_CONF_VARS']['GFX']['imagefile_ext']
             )
         ],
+        'canonical_link' => [
+            'exclude' => true,
+            'label' => 'LLL:EXT:seo/Resources/Private/Language/locallang_tca.xlf:pages.canonical_link',
+            'config' => [
+                'type' => 'input',
+                'renderType' => 'inputLink',
+                'size' => 50,
+                'max' => 1024,
+                'eval' => 'trim',
+                'fieldControl' => [
+                    'linkPopup' => [
+                        'options' => [
+                            'title' => 'LLL:EXT:seo/Resources/Private/Language/locallang_tca.xlf:pages.canonical_link',
+                            'blindLinkFields' => 'class,target,title',
+                            'blindLinkOptions' => 'mail,folder,file'
+                        ],
+                    ],
+                ],
+                'softref' => 'typolink'
+            ]
+        ],
     ],
 ];
 
@@ -173,7 +198,8 @@ $GLOBALS['TCA']['pages'] = array_replace_recursive($GLOBALS['TCA']['pages'], $tc
         --palette--;;seo,
         --palette--;;robots,
         --palette--;;opengraph,
-        --palette--;;twittercards,',
+        --palette--;;twittercards,
+        --palette--;;canonical,',
     (string)\TYPO3\CMS\Frontend\Page\PageRepository::DOKTYPE_DEFAULT,
     'after:title'
 );
index 4bdbd70..7d97c89 100644 (file)
@@ -1,47 +1,48 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <xliff version="1.0" xmlns:t3="http://typo3.org/schemas/xliff">
-       <file t3:id="1528554340" source-language="en" datatype="plaintext" original="messages" date="2018-08-09T16:22:32Z" product-name="seo">
+       <file t3:id="1528554340" source-language="en" datatype="plaintext" original="messages" date="2018-08-09T16:22:32Z"
+                 product-name="seo">
                <header/>
                <body>
                        <trans-unit id="pages.tabs.seo">
                                <source>SEO</source>
                        </trans-unit>
-            <trans-unit id="pages.palettes.seo">
-                <source>General SEO settings</source>
-            </trans-unit>
-            <trans-unit id="pages.seo_title">
-                <source>Title for search engines</source>
-            </trans-unit>
-            <trans-unit id="pages.palettes.robots">
-                <source>Robot instructions</source>
-            </trans-unit>
-            <trans-unit id="pages.no_index">
-                <source>No index</source>
-            </trans-unit>
-            <trans-unit id="pages.no_index_formlabel">
-                <source>Index this page</source>
-            </trans-unit>
-            <trans-unit id="pages.no_follow">
-                <source>No follow</source>
-            </trans-unit>
-            <trans-unit id="pages.no_follow_formlabel">
-                <source>Follow this page</source>
-            </trans-unit>
-            <trans-unit id="pages.palettes.opengraph">
-                <source>Open Graph (Facebook)</source>
-            </trans-unit>
-            <trans-unit id="pages.og_title">
-                <source>Title</source>
-            </trans-unit>
-            <trans-unit id="pages.og_description">
-                <source>Description</source>
-            </trans-unit>
-            <trans-unit id="pages.og_image">
-                <source>Image</source>
-            </trans-unit>
-            <trans-unit id="pages.palettes.twittercards">
-                <source>Twitter Cards</source>
-            </trans-unit>
+                       <trans-unit id="pages.palettes.seo">
+                               <source>General SEO settings</source>
+                       </trans-unit>
+                       <trans-unit id="pages.seo_title">
+                               <source>Title for search engines</source>
+                       </trans-unit>
+                       <trans-unit id="pages.palettes.robots">
+                               <source>Robot instructions</source>
+                       </trans-unit>
+                       <trans-unit id="pages.no_index">
+                               <source>No index</source>
+                       </trans-unit>
+                       <trans-unit id="pages.no_index_formlabel">
+                               <source>Index this page</source>
+                       </trans-unit>
+                       <trans-unit id="pages.no_follow">
+                               <source>No follow</source>
+                       </trans-unit>
+                       <trans-unit id="pages.no_follow_formlabel">
+                               <source>Follow this page</source>
+                       </trans-unit>
+                       <trans-unit id="pages.palettes.opengraph">
+                               <source>Open Graph (Facebook)</source>
+                       </trans-unit>
+                       <trans-unit id="pages.og_title">
+                               <source>Title</source>
+                       </trans-unit>
+                       <trans-unit id="pages.og_description">
+                               <source>Description</source>
+                       </trans-unit>
+                       <trans-unit id="pages.og_image">
+                               <source>Image</source>
+                       </trans-unit>
+                       <trans-unit id="pages.palettes.twittercards">
+                               <source>Twitter Cards</source>
+                       </trans-unit>
                        <trans-unit id="pages.twitter_title">
                                <source>Title</source>
                        </trans-unit>
                        <trans-unit id="pages.twitter_image">
                                <source>Image</source>
                        </trans-unit>
+                       <trans-unit id="pages.palettes.canonical">
+                               <source>Canonical</source>
+                       </trans-unit>
+                       <trans-unit id="pages.canonical_link">
+                               <source>Canonical link</source>
+                       </trans-unit>
                </body>
        </file>
 </xliff>
diff --git a/typo3/sysext/seo/Tests/Functional/Canonical/CanonicalGeneratorTest.php b/typo3/sysext/seo/Tests/Functional/Canonical/CanonicalGeneratorTest.php
new file mode 100644 (file)
index 0000000..d0a9cb7
--- /dev/null
@@ -0,0 +1,86 @@
+<?php
+declare(strict_types = 1);
+
+namespace TYPO3\CMS\Seo\Tests\Functional\Canonical;
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+use Psr\Log\NullLogger;
+use TYPO3\CMS\Core\TypoScript\TemplateService;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+use TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer;
+use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
+use TYPO3\CMS\Frontend\Page\PageRepository;
+use TYPO3\CMS\Frontend\Tests\Functional\SiteHandling\AbstractTestCase;
+use TYPO3\CMS\Seo\Canonical\CanonicalGenerator;
+
+/**
+ * Test case
+ */
+class CanonicalGeneratorTest extends AbstractTestCase
+{
+    /**
+     * Core extensions loaded into the functional test instance
+     *
+     * @var string[]
+     */
+    protected $coreExtensionsToLoad = [
+        'core',
+        'frontend',
+        'seo',
+    ];
+
+    /**
+     * Imports the page fixture and writes a site configuration for
+     * root page 1 with base "http://localhost/".
+     */
+    protected function setUp(): void
+    {
+        parent::setUp();
+        $this->importDataSet('EXT:seo/Tests/Functional/Fixtures/pages-canonical.xml');
+        $siteConfiguration = $this->buildSiteConfiguration(1, 'http://localhost/');
+        $this->writeSiteConfiguration('website-local', $siteConfiguration);
+    }
+
+    /**
+     * Creates a frontend controller for the given page, registers it as
+     * $GLOBALS['TSFE'] and returns it.
+     *
+     * @param int $uid Page uid the controller is created for
+     * @return TypoScriptFrontendController
+     */
+    protected function initTypoScriptFrontendController(int $uid): TypoScriptFrontendController
+    {
+        $tsfe = new TypoScriptFrontendController(null, $uid, 0);
+        $tsfe->cObj = new ContentObjectRenderer();
+        $tsfe->cObj->setLogger(new NullLogger());
+        $tsfe->sys_page = GeneralUtility::makeInstance(PageRepository::class);
+        $tsfe->tmpl = GeneralUtility::makeInstance(TemplateService::class);
+        $tsfe->getPageAndRootlineWithDomain(1);
+
+        $GLOBALS['TSFE'] = $tsfe;
+
+        return $tsfe;
+    }
+
+    /**
+     * Data sets: page uid from the fixture and the canonical tag expected for it
+     *
+     * @return array
+     */
+    public function generateDataProvider(): array
+    {
+        return [
+            'uid: 1 with canonical_link' => [
+                1,
+                '<link rel="canonical" href="http://localhost/"/>' . LF,
+            ],
+            'uid: 2 with canonical_link' => [
+                2,
+                '<link rel="canonical" href="http://localhost/dummy-1-2"/>' . LF,
+            ],
+            'uid: 3 with canonical_link AND content_from_pid = 2' => [
+                3,
+                '<link rel="canonical" href="http://localhost/dummy-1-2"/>' . LF,
+            ],
+            'uid: 4 without canonical_link AND content_from_pid = 2' => [
+                4,
+                '<link rel="canonical" href="http://localhost/dummy-1-2"/>' . LF,
+            ],
+            'uid: 5 without canonical_link AND without content_from_pid set' => [
+                5,
+                '<link rel="canonical" href="http://localhost/?id=5"/>' . LF,
+            ],
+            'uid: 6 without canonical_link AND content_from_pid = 7 (but target page is deleted)' => [
+                6,
+                '<link rel="canonical" href="http://localhost/?id=6"/>' . LF,
+            ],
+            'uid: 8 without canonical_link AND content_from_pid = 9 (but target page is hidden)' => [
+                8,
+                '<link rel="canonical" href="http://localhost/?id=8"/>' . LF,
+            ],
+        ];
+    }
+
+    /**
+     * Asserts that the generator returns the expected tag for the given page.
+     *
+     * @test
+     * @dataProvider generateDataProvider
+     * @param int $uid
+     * @param string $expectedCanonicalUrl
+     */
+    public function generate(int $uid, string $expectedCanonicalUrl): void
+    {
+        $generator = new CanonicalGenerator($this->initTypoScriptFrontendController($uid));
+        $this->assertSame($expectedCanonicalUrl, $generator->generate());
+    }
+}
diff --git a/typo3/sysext/seo/Tests/Functional/Fixtures/pages-canonical.xml b/typo3/sysext/seo/Tests/Functional/Fixtures/pages-canonical.xml
new file mode 100644 (file)
index 0000000..787cb01
--- /dev/null
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="utf-8"?>
+<dataset>
+    <pages>
+        <uid>1</uid>
+        <pid>0</pid>
+        <title>Root 1</title>
+        <is_siteroot>1</is_siteroot>
+        <canonical_link>http://localhost/</canonical_link>
+        <content_from_pid>0</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>2</uid>
+        <pid>1</pid>
+        <title>Dummy 1-2</title>
+        <tstamp>1491811200</tstamp>
+        <canonical_link>http://localhost/dummy-1-2</canonical_link>
+        <content_from_pid>0</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>3</uid>
+        <pid>1</pid>
+        <title>Dummy 1-3</title>
+        <SYS_LASTCHANGED>1535657401</SYS_LASTCHANGED>
+        <canonical_link>http://localhost/dummy-1-3</canonical_link>
+        <content_from_pid>2</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>4</uid>
+        <pid>1</pid>
+        <title>Dummy 1-4</title>
+        <canonical_link></canonical_link>
+        <content_from_pid>2</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>5</uid>
+        <pid>2</pid>
+        <title>Dummy 1-2-5</title>
+        <canonical_link></canonical_link>
+        <content_from_pid>0</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>6</uid>
+        <pid>2</pid>
+        <title>Dummy</title>
+        <canonical_link></canonical_link>
+        <content_from_pid>7</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>7</uid>
+        <pid>2</pid>
+        <title>Dummy</title>
+        <canonical_link></canonical_link>
+        <content_from_pid>0</content_from_pid>
+        <deleted>1</deleted>
+    </pages>
+    <pages>
+        <uid>8</uid>
+        <pid>2</pid>
+        <title>Dummy</title>
+        <canonical_link></canonical_link>
+        <content_from_pid>9</content_from_pid>
+        <deleted>0</deleted>
+    </pages>
+    <pages>
+        <uid>9</uid>
+        <pid>2</pid>
+        <title>Dummy</title>
+        <canonical_link></canonical_link>
+        <content_from_pid>0</content_from_pid>
+        <deleted>0</deleted>
+        <hidden>1</hidden>
+    </pages>
+</dataset>
index 0310075..9de207e 100644 (file)
@@ -5,6 +5,8 @@ $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['TYPO3\CMS\Frontend\Page\PageGenerator
     \TYPO3\CMS\Seo\MetaTag\MetaTagGenerator::class . '->generate';
 $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['TYPO3\CMS\Frontend\Page\PageGenerator']['generateMetaTags'][] =
     \TYPO3\CMS\Seo\HrefLang\HrefLangGenerator::class . '->generate';
+$GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['TYPO3\CMS\Frontend\Page\PageGenerator']['generateMetaTags'][] =
+    \TYPO3\CMS\Seo\Canonical\CanonicalGenerator::class . '->generate';
 
 $metaTagManagerRegistry = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\TYPO3\CMS\Core\MetaTag\MetaTagManagerRegistry::class);
 $metaTagManagerRegistry->registerManager(
index 9256f9a..0c52e87 100644 (file)
@@ -11,4 +11,5 @@ CREATE TABLE pages (
        twitter_title varchar(255) DEFAULT '' NOT NULL,
        twitter_description text,
        twitter_image int(11) unsigned DEFAULT '0' NOT NULL,
+       canonical_link varchar(2048) DEFAULT '' NOT NULL,
 );