[TASK] Use native idn functionality 34/60234/7
authorBenni Mack <benni@typo3.org>
Wed, 13 Mar 2019 08:13:20 +0000 (09:13 +0100)
committerGeorg Ringer <georg.ringer@gmail.com>
Sat, 16 Mar 2019 06:30:34 +0000 (07:30 +0100)
It is safe to use idn_to_ascii() these days
due to symfony's polyfill functionality.

This allows us to remove the dependency
"algo26-matthias/idna-convert".

Also, all other places now use the native
idn_to_ascii() call, which could speed up
performance.

The wrapper call GeneralUtility::idnaEncode()
can then safely be deprecated.

used composer command:
  composer remove algo26-matthias/idna-convert

Resolves: #87894
Releases: master
Change-Id: I85aa6f39b8ff5ac171cd73218ed1144a56d9f724
Reviewed-on: https://review.typo3.org/c/60234
Reviewed-by: Benjamin Franzke <bfr@qbus.de>
Reviewed-by: Stephan Großberndt <stephan.grossberndt@typo3.org>
Reviewed-by: Georg Ringer <georg.ringer@gmail.com>
Tested-by: TYPO3com <noreply@typo3.com>
Tested-by: Benjamin Franzke <bfr@qbus.de>
Tested-by: Georg Ringer <georg.ringer@gmail.com>
13 files changed:
composer.json
composer.lock
typo3/sysext/core/Classes/DataHandling/DataHandler.php
typo3/sysext/core/Classes/Mail/MailMessage.php
typo3/sysext/core/Classes/Utility/GeneralUtility.php
typo3/sysext/core/Documentation/Changelog/master/Deprecation-87894-GeneralUtilityidnaEncode.rst [new file with mode: 0644]
typo3/sysext/core/Documentation/Changelog/master/Important-87894-RemovedPHPDependencyAlgo26-matthiasidna-convert.rst [new file with mode: 0644]
typo3/sysext/core/Tests/Unit/Utility/GeneralUtilityTest.php
typo3/sysext/core/Tests/UnitDeprecated/Utility/GeneralUtilityTest.php [new file with mode: 0644]
typo3/sysext/core/composer.json
typo3/sysext/frontend/Classes/Typolink/PageLinkBuilder.php
typo3/sysext/install/Configuration/ExtensionScanner/Php/MethodCallStaticMatcher.php
typo3/sysext/linkvalidator/Classes/Linktype/ExternalLinktype.php

index 6a001ca..d9fd2c3 100644 (file)
@@ -37,7 +37,6 @@
                "ext-pcre": "*",
                "ext-session": "*",
                "ext-xml": "*",
-               "algo26-matthias/idna-convert": "1.1.0",
                "cogpowered/finediff": "~0.3.1",
                "doctrine/annotations": "^1.3",
                "doctrine/dbal": "~2.8.0",
index ba520f1..2774856 100644 (file)
@@ -4,58 +4,9 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "f9e1c111a893e26c4b846ded95bfa537",
+    "content-hash": "4df4fa58da5a1c45547ec29785a8595b",
     "packages": [
         {
-            "name": "algo26-matthias/idna-convert",
-            "version": "v1.1.0",
-            "source": {
-                "type": "git",
-                "url": "https://github.com/algo26-matthias/idna-convert.git",
-                "reference": "a6dfb6f87611e3a89d2eec4924a0f51db755c573"
-            },
-            "dist": {
-                "type": "zip",
-                "url": "https://api.github.com/repos/algo26-matthias/idna-convert/zipball/a6dfb6f87611e3a89d2eec4924a0f51db755c573",
-                "reference": "a6dfb6f87611e3a89d2eec4924a0f51db755c573",
-                "shasum": ""
-            },
-            "require": {
-                "ext-pcre": "*",
-                "php": ">=5.6.0"
-            },
-            "type": "library",
-            "extra": {
-                "branch-alias": {
-                    "dev-master": "1.0.x-dev"
-                }
-            },
-            "autoload": {
-                "psr-4": {
-                    "Mso\\IdnaConvert\\": "src"
-                }
-            },
-            "notification-url": "https://packagist.org/downloads/",
-            "license": [
-                "LGPL-2.1+"
-            ],
-            "authors": [
-                {
-                    "name": "Matthias Sommerfeld",
-                    "email": "mso@phlylabs.de",
-                    "role": "Developer"
-                }
-            ],
-            "description": "A library for encoding and decoding internationalized domain names",
-            "homepage": "http://idnaconv.net/",
-            "keywords": [
-                "idn",
-                "idna",
-                "php"
-            ],
-            "time": "2016-06-19T18:08:43+00:00"
-        },
-        {
             "name": "cogpowered/finediff",
             "version": "0.3.1",
             "source": {
index b046660..c3aa8a1 100644 (file)
@@ -2600,7 +2600,7 @@ class DataHandler implements LoggerAwareInterface
                     break;
                 case 'domainname':
                     if (!preg_match('/^[a-z0-9.\\-]*$/i', $value)) {
-                        $value = GeneralUtility::idnaEncode($value);
+                        $value = (string)idn_to_ascii($value, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
                     }
                     break;
                 case 'email':
index 83a42f5..e14e094 100644 (file)
@@ -257,7 +257,7 @@ class MailMessage extends \Swift_Message
         if (!is_string($email)) {
             return $email;
         }
-        // Split on the last "@" since adresses like "foo@bar"@example.org are valid
+        // Split on the last "@" since addresses like "foo@bar"@example.org are valid
         $atPosition = strrpos($email, '@');
         if (!$atPosition || $atPosition + 1 === strlen($email)) {
             // Return if no @ found or it is placed at the very beginning or end of the email
@@ -265,7 +265,7 @@ class MailMessage extends \Swift_Message
         }
         $domain = substr($email, $atPosition + 1);
         $local = substr($email, 0, $atPosition);
-        $domain = \TYPO3\CMS\Core\Utility\GeneralUtility::idnaEncode($domain);
+        $domain = (string)idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
 
         return $local . '@' . $domain;
     }
index 216b0ad..2395f7a 100644 (file)
@@ -81,20 +81,6 @@ class GeneralUtility
     protected static $applicationContext;
 
     /**
-     * IDNA string cache
-     *
-     * @var array<string>
-     */
-    protected static $idnaStringCache = [];
-
-    /**
-     * IDNA converter
-     *
-     * @var \Mso\IdnaConvert\IdnaConvert
-     */
-    protected static $idnaConverter;
-
-    /**
      * A list of supported CGI server APIs
      * NOTICE: This is a duplicate of the SAME array in SystemEnvironmentBuilder
      * @var array
@@ -856,9 +842,7 @@ class GeneralUtility
      *
      * http://tools.ietf.org/html/rfc3696
      * International characters are allowed in email. So the whole address needs
-     * to be converted to punicode before passing it to filter_var(). We convert
-     * the user- and domain part separately to increase the chance of hitting an
-     * entry in self::$idnaStringCache.
+     * to be converted to punycode before passing it to filter_var().
      *
      * Also the @ sign may appear multiple times in an address. If not used as
      * a boundary marker between the user- and domain part, it must be escaped
@@ -883,9 +867,8 @@ class GeneralUtility
         $domain = substr($email, $atPosition + 1);
         $user = substr($email, 0, $atPosition);
         if (!preg_match('/^[a-z0-9.\\-]*$/i', $domain)) {
-            try {
-                $domain = self::idnaEncode($domain);
-            } catch (\InvalidArgumentException $exception) {
+            $domain = idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
+            if ($domain === false) {
                 return false;
             }
         }
@@ -897,17 +880,26 @@ class GeneralUtility
      *
      * @param string $value
      * @return string An ASCII encoded (punicode) string
+     * @deprecated since TYPO3 v10.0, will be removed in TYPO3 v11.0, use PHP's native idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46) function directly.
      */
     public static function idnaEncode($value)
     {
-        if (isset(self::$idnaStringCache[$value])) {
-            return self::$idnaStringCache[$value];
-        }
-        if (!self::$idnaConverter) {
-            self::$idnaConverter = new \Mso\IdnaConvert\IdnaConvert(['idn_version' => 2008]);
+        trigger_error(__CLASS__ . ':' . __METHOD__ . ' will be removed in TYPO3 v11.0. Use PHPs native "idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46)" function directly instead.', E_USER_DEPRECATED);
+        // Early return in case input is not a string or empty
+        if (!is_string($value) || empty($value)) {
+            return (string)$value;
+        }
+        // Split on the last "@" since addresses like "foo@bar"@example.org are valid; for an email
+        // address only the domain part after that last "@" is encoded
+        $atPosition = strrpos($value, '@');
+        if ($atPosition !== false) {
+            $domain = substr($value, $atPosition + 1);
+            $local = substr($value, 0, $atPosition);
+            $domain = (string)idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
+            // Re-assemble the address from the untouched local part and the encoded domain
+            return $local . '@' . $domain;
         }
-        self::$idnaStringCache[$value] = self::$idnaConverter->encode($value);
-        return self::$idnaStringCache[$value];
+        return (string)idn_to_ascii($value, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
     }
 
     /**
@@ -990,11 +982,11 @@ class GeneralUtility
             return false;
         }
         if (isset($parsedUrl['host']) && !preg_match('/^[a-z0-9.\\-]*$/i', $parsedUrl['host'])) {
-            try {
-                $parsedUrl['host'] = self::idnaEncode($parsedUrl['host']);
-            } catch (\InvalidArgumentException $exception) {
+            $host = idn_to_ascii($parsedUrl['host'], IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
+            if ($host === false) {
                 return false;
             }
+            $parsedUrl['host'] = $host;
         }
         return filter_var(HttpUtility::buildUrl($parsedUrl), FILTER_VALIDATE_URL) !== false;
     }
@@ -3638,7 +3630,6 @@ class GeneralUtility
     public static function flushInternalRuntimeCaches()
     {
         self::$indpEnvCache = [];
-        self::$idnaStringCache = [];
     }
 
     /**
diff --git a/typo3/sysext/core/Documentation/Changelog/master/Deprecation-87894-GeneralUtilityidnaEncode.rst b/typo3/sysext/core/Documentation/Changelog/master/Deprecation-87894-GeneralUtilityidnaEncode.rst
new file mode 100644 (file)
index 0000000..12c9b9e
--- /dev/null
@@ -0,0 +1,39 @@
+.. include:: ../../Includes.txt
+
+================================================
+Deprecation: #87894 - GeneralUtility::idnaEncode
+================================================
+
+See :issue:`87894`
+
+Description
+===========
+
+PHP has the native function :php:`idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46)` for converting UTF-8 based domains to ASCII-based ("punycode")
+domains, which is available in all supported PHP versions via :php:`"symfony/polyfill-intl-idn"`.
+
+For this reason the method :php:`GeneralUtility::idnaEncode()` has been marked as deprecated.
+
+
+Impact
+======
+
+Calling :php:`GeneralUtility::idnaEncode()` directly will trigger a deprecation message.
+
+
+Affected Installations
+======================
+
+Any TYPO3 installation with third-party extensions calling this method.
+
+
+Migration
+=========
+
+Use :php:`idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);` instead.
+
+Please be aware that in contrast to :php:`GeneralUtility::idnaEncode()` the native PHP function only works on domain names, not email addresses or
+similar. In order to encode email addresses, split the address at the last :php:`'@'` and use :php:`idn_to_ascii()` on that last part.
+Also, if there is an error in converting a string, the native function returns a bool :php:`false`.
+
+.. index:: PHP-API, FullyScanned, ext:core
\ No newline at end of file
diff --git a/typo3/sysext/core/Documentation/Changelog/master/Important-87894-RemovedPHPDependencyAlgo26-matthiasidna-convert.rst b/typo3/sysext/core/Documentation/Changelog/master/Important-87894-RemovedPHPDependencyAlgo26-matthiasidna-convert.rst
new file mode 100644 (file)
index 0000000..818565f
--- /dev/null
@@ -0,0 +1,20 @@
+.. include:: ../../Includes.txt
+
+=======================================================================
+Important: #87894 - Removed PHP dependency algo26-matthias/idna-convert
+=======================================================================
+
+See :issue:`87894`
+
+Description
+===========
+
+PHP has native functions for converting UTF-8 based domains to ASCII-based ("punycode") domains, which
+can be used directly when the PHP extension "intl" is installed. For servers with PHP packages which
+do not have the PHP extension "intl" installed, the symfony polyfill package "symfony/polyfill-intl-idn"
+is available, allowing the native PHP functionality to be used in the TYPO3 code base.
+
+For this reason the PHP dependency "algo26-matthias/idna-convert" is no longer necessary and
+has been removed.
+
+.. index:: PHP-API, ext:core
\ No newline at end of file
index 71fc6a4..ff5467b 100644 (file)
@@ -4687,66 +4687,6 @@ class GeneralUtilityTest extends UnitTestCase
         $this->assertSame($expected, $result['index']['vDEF']);
     }
 
-    /**
-     * @test
-     * @dataProvider idnaEncodeDataProvider
-     * @param $actual
-     * @param $expected
-     */
-    public function idnaEncodeConvertsUnicodeCharsToASCIIString($actual, $expected)
-    {
-        $result = GeneralUtility::idnaEncode($actual);
-        $this->assertSame($expected, $result);
-    }
-
-    /**
-     * Data provider for method idnaEncode in GeneralUtility class.
-     * IDNA converter has to convert special chars (UTF-8) to ASCII compatible chars.
-     *
-     * @returns array
-     */
-    public function idnaEncodeDataProvider()
-    {
-        return [
-            'empty string' => [
-                '',
-                ''
-            ],
-            'null value' => [
-                null,
-                ''
-            ],
-            'string with ascii chars' => [
-                'example',
-                'example'
-            ],
-            'domain (1) with utf8 chars' => [
-                'dömäin.example',
-                'xn--dmin-moa0i.example'
-            ],
-            'domain (2) with utf8 chars' => [
-                'äaaa.example',
-                'xn--aaa-pla.example'
-            ],
-            'domain (3) with utf8 chars' => [
-                'déjà.vu.example',
-                'xn--dj-kia8a.vu.example'
-            ],
-            'domain (4) with utf8 chars' => [
-                'foo.âbcdéf.example',
-                'foo.xn--bcdf-9na9b.example'
-            ],
-            'domain with utf8 char (german umlaut)' => [
-                'exömple.com',
-                'xn--exmple-xxa.com'
-            ],
-            'email with utf8 char (german umlaut)' => [
-                'joe.doe@dömäin.de',
-                'joe.doe@xn--dmin-moa0i.de'
-            ]
-        ];
-    }
-
     public function splitHeaderLinesDataProvider(): array
     {
         return [
diff --git a/typo3/sysext/core/Tests/UnitDeprecated/Utility/GeneralUtilityTest.php b/typo3/sysext/core/Tests/UnitDeprecated/Utility/GeneralUtilityTest.php
new file mode 100644 (file)
index 0000000..717eb91
--- /dev/null
@@ -0,0 +1,89 @@
+<?php
+namespace TYPO3\CMS\Core\Tests\UnitDeprecated\Utility;
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+use TYPO3\TestingFramework\Core\Unit\UnitTestCase;
+
+/**
+ * Test case
+ */
+class GeneralUtilityTest extends UnitTestCase
+{
+    /**
+     * @var bool Reset singletons created by subject
+     */
+    protected $resetSingletonInstances = true;
+
+    /**
+     * @test
+     * @dataProvider idnaEncodeDataProvider
+     * @param $actual
+     * @param $expected
+     */
+    public function idnaEncodeConvertsUnicodeCharsToASCIIString($actual, $expected)
+    {
+        $result = GeneralUtility::idnaEncode($actual);
+        $this->assertSame($expected, $result);
+    }
+
+    /**
+     * Data provider for method idnaEncode in GeneralUtility class.
+     * IDNA converter has to convert special chars (UTF-8) to ASCII compatible chars.
+     *
+     * @return array
+     */
+    public function idnaEncodeDataProvider()
+    {
+        return [
+            'empty string' => [
+                '',
+                ''
+            ],
+            'null value' => [
+                null,
+                ''
+            ],
+            'string with ascii chars' => [
+                'example',
+                'example'
+            ],
+            'domain (1) with utf8 chars' => [
+                'dömäin.example',
+                'xn--dmin-moa0i.example'
+            ],
+            'domain (2) with utf8 chars' => [
+                'äaaa.example',
+                'xn--aaa-pla.example'
+            ],
+            'domain (3) with utf8 chars' => [
+                'déjà.vu.example',
+                'xn--dj-kia8a.vu.example'
+            ],
+            'domain (4) with utf8 chars' => [
+                'foo.âbcdéf.example',
+                'foo.xn--bcdf-9na9b.example'
+            ],
+            'domain with utf8 char (german umlaut)' => [
+                'exömple.com',
+                'xn--exmple-xxa.com'
+            ],
+            'email with utf8 char (german umlaut)' => [
+                'joe.doe@dömäin.de',
+                'joe.doe@xn--dmin-moa0i.de'
+            ]
+        ];
+    }
+}
index f780f61..ed363f6 100644 (file)
@@ -19,7 +19,6 @@
                "ext-pcre": "*",
                "ext-session": "*",
                "ext-xml": "*",
-               "algo26-matthias/idna-convert": "1.1.0",
                "cogpowered/finediff": "~0.3.1",
                "doctrine/annotations": "^1.3",
                "doctrine/dbal": "~2.8.0",
index 5df6387..667eb45 100644 (file)
@@ -469,7 +469,7 @@ class PageLinkBuilder extends AbstractTypolinkBuilder
             $target = $target ?: $this->resolveTargetAttribute($conf, 'extTarget', false, $tsfe->extTarget);
             // Convert IDNA-like domain (if any)
             if (!preg_match('/^[a-z0-9.\\-]*$/i', $targetDomain)) {
-                $targetDomain = GeneralUtility::idnaEncode($targetDomain);
+                $targetDomain = (string)idn_to_ascii($targetDomain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
             }
             $url = $absoluteUrlScheme . '://' . $targetDomain . '/index.php?id=' . $page['uid'] . $additionalQueryParams;
         } else {
index 38bbcfb..3e65200 100644 (file)
@@ -868,4 +868,11 @@ return [
             'Deprecation-87613-DeprecateTYPO3CMSExtbaseUtilityTypeHandlingUtilityhex2bin.rst',
         ],
     ],
+    'TYPO3\CMS\Core\Utility\GeneralUtility::idnaEncode' => [
+        'numberOfMandatoryArguments' => 1,
+        'maximumNumberOfArguments' => 1,
+        'restFiles' => [
+            'Deprecation-87894-GeneralUtilityidnaEncode.rst',
+        ],
+    ],
 ];
index f4a691f..908d5e6 100644 (file)
@@ -15,7 +15,6 @@ namespace TYPO3\CMS\Linkvalidator\Linktype;
  */
 
 use GuzzleHttp\Cookie\CookieJar;
-use Mso\IdnaConvert\IdnaConvert;
 use TYPO3\CMS\Core\Http\RequestFactory;
 use TYPO3\CMS\Core\Utility\GeneralUtility;
 
@@ -208,14 +207,6 @@ class ExternalLinktype extends AbstractLinktype
      */
     protected function preprocessUrl(string $url): string
     {
-        try {
-            return (new IdnaConvert())->encode($url);
-        } catch (\Exception $e) {
-            // in case of any error, return empty url.
-            $this->errorParams['errorType'] = 'exception';
-            $this->errorParams['exception'] = $e->getMessage();
-            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
-            return '';
-        }
+        return (string)idn_to_ascii($url, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
     }
 }