[BUGFIX] Use mb_* methods directly instead of CharsetConverter 33/53233/2
authorBenni Mack <benni@typo3.org>
Wed, 14 Jun 2017 12:11:47 +0000 (14:11 +0200)
committerBenni Mack <benni@typo3.org>
Fri, 16 Jun 2017 11:02:09 +0000 (13:02 +0200)
There are several places where the mb_* functions are still not used,
although they are preferable to, for instance, the $csConv->parse_charset()
method, because mbstring already defines more charset aliases.

The same goes for "entities_to_utf8", which is a PHP user-space implementation
of html_entity_decode() dating back to Kasper's change in 2003.

Using native PHP methods should also increase performance marginally.

Resolves: #81575
Releases: master, 8.7
Change-Id: I5e97881cc7107883a5ff53d534161a7ec17e1ee5
Reviewed-on: https://review.typo3.org/53208
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Markus Klein <markus.klein@typo3.org>
Tested-by: Markus Klein <markus.klein@typo3.org>
Reviewed-by: Wouter Wolters <typo3@wouterwolters.nl>
Reviewed-by: Anja Leichsenring <aleichsenring@ab-softlab.de>
Tested-by: Anja Leichsenring <aleichsenring@ab-softlab.de>
Reviewed-by: Benni Mack <benni@typo3.org>
Tested-by: Benni Mack <benni@typo3.org>
Reviewed-on: https://review.typo3.org/53233

typo3/sysext/backend/Classes/Form/Wizard/SuggestWizardDefaultReceiver.php
typo3/sysext/core/Classes/Page/PageRenderer.php
typo3/sysext/core/Classes/Utility/GeneralUtility.php
typo3/sysext/fluid/Classes/ViewHelpers/Format/CaseViewHelper.php
typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php
typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php
typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php
typo3/sysext/indexed_search/Classes/Controller/SearchController.php
typo3/sysext/indexed_search/Classes/FileContentParser.php
typo3/sysext/indexed_search/Classes/Indexer.php

index 7ab1feb..10046c4 100644 (file)
@@ -15,7 +15,6 @@ namespace TYPO3\CMS\Backend\Form\Wizard;
  */
 
 use TYPO3\CMS\Backend\Utility\BackendUtility;
-use TYPO3\CMS\Core\Charset\CharsetConverter;
 use TYPO3\CMS\Core\Database\Connection;
 use TYPO3\CMS\Core\Database\ConnectionPool;
 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
@@ -156,8 +155,6 @@ class SuggestWizardDefaultReceiver
             ->execute();
         $allRowsCount = $result->rowCount();
         if ($allRowsCount) {
-            /** @var CharsetConverter $charsetConverter */
-            $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
             while ($row = $result->fetch()) {
                 // check if we already have collected the maximum number of records
                 if (count($rows) > $this->maxItems) {
@@ -172,12 +169,12 @@ class SuggestWizardDefaultReceiver
                 $spriteIcon = $this->iconFactory->getIconForRecord($this->table, $row, Icon::SIZE_SMALL)->render();
                 $uid = $row['t3ver_oid'] > 0 ? $row['t3ver_oid'] : $row['uid'];
                 $path = $this->getRecordPath($row, $uid);
-                if (strlen($path) > 30) {
+                if (mb_strlen($path, 'utf-8') > 30) {
                     $croppedPath = '<abbr title="' . htmlspecialchars($path) . '">' .
                         htmlspecialchars(
-                            $charsetConverter->crop('utf-8', $path, 10)
+                            mb_substr($path, 0, 10, 'utf-8')
                                 . '...'
-                                . $charsetConverter->crop('utf-8', $path, -20)
+                                . mb_substr($path, -20, null, 'utf-8')
                         ) .
                         '</abbr>';
                 } else {
index 0b4290a..d7b4603 100644 (file)
@@ -2091,9 +2091,24 @@ class PageRenderer implements \TYPO3\CMS\Core\SingletonInterface
         }
         $this->inlineLanguageLabelFiles = [];
         // Convert settings back to UTF-8 since json_encode() only works with UTF-8:
-        if (TYPO3_MODE === 'FE' && $this->getCharSet() !== 'utf-8') {
-            if ($this->inlineSettings) {
-                $this->csConvObj->convArray($this->inlineSettings, $this->getCharSet(), 'utf-8');
+        if ($this->getCharSet() && $this->getCharSet() !== 'utf-8' && is_array($this->inlineSettings)) {
+            $this->convertCharsetRecursivelyToUtf8($this->inlineSettings, $this->getCharSet());
+        }
+    }
+
+    /**
+     * Recursively converts every string value of an array to utf-8, in place
+     *
+     * @param mixed $data given by reference; traversed with foreach, so it has to be an array (nested arrays are converted recursively, non-string values are left as they are)
+     * @param string $fromCharset convert FROM this charset
+     */
+    protected function convertCharsetRecursivelyToUtf8(&$data, string $fromCharset)
+    {
+        foreach ($data as $key => $value) {
+            if (is_array($data[$key])) {
+                $this->convertCharsetRecursivelyToUtf8($data[$key], $fromCharset);
+            } elseif (is_string($data[$key])) {
+                $data[$key] = mb_convert_encoding($data[$key], 'utf-8', $fromCharset);
             }
         }
     }
index 36c6e37..7d5d36e 100644 (file)
@@ -15,7 +15,6 @@ namespace TYPO3\CMS\Core\Utility;
  */
 
 use GuzzleHttp\Exception\RequestException;
-use TYPO3\CMS\Core\Charset\CharsetConverter;
 use TYPO3\CMS\Core\Core\ApplicationContext;
 use TYPO3\CMS\Core\Core\ClassLoadingInformation;
 use TYPO3\CMS\Core\Crypto\Random;
@@ -279,9 +278,15 @@ class GeneralUtility
      */
     public static function fixed_lgd_cs($string, $chars, $appendString = '...')
     {
-        /** @var CharsetConverter $charsetConverter */
-        $charsetConverter = self::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class);
-        return $charsetConverter->crop('utf-8', $string, $chars, $appendString);
+        if ((int)$chars === 0 || mb_strlen($string, 'utf-8') <= abs($chars)) {
+            return $string;
+        }
+        if ($chars > 0) {
+            $string = mb_substr($string, 0, $chars, 'utf-8') . $appendString;
+        } else {
+            $string = $appendString . mb_substr($string, $len, mb_strlen($string, 'utf-8'), 'utf-8');
+        }
+        return $string;
     }
 
     /**
index 7fdecc9..eb8229c 100644 (file)
@@ -14,8 +14,6 @@ namespace TYPO3\CMS\Fluid\ViewHelpers\Format;
  * The TYPO3 project - inspiring people to share!
  */
 
-use TYPO3\CMS\Core\Charset\CharsetConverter;
-use TYPO3\CMS\Core\Utility\GeneralUtility;
 use TYPO3\CMS\Fluid\Core\ViewHelper\AbstractViewHelper;
 use TYPO3\CMS\Fluid\Core\ViewHelper\Exception\InvalidVariableException;
 use TYPO3Fluid\Fluid\Core\Rendering\RenderingContextInterface;
@@ -151,10 +149,16 @@ class CaseViewHelper extends AbstractViewHelper
                 $output = mb_strtoupper($value, 'utf-8');
                 break;
             case self::CASE_CAPITAL:
-                $output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toUpper');
+                $firstChar = mb_substr($value, 0, 1, 'utf-8');
+                $firstChar = mb_strtoupper($firstChar, 'utf-8');
+                $remainder = mb_substr($value, 1, null, 'utf-8');
+                $output = $firstChar . $remainder;
                 break;
             case self::CASE_UNCAPITAL:
-                $output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toLower');
+                $firstChar = mb_substr($value, 0, 1, 'utf-8');
+                $firstChar = mb_strtolower($firstChar, 'utf-8');
+                $remainder = mb_substr($value, 1, null, 'utf-8');
+                $output = $firstChar . $remainder;
                 break;
             case self::CASE_CAPITAL_WORDS:
                 // @todo: Implement method once there is a proper solution with using the CharsetConverter
index 1072939..018d6a0 100644 (file)
@@ -17,7 +17,6 @@ namespace TYPO3\CMS\Frontend\ContentObject;
 use Doctrine\DBAL\DBALException;
 use Doctrine\DBAL\Driver\Statement;
 use TYPO3\CMS\Core\Cache\CacheManager;
-use TYPO3\CMS\Core\Charset\CharsetConverter;
 use TYPO3\CMS\Core\Database\Connection;
 use TYPO3\CMS\Core\Database\ConnectionPool;
 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
@@ -2320,8 +2319,7 @@ class ContentObjectRenderer
     public function stdWrap_csConv($content = '', $conf = [])
     {
         if (!empty($conf['csConv'])) {
-            $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
-            $output = $charsetConverter->conv($content, $charsetConverter->parse_charset($conf['csConv']), 'utf-8');
+            $output = mb_convert_encoding($content, 'utf-8', $conf['csConv']);
             return $output !== false && $output !== '' ? $output : $content;
         } else {
             return $content;
@@ -2520,9 +2518,7 @@ class ContentObjectRenderer
         $content = (string)$content === '' ? $GLOBALS['EXEC_TIME'] : (int)$content;
         $content = $conf['strftime.']['GMT'] ? gmstrftime($conf['strftime'], $content) : strftime($conf['strftime'], $content);
         if (!empty($conf['strftime.']['charset'])) {
-            /** @var CharsetConverter $charsetConverter */
-            $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
-            $output = $charsetConverter->conv($content, $charsetConverter->parse_charset($conf['strftime.']['charset']), 'utf-8');
+            $output = mb_convert_encoding($content, 'utf-8', $conf['strftime.']['charset']);
             return $output ?: $content;
         }
         return $content;
@@ -6308,7 +6304,6 @@ class ContentObjectRenderer
      */
     public function caseshift($theValue, $case)
     {
-        $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
         switch (strtolower($case)) {
             case 'upper':
                 $theValue = mb_strtoupper($theValue, 'utf-8');
@@ -6320,10 +6315,16 @@ class ContentObjectRenderer
                 $theValue = mb_convert_case($theValue, MB_CASE_TITLE, 'utf-8');
                 break;
             case 'ucfirst':
-                $theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toUpper');
+                $firstChar = mb_substr($theValue, 0, 1, 'utf-8');
+                $firstChar = mb_strtoupper($firstChar, 'utf-8');
+                $remainder = mb_substr($theValue, 1, null, 'utf-8');
+                $theValue = $firstChar . $remainder;
                 break;
             case 'lcfirst':
-                $theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toLower');
+                $firstChar = mb_substr($theValue, 0, 1, 'utf-8');
+                $firstChar = mb_strtolower($firstChar, 'utf-8');
+                $remainder = mb_substr($theValue, 1, null, 'utf-8');
+                $theValue = $firstChar . $remainder;
                 break;
             case 'uppercamelcase':
                 $theValue = GeneralUtility::underscoredToUpperCamelCase($theValue);
index 3f024a2..87b443b 100644 (file)
@@ -4426,9 +4426,7 @@ class TypoScriptFrontendController
 
         // Rendering charset of HTML page.
         if ($this->config['config']['metaCharset']) {
-            /** @var CharsetConverter $charsetConverter */
-            $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
-            $this->metaCharset = $charsetConverter->parse_charset($this->config['config']['metaCharset']);
+            $this->metaCharset = $this->config['config']['metaCharset'];
         }
     }
 
@@ -4479,14 +4477,29 @@ class TypoScriptFrontendController
     public function convPOSTCharset()
     {
         if ($this->metaCharset !== 'utf-8' && is_array($_POST) && !empty($_POST)) {
-            /** @var CharsetConverter $charsetConverter */
-            $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
-            $charsetConverter->convArray($_POST, $this->metaCharset, 'utf-8');
+            $this->convertCharsetRecursivelyToUtf8($_POST, $this->metaCharset);
             $GLOBALS['HTTP_POST_VARS'] = $_POST;
         }
     }
 
     /**
+     * Recursively converts every string value of an array to UTF-8, in place
+     *
+     * @param mixed $data given by reference; traversed with foreach, so it has to be an array (nested arrays are converted recursively, non-string values are left as they are)
+     * @param string $fromCharset convert FROM this charset
+     */
+    protected function convertCharsetRecursivelyToUtf8(&$data, string $fromCharset)
+    {
+        foreach ($data as $key => $value) {
+            if (is_array($data[$key])) {
+                $this->convertCharsetRecursivelyToUtf8($data[$key], $fromCharset);
+            } elseif (is_string($data[$key])) {
+                $data[$key] = mb_convert_encoding($data[$key], 'utf-8', $fromCharset);
+            }
+        }
+    }
+
+    /**
      * Calculates page cache timeout according to the records with starttime/endtime on the page.
      *
      * @return int Page cache timeout or PHP_INT_MAX if cannot be determined
index 1671dee..ea6a838 100644 (file)
@@ -17,7 +17,6 @@ namespace TYPO3\CMS\Frontend\Tests\Unit\ContentObject;
 use Psr\Log\LoggerInterface;
 use TYPO3\CMS\Core\Cache\CacheManager;
 use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface as CacheFrontendInterface;
-use TYPO3\CMS\Core\Charset\CharsetConverter;
 use TYPO3\CMS\Core\Core\ApplicationContext;
 use TYPO3\CMS\Core\Log\LogManager;
 use TYPO3\CMS\Core\Resource\File;
@@ -184,9 +183,8 @@ class ContentObjectRendererTest extends \TYPO3\TestingFramework\Core\Unit\UnitTe
      */
     protected function handleCharset(&$subject, &$expected)
     {
-        $charsetConverter = new CharsetConverter();
-        $subject = $charsetConverter->conv($subject, 'iso-8859-1', 'utf-8');
-        $expected = $charsetConverter->conv($expected, 'iso-8859-1', 'utf-8');
+        $subject = mb_convert_encoding($subject, 'utf-8', 'iso-8859-1');
+        $expected = mb_convert_encoding($expected, 'utf-8', 'iso-8859-1');
     }
 
     /////////////////////////////////////////////
index fd83b27..08a6c13 100644 (file)
@@ -456,7 +456,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
             }
         }
         $title = $resultData['item_title'] . $resultData['titleaddition'];
-        $title = $this->charsetConverter->crop('utf-8', $title, $this->settings['results.']['titleCropAfter'], $this->settings['results.']['titleCropSignifier']);
+        $title = GeneralUtility::fixed_lgd_cs($title, $this->settings['results.']['titleCropAfter'], $this->settings['results.']['titleCropSignifier']);
         // If external media, link to the media-file instead.
         if ($row['item_type']) {
             if ($row['show_resume']) {
@@ -712,7 +712,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
                 }
             }
             if (!trim($markedSW)) {
-                $outputStr = $this->charsetConverter->crop('utf-8', $row['item_description'], $length, $this->settings['results.']['summaryCropSignifier']);
+                $outputStr = GeneralUtility::fixed_lgd_cs($row['item_description'], $length, $this->settings['results.']['summaryCropSignifier']);
                 $outputStr = htmlspecialchars($outputStr);
             }
             $output = $outputStr ?: $markedSW;
@@ -764,16 +764,16 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
                 if (!$k) {
                     // First entry at all (only cropped on the frontside)
                     if ($strLen > $postPreLgd) {
-                        $output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
+                        $output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', GeneralUtility::fixed_lgd_cs($parts[$k], -($postPreLgd - $postPreLgd_offset)));
                     }
                 } elseif ($summaryLgd > $summaryMax || !isset($parts[$k + 1])) {
                     // In case summary length is exceed OR if there are no more entries at all:
                     if ($strLen > $postPreLgd) {
-                        $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider;
+                        $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', GeneralUtility::fixed_lgd_cs($parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider;
                     }
                 } else {
                     if ($strLen > $postPreLgd * 2) {
-                        $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
+                        $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', GeneralUtility::fixed_lgd_cs($parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', GeneralUtility::fixed_lgd_cs($parts[$k], -($postPreLgd - $postPreLgd_offset)));
                     }
                 }
                 $summaryLgd += mb_strlen($output[$k], 'utf-8');
@@ -860,8 +860,10 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
         // shortening the string here is only a run-away feature!)
         $searchWords = substr($this->getSword(), 0, 200);
         // Convert to UTF-8 + conv. entities (was also converted during indexing!)
-        $searchWords = $this->charsetConverter->conv($searchWords, $GLOBALS['TSFE']->metaCharset, 'utf-8');
-        $searchWords = $this->charsetConverter->entities_to_utf8($searchWords);
+        if ($GLOBALS['TSFE']->metaCharset && $GLOBALS['TSFE']->metaCharset !== 'utf-8') {
+            $searchWords = mb_convert_encoding($searchWords, 'utf-8', $GLOBALS['TSFE']->metaCharset);
+            $searchWords = html_entity_decode($searchWords);
+        }
         $sWordArray = false;
         if ($hookObj = $this->hookRequest('getSearchWords')) {
             $sWordArray = $hookObj->getSearchWords_splitSWords($searchWords, $defaultOperator);
index d5b0e9f..edb68ed 100644 (file)
@@ -660,7 +660,7 @@ class FileContentParser
                 $fileContent = GeneralUtility::getUrl($absFile);
                 // Finding charset:
                 preg_match('/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
-                $charset = $reg[1] ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8';
+                $charset = $reg[1] ?: 'utf-8';
                 // Converting content:
                 $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace('<', ' <', $fileContent)), $charset);
                 $contentArr = $this->pObj->splitRegularContent($fileContent);
index b0cebd2..1ac9bd0 100644 (file)
@@ -674,13 +674,12 @@ class Indexer
     {
         // Find charset:
         $charset = $charset ?: $this->getHTMLcharset($content);
-        $charset = $this->csObj->parse_charset($charset);
         // Convert charset:
         if ($charset && $charset !== 'utf-8') {
-            $content = $this->csObj->conv($content, $charset, 'utf-8');
+            $content = mb_convert_encoding($content, 'utf-8', $charset);
         }
         // Convert entities, assuming document is now UTF-8:
-        return $this->csObj->entities_to_utf8($content);
+        return html_entity_decode($content);
     }
 
     /**
@@ -1270,10 +1269,10 @@ class Indexer
         foreach ($contentArr as $key => $value) {
             if ((string)$contentArr[$key] !== '') {
                 if ($charset !== 'utf-8') {
-                    $contentArr[$key] = $this->csObj->conv($contentArr[$key], $charset, 'utf-8');
+                    $contentArr[$key] = mb_convert_encoding($contentArr[$key], 'utf-8', $charset);
                 }
                 // decode all numeric / html-entities in the string to real characters:
-                $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key]);
+                $contentArr[$key] = html_entity_decode($contentArr[$key]);
             }
         }
     }