Commit 1a1ea9e5 authored by Benni Mack's avatar Benni Mack
Browse files

[BUGFIX] Use mb_* methods directly instead of CharsetConverter

There are several places where the mb_* functions are still not used, although
they are the better choice over e.g. the $csConv->parse_charset() method,
because mbstring already ships with more charset aliases.

The same goes for "entities_to_utf8", which is a PHP user-space implementation
of html_entity_decode() dating back to Kasper's change in 2003.

Using native PHP methods should also increase performance marginally.

Resolves: #81575
Releases: master, 8.7
Change-Id: I5e97881cc7107883a5ff53d534161a7ec17e1ee5
Reviewed-on: https://review.typo3.org/53208


Tested-by: default avatarTYPO3com <no-reply@typo3.com>
Reviewed-by: Markus Klein's avatarMarkus Klein <markus.klein@typo3.org>
Tested-by: Markus Klein's avatarMarkus Klein <markus.klein@typo3.org>
Reviewed-by: Wouter Wolters's avatarWouter Wolters <typo3@wouterwolters.nl>
Reviewed-by: Anja Leichsenring's avatarAnja Leichsenring <aleichsenring@ab-softlab.de>
Tested-by: Anja Leichsenring's avatarAnja Leichsenring <aleichsenring@ab-softlab.de>
Reviewed-by: Benni Mack's avatarBenni Mack <benni@typo3.org>
Tested-by: Benni Mack's avatarBenni Mack <benni@typo3.org>
parent 0e3615ba
......@@ -15,7 +15,6 @@ namespace TYPO3\CMS\Backend\Form\Wizard;
*/
use TYPO3\CMS\Backend\Utility\BackendUtility;
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\CMS\Core\Database\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
......@@ -156,8 +155,6 @@ class SuggestWizardDefaultReceiver
->execute();
$allRowsCount = $result->rowCount();
if ($allRowsCount) {
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
while ($row = $result->fetch()) {
// check if we already have collected the maximum number of records
if (count($rows) > $this->maxItems) {
......@@ -172,12 +169,12 @@ class SuggestWizardDefaultReceiver
$spriteIcon = $this->iconFactory->getIconForRecord($this->table, $row, Icon::SIZE_SMALL)->render();
$uid = $row['t3ver_oid'] > 0 ? $row['t3ver_oid'] : $row['uid'];
$path = $this->getRecordPath($row, $uid);
if (strlen($path) > 30) {
if (mb_strlen($path, 'utf-8') > 30) {
$croppedPath = '<abbr title="' . htmlspecialchars($path) . '">' .
htmlspecialchars(
$charsetConverter->crop('utf-8', $path, 10)
mb_substr($path, 0, 10, 'utf-8')
. '...'
. $charsetConverter->crop('utf-8', $path, -20)
. mb_substr($path, -20, null, 'utf-8')
) .
'</abbr>';
} else {
......
......@@ -2087,9 +2087,24 @@ class PageRenderer implements \TYPO3\CMS\Core\SingletonInterface
}
$this->inlineLanguageLabelFiles = [];
// Convert settings back to UTF-8 since json_encode() only works with UTF-8:
if (TYPO3_MODE === 'FE' && $this->getCharSet() !== 'utf-8') {
if ($this->inlineSettings) {
$this->csConvObj->convArray($this->inlineSettings, $this->getCharSet(), 'utf-8');
if ($this->getCharSet() && $this->getCharSet() !== 'utf-8' && is_array($this->inlineSettings)) {
$this->convertCharsetRecursivelyToUtf8($this->inlineSettings, $this->getCharSet());
}
}
/**
 * Small helper function to convert a string, or all string values of a
 * (nested) array, into utf-8. The conversion happens in place.
 *
 * Array keys are left untouched; values that are neither strings nor
 * arrays are skipped.
 *
 * @param mixed $data given by reference (string/array usually)
 * @param string $fromCharset convert FROM this charset
 */
protected function convertCharsetRecursivelyToUtf8(&$data, string $fromCharset)
{
    if (is_string($data)) {
        // A plain string cannot be iterated, convert it directly.
        $data = mb_convert_encoding($data, 'utf-8', $fromCharset);
        return;
    }
    if (!is_array($data)) {
        // Nothing convertible (null, int, bool, ...): leave as is.
        return;
    }
    foreach ($data as &$item) {
        if (is_array($item) || is_string($item)) {
            $this->convertCharsetRecursivelyToUtf8($item, $fromCharset);
        }
    }
    // Break the reference so a later write to $item cannot alter the last element.
    unset($item);
}
......
......@@ -15,7 +15,6 @@ namespace TYPO3\CMS\Core\Utility;
*/
use GuzzleHttp\Exception\RequestException;
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\CMS\Core\Core\ApplicationContext;
use TYPO3\CMS\Core\Core\ClassLoadingInformation;
use TYPO3\CMS\Core\Database\ConnectionPool;
......@@ -256,9 +255,15 @@ class GeneralUtility
*/
public static function fixed_lgd_cs($string, $chars, $appendString = '...')
{
    // Normalise once so the comparison, abs() and mb_substr() all work on an integer.
    $chars = (int)$chars;
    // Nothing to crop: no limit given, or the string already fits into the limit.
    if ($chars === 0 || mb_strlen($string, 'utf-8') <= abs($chars)) {
        return $string;
    }
    if ($chars > 0) {
        // Positive limit: keep the first $chars characters, append the signifier.
        return mb_substr($string, 0, $chars, 'utf-8') . $appendString;
    }
    // Negative limit: keep the last abs($chars) characters, prepend the signifier.
    return $appendString . mb_substr($string, $chars, null, 'utf-8');
}
/**
......
......@@ -14,8 +14,6 @@ namespace TYPO3\CMS\Fluid\ViewHelpers\Format;
* The TYPO3 project - inspiring people to share!
*/
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Fluid\Core\ViewHelper\AbstractViewHelper;
use TYPO3\CMS\Fluid\Core\ViewHelper\Exception\InvalidVariableException;
use TYPO3Fluid\Fluid\Core\Rendering\RenderingContextInterface;
......@@ -136,10 +134,16 @@ class CaseViewHelper extends AbstractViewHelper
$output = mb_strtoupper($value, 'utf-8');
break;
case self::CASE_CAPITAL:
$output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toUpper');
$firstChar = mb_substr($value, 0, 1, 'utf-8');
$firstChar = mb_strtoupper($firstChar, 'utf-8');
$remainder = mb_substr($value, 1, null, 'utf-8');
$output = $firstChar . $remainder;
break;
case self::CASE_UNCAPITAL:
$output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toLower');
$firstChar = mb_substr($value, 0, 1, 'utf-8');
$firstChar = mb_strtolower($firstChar, 'utf-8');
$remainder = mb_substr($value, 1, null, 'utf-8');
$output = $firstChar . $remainder;
break;
case self::CASE_CAPITAL_WORDS:
// @todo: Implement method once there is a proper solution with using the CharsetConverter
......
......@@ -17,7 +17,6 @@ namespace TYPO3\CMS\Frontend\ContentObject;
use Doctrine\DBAL\DBALException;
use Doctrine\DBAL\Driver\Statement;
use TYPO3\CMS\Core\Cache\CacheManager;
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\CMS\Core\Database\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
......@@ -2090,8 +2089,7 @@ class ContentObjectRenderer
public function stdWrap_csConv($content = '', $conf = [])
{
if (!empty($conf['csConv'])) {
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$output = $charsetConverter->conv($content, $charsetConverter->parse_charset($conf['csConv']), 'utf-8');
$output = mb_convert_encoding($content, 'utf-8', $conf['csConv']);
return $output !== false && $output !== '' ? $output : $content;
} else {
return $content;
......@@ -2290,9 +2288,7 @@ class ContentObjectRenderer
$content = (string)$content === '' ? $GLOBALS['EXEC_TIME'] : (int)$content;
$content = $conf['strftime.']['GMT'] ? gmstrftime($conf['strftime'], $content) : strftime($conf['strftime'], $content);
if (!empty($conf['strftime.']['charset'])) {
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$output = $charsetConverter->conv($content, $charsetConverter->parse_charset($conf['strftime.']['charset']), 'utf-8');
$output = mb_convert_encoding($content, 'utf-8', $conf['strftime.']['charset']);
return $output ?: $content;
}
return $content;
......@@ -5928,7 +5924,6 @@ class ContentObjectRenderer
*/
public function caseshift($theValue, $case)
{
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
switch (strtolower($case)) {
case 'upper':
$theValue = mb_strtoupper($theValue, 'utf-8');
......@@ -5940,10 +5935,16 @@ class ContentObjectRenderer
$theValue = mb_convert_case($theValue, MB_CASE_TITLE, 'utf-8');
break;
case 'ucfirst':
$theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toUpper');
$firstChar = mb_substr($theValue, 0, 1, 'utf-8');
$firstChar = mb_strtoupper($firstChar, 'utf-8');
$remainder = mb_substr($theValue, 1, null, 'utf-8');
$theValue = $firstChar . $remainder;
break;
case 'lcfirst':
$theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toLower');
$firstChar = mb_substr($theValue, 0, 1, 'utf-8');
$firstChar = mb_strtolower($firstChar, 'utf-8');
$remainder = mb_substr($theValue, 1, null, 'utf-8');
$theValue = $firstChar . $remainder;
break;
case 'uppercamelcase':
$theValue = GeneralUtility::underscoredToUpperCamelCase($theValue);
......
......@@ -4172,9 +4172,7 @@ class TypoScriptFrontendController
// Rendering charset of HTML page.
if ($this->config['config']['metaCharset']) {
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$this->metaCharset = $charsetConverter->parse_charset($this->config['config']['metaCharset']);
$this->metaCharset = $this->config['config']['metaCharset'];
}
}
......@@ -4200,13 +4198,28 @@ class TypoScriptFrontendController
public function convPOSTCharset()
{
    // Only re-encode when a charset other than utf-8 is configured and POST data exists.
    // An empty metaCharset is skipped, as mb_convert_encoding() cannot work with it.
    if (!$this->metaCharset || $this->metaCharset === 'utf-8' || !is_array($_POST) || empty($_POST)) {
        return;
    }
    // Convert exactly once; running a second converter over the same data
    // would double-encode every non-ASCII character.
    $this->convertCharsetRecursivelyToUtf8($_POST, $this->metaCharset);
    // Keep the legacy global in sync with the converted values.
    $GLOBALS['HTTP_POST_VARS'] = $_POST;
}
/**
 * Small helper function to convert a string, or all string values of a
 * (nested) array, to UTF-8. The conversion happens in place.
 *
 * Array keys are left untouched; values that are neither strings nor
 * arrays are skipped.
 *
 * @param mixed $data given by reference (string/array usually)
 * @param string $fromCharset convert FROM this charset
 */
protected function convertCharsetRecursivelyToUtf8(&$data, string $fromCharset)
{
    if (is_string($data)) {
        // A plain string cannot be iterated, convert it directly.
        $data = mb_convert_encoding($data, 'utf-8', $fromCharset);
        return;
    }
    if (!is_array($data)) {
        // Nothing convertible (null, int, bool, ...): leave as is.
        return;
    }
    foreach ($data as &$item) {
        if (is_array($item) || is_string($item)) {
            $this->convertCharsetRecursivelyToUtf8($item, $fromCharset);
        }
    }
    // Break the reference so a later write to $item cannot alter the last element.
    unset($item);
}
/**
* Calculates page cache timeout according to the records with starttime/endtime on the page.
*
......
......@@ -17,7 +17,6 @@ namespace TYPO3\CMS\Frontend\Tests\Unit\ContentObject;
use Psr\Log\LoggerInterface;
use TYPO3\CMS\Core\Cache\CacheManager;
use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface as CacheFrontendInterface;
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\CMS\Core\Core\ApplicationContext;
use TYPO3\CMS\Core\Log\LogManager;
use TYPO3\CMS\Core\Resource\File;
......@@ -184,9 +183,8 @@ class ContentObjectRendererTest extends \TYPO3\TestingFramework\Core\Unit\UnitTe
*/
protected function handleCharset(&$subject, &$expected)
{
    // Both fixtures are converted from iso-8859-1 to utf-8 exactly once and
    // written back through the references; converting twice would double-encode
    // every non-ASCII character.
    $subject = mb_convert_encoding($subject, 'utf-8', 'iso-8859-1');
    $expected = mb_convert_encoding($expected, 'utf-8', 'iso-8859-1');
}
/////////////////////////////////////////////
......
......@@ -456,7 +456,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
}
}
$title = $resultData['item_title'] . $resultData['titleaddition'];
$title = $this->charsetConverter->crop('utf-8', $title, $this->settings['results.']['titleCropAfter'], $this->settings['results.']['titleCropSignifier']);
$title = GeneralUtility::fixed_lgd_cs($title, $this->settings['results.']['titleCropAfter'], $this->settings['results.']['titleCropSignifier']);
// If external media, link to the media-file instead.
if ($row['item_type']) {
if ($row['show_resume']) {
......@@ -712,7 +712,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
}
}
if (!trim($markedSW)) {
$outputStr = $this->charsetConverter->crop('utf-8', $row['item_description'], $length, $this->settings['results.']['summaryCropSignifier']);
$outputStr = GeneralUtility::fixed_lgd_cs($row['item_description'], $length, $this->settings['results.']['summaryCropSignifier']);
$outputStr = htmlspecialchars($outputStr);
}
$output = $outputStr ?: $markedSW;
......@@ -764,16 +764,16 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
if (!$k) {
// First entry at all (only cropped on the frontside)
if ($strLen > $postPreLgd) {
$output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
$output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', GeneralUtility::fixed_lgd_cs($parts[$k], -($postPreLgd - $postPreLgd_offset)));
}
} elseif ($summaryLgd > $summaryMax || !isset($parts[$k + 1])) {
// In case summary length is exceed OR if there are no more entries at all:
if ($strLen > $postPreLgd) {
$output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider;
$output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', GeneralUtility::fixed_lgd_cs($parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider;
}
} else {
if ($strLen > $postPreLgd * 2) {
$output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
$output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', GeneralUtility::fixed_lgd_cs($parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', GeneralUtility::fixed_lgd_cs($parts[$k], -($postPreLgd - $postPreLgd_offset)));
}
}
$summaryLgd += mb_strlen($output[$k], 'utf-8');
......@@ -860,8 +860,10 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
// shortening the string here is only a run-away feature!)
$searchWords = substr($this->getSword(), 0, 200);
// Convert to UTF-8 + conv. entities (was also converted during indexing!)
$searchWords = $this->charsetConverter->conv($searchWords, $GLOBALS['TSFE']->metaCharset, 'utf-8');
$searchWords = $this->charsetConverter->entities_to_utf8($searchWords);
if ($GLOBALS['TSFE']->metaCharset && $GLOBALS['TSFE']->metaCharset !== 'utf-8') {
$searchWords = mb_convert_encoding($searchWords, 'utf-8', $GLOBALS['TSFE']->metaCharset);
$searchWords = html_entity_decode($searchWords);
}
$sWordArray = false;
if ($hookObj = $this->hookRequest('getSearchWords')) {
$sWordArray = $hookObj->getSearchWords_splitSWords($searchWords, $defaultOperator);
......
......@@ -660,7 +660,7 @@ class FileContentParser
$fileContent = GeneralUtility::getUrl($absFile);
// Finding charset:
preg_match('/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
$charset = $reg[1] ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8';
$charset = $reg[1] ?: 'utf-8';
// Converting content:
$fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace('<', ' <', $fileContent)), $charset);
$contentArr = $this->pObj->splitRegularContent($fileContent);
......
......@@ -674,13 +674,12 @@ class Indexer
{
// Find charset:
$charset = $charset ?: $this->getHTMLcharset($content);
$charset = $this->csObj->parse_charset($charset);
// Convert charset:
if ($charset && $charset !== 'utf-8') {
$content = $this->csObj->conv($content, $charset, 'utf-8');
$content = mb_convert_encoding($content, 'utf-8', $charset);
}
// Convert entities, assuming document is now UTF-8:
return $this->csObj->entities_to_utf8($content);
return html_entity_decode($content);
}
/**
......@@ -1270,10 +1269,10 @@ class Indexer
foreach ($contentArr as $key => $value) {
if ((string)$contentArr[$key] !== '') {
if ($charset !== 'utf-8') {
$contentArr[$key] = $this->csObj->conv($contentArr[$key], $charset, 'utf-8');
$contentArr[$key] = mb_convert_encoding($contentArr[$key], 'utf-8', $charset);
}
// decode all numeric / html-entities in the string to real characters:
$contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key]);
$contentArr[$key] = html_entity_decode($contentArr[$key]);
}
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment