Commit e27614cd authored by Sybille Peters 🙋, committed by Georg Ringer
Browse files

[BUGFIX] Pad multibyte strings correctly

A new function StringUtility::multibyteStringPad() is
introduced to handle multibyte strings instead of
str_pad() which will not pad strings with multibyte
characters correctly.

The new function is now used for:

- Fluid ViewHelper format.padding
- TypoScript stdWrap.strPad

Resolves: #95189
Resolves: #95190
Releases: master
Change-Id: I4b8bf4b42e049b17754b260003ac064fbbb6246a
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/71032

Tested-by: core-ci <typo3@b13.com>
Tested-by: Benni Mack <benni@typo3.org>
Tested-by: Wouter Wolters <typo3@wouterwolters.nl>
Tested-by: Georg Ringer <georg.ringer@gmail.com>
Reviewed-by: Benni Mack <benni@typo3.org>
Reviewed-by: Wouter Wolters <typo3@wouterwolters.nl>
Reviewed-by: Georg Ringer <georg.ringer@gmail.com>
parent fd2be8b3
......@@ -166,4 +166,47 @@ class StringUtility
{
return implode(',', array_unique(GeneralUtility::trimExplode(',', $list, true)));
}
/**
 * Works the same as str_pad() except that it correctly handles strings with multibyte characters
 * and takes an additional optional argument $encoding.
 *
 * All lengths are counted in characters of $encoding, not in bytes. The input is returned
 * unchanged when it already has at least $length characters, when $pad_string is empty,
 * or when $pad_type is none of STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH.
 *
 * @param string $string The string to pad
 * @param int $length Desired length of the result, in characters
 * @param string $pad_string The string used for padding; it is cut off if it does not fit evenly
 * @param int $pad_type One of STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 * @param string $encoding Character encoding used for measuring and cutting the strings
 * @return string The padded string
 */
public static function multibyteStringPad(string $string, int $length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
{
    $len = mb_strlen($string, $encoding);
    $pad_string_len = mb_strlen($pad_string, $encoding);
    if ($len >= $length || $pad_string_len === 0) {
        return $string;
    }

    // Builds exactly $count characters of padding: as many whole repetitions of $pad_string
    // as fit, followed by the leading characters of one more repetition.
    // intdiv() keeps the repetition count an integer (a plain "/" yields a float for odd
    // remainders, which str_repeat() does not accept), and $encoding is handed to
    // mb_substr() so that the partial repetition is cut in the same encoding the lengths
    // were measured in.
    $buildPadding = static function (int $count) use ($pad_string, $pad_string_len, $encoding): string {
        return str_repeat($pad_string, intdiv($count, $pad_string_len))
            . mb_substr($pad_string, 0, $count % $pad_string_len, $encoding);
    };

    $missing = $length - $len;
    switch ($pad_type) {
        case STR_PAD_RIGHT:
            return $string . $buildPadding($missing);
        case STR_PAD_LEFT:
            return $buildPadding($missing) . $string;
        case STR_PAD_BOTH:
            // The left side gets the rounded-down half, the right side gets the rest.
            $leftPadCount = intdiv($missing, 2);
            return $buildPadding($leftPadCount) . $string . $buildPadding($missing - $leftPadCount);
    }
    return $string;
}
}
......@@ -182,4 +182,83 @@ class StringUtilityTest extends UnitTestCase
{
self::assertSame($unifiedList, StringUtility::uniqueList($initialList));
}
/**
 * Supplies ASCII-only argument sets for multibyteStringPadReturnsSameValueAsStrPadForAsciiStrings.
 *
 * Each data set is [string, target length, pad string, pad type]. The sets cover inputs of
 * odd and even length, one- and two-character pad strings, and all three pad types.
 *
 * @return \Generator
 */
public function multibyteStringPadReturnsSameValueAsStrPadForAsciiStringsDataProvider(): \Generator
{
    yield from [
        'Pad right to 10 with string with uneven length' => ['ABC', 10, ' ', STR_PAD_RIGHT],
        'Pad left to 10 with string with uneven length' => ['ABC', 10, ' ', STR_PAD_LEFT],
        'Pad both to 10 with string with uneven length' => ['ABC', 10, ' ', STR_PAD_BOTH],
        'Pad right to 10 with string with uneven length and 2 character padding' => ['ABC', 10, '12', STR_PAD_RIGHT],
        'Pad left to 10 with string with uneven length and 2 character padding' => ['ABC', 10, '12', STR_PAD_LEFT],
        'Pad both to 10 with string with uneven length and 2 character padding' => ['ABC', 10, '12', STR_PAD_BOTH],
        'Pad right to 10 with string with even length' => ['AB', 10, ' ', STR_PAD_RIGHT],
        'Pad left to 10 with string with even length' => ['AB', 10, ' ', STR_PAD_LEFT],
        'Pad both to 10 with string with even length' => ['AB', 10, ' ', STR_PAD_BOTH],
        'Pad right to 10 with string with even length and 2 character padding' => ['AB', 10, '12', STR_PAD_RIGHT],
        'Pad left to 10 with string with even length and 2 character padding' => ['AB', 10, '12', STR_PAD_LEFT],
        'Pad both to 10 with string with even length and 2 character padding' => ['AB', 10, '12', STR_PAD_BOTH],
    ];
}
/**
 * @test
 *
 * Tests that StringUtility::multibyteStringPad() returns the same value as \str_pad()
 * for ASCII strings.
 *
 * @param string $string
 * @param int $length
 * @param string $pad_string
 * @param int $pad_type
 *
 * @dataProvider multibyteStringPadReturnsSameValueAsStrPadForAsciiStringsDataProvider
 */
public function multibyteStringPadReturnsSameValueAsStrPadForAsciiStrings(string $string, int $length, string $pad_string, int $pad_type): void
{
    // assertSame() compares type and value strictly. A loose comparison could treat two
    // differently padded numeric strings as equal and hide a padding error.
    self::assertSame(
        str_pad($string, $length, $pad_string, $pad_type),
        StringUtility::multibyteStringPad($string, $length, $pad_string, $pad_type)
    );
}
/**
 * Data provider for multibyteStringPadReturnsCorrectResultsMultibyte
 *
 * Each data set is [expected result, string, target length, pad string, pad type].
 * Every expected result is exactly 8 characters long. Runs of padding spaces are built
 * with str_repeat() so that the number of spaces is explicit.
 *
 * @return \Generator
 */
public function multibyteStringPadReturnsCorrectResultsMultibyteDataProvider(): \Generator
{
    yield 'Pad right to 8 with string with uneven length' => ['häh' . str_repeat(' ', 5), 'häh', 8, ' ', STR_PAD_RIGHT];
    yield 'Pad left to 8 with string with uneven length' => [str_repeat(' ', 5) . 'häh', 'häh', 8, ' ', STR_PAD_LEFT];
    // With an odd amount of padding the extra character goes to the right side.
    yield 'Pad both to 8 with string with uneven length' => [str_repeat(' ', 2) . 'häh' . str_repeat(' ', 3), 'häh', 8, ' ', STR_PAD_BOTH];
    yield 'Pad right to 8 with string with uneven length and 2 character padding' => ['hühäöäöä', 'hüh', 8, 'äö', STR_PAD_RIGHT];
    yield 'Pad left to 8 with string with uneven length and 2 character padding' => ['äöäöähüh', 'hüh', 8, 'äö', STR_PAD_LEFT];
    yield 'Pad both to 8 with string with uneven length and 2 character padding' => ['äöhühäöä', 'hüh', 8, 'äö', STR_PAD_BOTH];
    yield 'Pad right to 8 with string with even length' => ['hä' . str_repeat(' ', 6), 'hä', 8, ' ', STR_PAD_RIGHT];
    yield 'Pad left to 8 with string with even length' => [str_repeat(' ', 6) . 'hä', 'hä', 8, ' ', STR_PAD_LEFT];
    yield 'Pad both to 8 with string with even length' => [str_repeat(' ', 3) . 'hä' . str_repeat(' ', 3), 'hä', 8, ' ', STR_PAD_BOTH];
    yield 'Pad right to 8 with string with even length and 2 character padding with MB char' => ['hüäöäöäö', 'hü', 8, 'äö', STR_PAD_RIGHT];
    yield 'Pad left to 8 with string with even length and 2 character padding with MB char' => ['äöäöäöhü', 'hü', 8, 'äö', STR_PAD_LEFT];
    yield 'Pad both to 8 with string with even length and 2 character padding with MB char' => ['äöähüäöä', 'hü', 8, 'äö', STR_PAD_BOTH];
}
/**
 * @test
 *
 * Tests that StringUtility::multibyteStringPad() pads strings containing multibyte
 * characters to the requested length in characters.
 *
 * @param string $expectedResult
 * @param string $string
 * @param int $length
 * @param string $pad_string
 * @param int $pad_type
 *
 * @dataProvider multibyteStringPadReturnsCorrectResultsMultibyteDataProvider
 */
public function multibyteStringPadReturnsCorrectResultsMultibyte(string $expectedResult, string $string, int $length, string $pad_string, int $pad_type): void
{
    // assertSame() compares type and value strictly, so the padded result has to match
    // the expectation character for character.
    self::assertSame(
        $expectedResult,
        StringUtility::multibyteStringPad($string, $length, $pad_string, $pad_type)
    );
}
}
......@@ -15,6 +15,7 @@
namespace TYPO3\CMS\Fluid\ViewHelpers\Format;
use TYPO3\CMS\Core\Utility\StringUtility;
use TYPO3Fluid\Fluid\Core\Rendering\RenderingContextInterface;
use TYPO3Fluid\Fluid\Core\ViewHelper\AbstractViewHelper;
use TYPO3Fluid\Fluid\Core\ViewHelper\Traits\CompileWithContentArgumentAndRenderStatic;
......@@ -102,6 +103,6 @@ class PaddingViewHelper extends AbstractViewHelper
$padType = 'right';
}
return str_pad($value, $arguments['padLength'], $arguments['padString'], $padTypes[$padType]);
return StringUtility::multibyteStringPad($value, $arguments['padLength'], $arguments['padString'], $padTypes[$padType]);
}
}
......@@ -1698,7 +1698,7 @@ class ContentObjectRenderer implements LoggerAwareInterface
$padType = STR_PAD_BOTH;
}
}
return str_pad($content, $length, $padWith, $padType);
return StringUtility::multibyteStringPad($content, $length, $padWith, $padType);
}
/**
......
......@@ -6804,6 +6804,13 @@ class ContentObjectRendererTest extends UnitTestCase
'length' => '10',
],
],
'pad string with default settings and length 10 and multibyte character' => [
'Älien ',
'Älien',
[
'length' => '10',
],
],
'pad string with padWith -= and type left and length 10' => [
'-=-=-Alien',
'Alien',
......@@ -6813,6 +6820,15 @@ class ContentObjectRendererTest extends UnitTestCase
'type' => 'left',
],
],
'pad string with padWith äö and type left and length 10 and multibyte characters' => [
'äöäöäÄlien',
'Älien',
[
'length' => '10',
'padWith' => 'äö',
'type' => 'left',
],
],
'pad string with padWith _ and type both and length 10' => [
'__Alien___',
'Alien',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment