Commit 0cad8803 authored by Markus Klein, committed by Benni Mack
Browse files

[TASK] Remove iconv support and use mbstring-polyfill

We remove the iconv support in the CharsetConverter as the PHP internal
implementation of some functions differs from their mb_string counterparts.
This gives us a real headache in edge cases.

We already have the mbstring-polyfill as an indirect dependency in
the Core (by symfony), so we simply require it on our own now and use
mb_string all over the place.

This renders quite a few methods of CharsetConverter useless
and we therefore deprecate those.

Resolves: #78670
Releases: master
Change-Id: I88479f7939e0afb46f704d4e8f347abd2e10fc6d
Reviewed-on: https://review.typo3.org/50607


Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Anja Leichsenring <aleichsenring@ab-softlab.de>
Tested-by: Anja Leichsenring <aleichsenring@ab-softlab.de>
Reviewed-by: Benni Mack <benni@typo3.org>
Tested-by: Benni Mack <benni@typo3.org>
parent edbaad16
......@@ -41,6 +41,7 @@
"symfony/console": "^2.7 || ^3.0",
"symfony/finder": "^2.7 || ^3.0",
"symfony/yaml": "^2.7 || ^3.0",
"symfony/polyfill-mbstring": "^1.2",
"doctrine/instantiator": "~1.0.4",
"typo3/class-alias-loader": "^1.0",
"typo3/cms-composer-installers": "^1.2.8",
......@@ -58,8 +59,7 @@
"se/selenium-server-standalone": "~2.53",
"7elix/styleguide": "~8.0.0",
"friendsofphp/php-cs-fixer": "^1.12",
"fiunchinho/phpunit-randomizer": "~2.0.3",
"symfony/polyfill-mbstring": "~1.0"
"fiunchinho/phpunit-randomizer": "~2.0.3"
},
"suggest": {
"ext-gd": "GDlib/Freetype is required for building images with text (GIFBUILDER) and can also be used to scale images",
......@@ -230,4 +230,4 @@
},
"classmap": ["typo3/sysext/extbase/Tests/Unit/Object/Container/Fixtures/"]
}
}
}
\ No newline at end of file
This diff is collapsed.
......@@ -340,9 +340,9 @@ class SearchFormController extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin
}
// Add operators for various languages
// Converts the operators to lowercase
$this->operator_translate_table[] = [$this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_AND'), 'toLower'), 'AND'];
$this->operator_translate_table[] = [$this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_OR'), 'toLower'), 'OR'];
$this->operator_translate_table[] = [$this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_NOT'), 'toLower'), 'AND NOT'];
$this->operator_translate_table[] = [mb_strtolower($this->pi_getLL('local_operator_AND'), 'utf-8'), 'AND'];
$this->operator_translate_table[] = [mb_strtolower($this->pi_getLL('local_operator_OR'), 'utf-8'), 'OR'];
$this->operator_translate_table[] = [mb_strtolower($this->pi_getLL('local_operator_NOT'), 'utf-8'), 'AND NOT'];
// This is the id of the site root. This value may be a commalist of integer (prepared for this)
$this->wholeSiteIdList = (int)$this->frontendController->config['rootLine'][0]['uid'];
// Creating levels for section menu:
......@@ -2173,7 +2173,7 @@ class SearchFormController extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin
foreach ($parts as $k => $strP) {
if ($k % 2 == 0) {
// Find length of the summary part:
$strLen = $this->charsetConverter->strlen('utf-8', $parts[$k]);
$strLen = mb_strlen($parts[$k], 'utf-8');
$output[$k] = $parts[$k];
// Possibly shorten string:
if (!$k) {
......@@ -2192,7 +2192,7 @@ class SearchFormController extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin
$output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
}
}
$summaryLgd += $this->charsetConverter->strlen('utf-8', $output[$k]);
$summaryLgd += mb_strlen($output[$k], 'utf-8');
// Protect output:
$output[$k] = htmlspecialchars($output[$k]);
// If summary lgd is exceed, break the process:
......@@ -2200,7 +2200,7 @@ class SearchFormController extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin
break;
}
} else {
$summaryLgd += $this->charsetConverter->strlen('utf-8', $strP);
$summaryLgd += mb_strlen($strP, 'utf-8');
$output[$k] = '<strong class="tx-indexedsearch-redMarkup">' . htmlspecialchars($parts[$k]) . '</strong>';
}
}
......
......@@ -21,7 +21,6 @@ use TYPO3\CMS\Backend\Utility\BackendUtility;
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
use TYPO3\CMS\Core\Cache\CacheManager;
use TYPO3\CMS\Core\Cache\Frontend\VariableFrontend;
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\CMS\Core\Configuration\FlexForm\FlexFormTools;
use TYPO3\CMS\Core\Database\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
......@@ -1765,9 +1764,7 @@ class DataHandler
}
// Secures the string-length to be less than max.
if ((int)$tcaFieldConf['max'] > 0) {
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$value = $charsetConverter->substr('utf-8', (string)$value, 0, (int)$tcaFieldConf['max']);
$value = mb_substr((string)$value, 0, (int)$tcaFieldConf['max'], 'utf-8');
}
// Checking range of value:
// @todo: The "checkbox" option was removed for type=input, this check could be probably relaxed?
......@@ -2750,14 +2747,10 @@ class DataHandler
$value = trim($value);
break;
case 'upper':
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$value = $charsetConverter->conv_case('utf-8', $value, 'toUpper');
$value = mb_strtoupper($value, 'utf-8');
break;
case 'lower':
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$value = $charsetConverter->conv_case('utf-8', $value, 'toLower');
$value = mb_strtolower($value, 'utf-8');
break;
case 'required':
if (!isset($value) || $value === '') {
......
......@@ -72,10 +72,10 @@ class JavaScriptEncoder implements \TYPO3\CMS\Core\SingletonInterface
*/
public function encode($input)
{
$stringLength = $this->charsetConversion->strlen('utf-8', $input);
$stringLength = mb_strlen($input, 'utf-8');
$encodedString = '';
for ($i = 0; $i < $stringLength; $i++) {
$c = $this->charsetConversion->substr('utf-8', $input, $i, 1);
$c = mb_substr($input, $i, 1, 'utf-8');
$encodedString .= $this->encodeCharacter($c);
}
return $encodedString;
......
......@@ -86,7 +86,7 @@ abstract class AbstractHierarchicalFilesystemDriver extends AbstractDriver
$fileIdentifier = $this->canonicalizeAndCheckFilePath($fileIdentifier);
$fileIdentifier = '/' . ltrim($fileIdentifier, '/');
if (!$this->isCaseSensitiveFileSystem()) {
$fileIdentifier = $this->getCharsetConversion()->conv_case('utf-8', $fileIdentifier, 'toLower');
$fileIdentifier = mb_strtolower($fileIdentifier, 'utf-8');
}
}
return $fileIdentifier;
......
......@@ -1034,7 +1034,7 @@ class GeneralUtility
*
* @param string $str Input string
* @return string Uppercase String
* @deprecated since TYPO3 CMS v8, this method will be removed in TYPO3 CMS v9, Use \TYPO3\CMS\Core\Charset\CharsetConverter->conv_case() instead
* @deprecated since TYPO3 CMS v8, this method will be removed in TYPO3 CMS v9, use mb_strtoupper() instead
*/
public static function strtoupper($str)
{
......@@ -1050,7 +1050,7 @@ class GeneralUtility
*
* @param string $str Input string
* @return string Lowercase String
* @deprecated since TYPO3 CMS v8, this method will be removed in TYPO3 CMS v9, Use \TYPO3\CMS\Core\Charset\CharsetConverter->conv_case() instead
* @deprecated since TYPO3 CMS v8, this method will be removed in TYPO3 CMS v9, use mb_strtolower() instead
*/
public static function strtolower($str)
{
......@@ -1136,9 +1136,8 @@ class GeneralUtility
*/
public static function camelCaseToLowerCaseUnderscored($string)
{
$charsetConverter = self::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class);
$value = preg_replace('/(?<=\\w)([A-Z])/', '_\\1', $string);
return $charsetConverter->conv_case('utf-8', $value, 'toLower');
return mb_strtolower($value, 'utf-8');
}
/**
......
......@@ -11,7 +11,7 @@ Description
===========
The support for GNU-recode when converting from one charset to another has been dropped. The CharsetConverter
now only supports `mbstring` and `iconv` as well as the home-made TYPO3-internal conversion.
now only supports `mbstring`. In case `mbstring` is not present, a polyfill will kick in.
Impact
......@@ -30,7 +30,6 @@ Installations that have the option `$TYPO3_CONF_VARS[SYS][t3lib_cs_convMethod]`
Migration
=========
Use the Install Tool and the Preset information to see which other, better supported conversion libraries (mbstring
or iconv) are available.
No migration.
.. index:: PHP-API, LocalConfiguration
......@@ -11,18 +11,13 @@ Description
===========
The Charset Converter which is used to handle multi-byte charset conversions now
auto-detects which conversion strategy - either `mbstring`, `iconv` or the
TYPO3-internal functionality - should be used, based on the available PHP modules
in the system.
`mbstring` takes precedence over `iconv` and the TYPO3-internal functionality.
always uses `mbstring`. In case `mbstring` is not present, a polyfill will kick in.
Impact
======
The options `$TYPO3_CONF_VARS['SYS'][t3lib_cs_utils]` and
`$TYPO3_CONF_VARS[SYS][t3lib_cs_convMethod]` have no effect anymore and can be
removed. TYPO3 chooses the best strategy at runtime.
removed.
.. index:: LocalConfiguration
......@@ -35,17 +35,15 @@ Instead of :php:`GeneralUtility::strtoupper($value)` use:
.. code-block:: php
$charsetConverter = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class);
$charsetConverter->conv_case('utf-8', $value, 'toUpper');
mb_strtoupper($value, 'utf-8');
Instead of :php:`GeneralUtility::strtolower($value)` use:
.. code-block:: php
$charsetConverter = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class);
$charsetConverter->conv_case('utf-8', $value, 'toLower');
mb_strtolower($value, 'utf-8');
Alternatively use the native implementation of :php:`strtoupper($value)` or :php:`strtolower($value)`
if the handled string consists of ascii characters only and has no multi-byte characters like umlauts.
if the handled string consists of ASCII characters only.
.. index:: PHP-API
\ No newline at end of file
.. index:: PHP-API
.. include:: ../../Includes.txt
=========================================================
Deprecation: #78670 - Deprecated CharsetConverter methods
=========================================================
See :issue:`78670`
Description
===========
The ``symfony/polyfill-mbstring`` package provides us with mb_string functionality in all installations.
Therefore, some methods of :php:`CharsetConverter` have been deprecated, since the equivalent mb_string functions can be used directly:
- :php:`strlen()`: use :php:`mb_strlen()` directly
- :php:`substr()`: use :php:`mb_substr()` directly
- :php:`convCapitalize()`: use :php:`mb_convert_case()` directly
- :php:`conv_case()`: use :php:`mb_strtolower()` or :php:`mb_strtoupper()` directly
- :php:`utf8_substr()`: use :php:`mb_substr()` directly
- :php:`utf8_strlen()`: use :php:`mb_strlen()` directly
- :php:`utf8_strtrunc()`: use :php:`mb_strcut()` directly
- :php:`utf8_strpos()`: use :php:`mb_strpos()` directly
- :php:`utf8_strrpos()`: use :php:`mb_strrpos()` directly
- :php:`utf8_byte2char_pos()`: no replacement
- :php:`euc_strtrunc()`: use :php:`mb_strcut()` directly
- :php:`euc_substr()`: use :php:`mb_substr()` directly
- :php:`euc_strlen()`: use :php:`mb_strlen()` directly
- :php:`euc_char2byte_pos()`: no replacement
- :php:`$fourByteSets`: no replacement
Impact
======
Calling the deprecated :php:`CharsetConverter` methods will trigger a deprecation log entry.
Affected Installations
======================
Any installation using third-party extensions leveraging the mentioned :php:`CharsetConverter` functionalities.
Migration
=========
Use the equivalent mb_string functions directly as listed above.
.. index:: PHP-API
......@@ -483,7 +483,7 @@ class Query implements QueryInterface
$comparison = $this->qomFactory->comparison(
$this->qomFactory->lowerCase($this->qomFactory->propertyValue($propertyName, $this->getSelectorName())),
QueryInterface::OPERATOR_EQUAL_TO,
\TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class)->conv_case(\TYPO3\CMS\Extbase\Persistence\Generic\Query::CHARSET, $operand, 'toLower')
mb_strtolower($operand, \TYPO3\CMS\Extbase\Persistence\Generic\Query::CHARSET)
);
}
return $comparison;
......
......@@ -98,11 +98,6 @@ class CaseViewHelper extends AbstractViewHelper
*/
protected $escapeChildren = false;
/**
* @var NULL|CharsetConverter
*/
protected static $charsetConverter = null;
/**
* Initialize ViewHelper arguments
*
......@@ -150,23 +145,18 @@ class CaseViewHelper extends AbstractViewHelper
$value = $renderChildrenClosure();
}
if (is_null(static::$charsetConverter)) {
static::$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
}
$charsetConverter = static::$charsetConverter;
switch ($mode) {
case self::CASE_LOWER:
$output = $charsetConverter->conv_case('utf-8', $value, 'toLower');
$output = mb_strtolower($value, 'utf-8');
break;
case self::CASE_UPPER:
$output = $charsetConverter->conv_case('utf-8', $value, 'toUpper');
$output = mb_strtoupper($value, 'utf-8');
break;
case self::CASE_CAPITAL:
$output = $charsetConverter->substr('utf-8', $charsetConverter->convCaseFirst('utf-8', $value, 'toUpper'), 0, 1) . $charsetConverter->substr('utf-8', $value, 1);
$output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toUpper');
break;
case self::CASE_UNCAPITAL:
$output = $charsetConverter->substr('utf-8', $charsetConverter->convCaseFirst('utf-8', $value, 'toLower'), 0, 1) . $charsetConverter->substr('utf-8', $value, 1);
$output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toLower');
break;
case self::CASE_CAPITAL_WORDS:
// @todo: Implement method once there is a proper solution with using the CharsetConverter
......
......@@ -3828,13 +3828,11 @@ class ContentObjectRenderer
*/
public function substring($content, $options)
{
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$options = GeneralUtility::intExplode(',', $options . ',');
if ($options[1]) {
return $charsetConverter->substr('utf-8', $content, $options[0], $options[1]);
return mb_substr($content, $options[0], $options[1], 'utf-8');
} else {
return $charsetConverter->substr('utf-8', $content, $options[0]);
return mb_substr($content, $options[0], null, 'utf-8');
}
}
......@@ -3854,18 +3852,16 @@ class ContentObjectRenderer
$afterstring = trim($options[1]);
$crop2space = trim($options[2]);
if ($chars) {
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
if ($charsetConverter->strlen('utf-8', $content) > abs($chars)) {
if (mb_strlen($content, 'utf-8') > abs($chars)) {
$truncatePosition = false;
if ($chars < 0) {
$content = $charsetConverter->substr('utf-8', $content, $chars);
$content = mb_substr($content, $chars, null, 'utf-8');
if ($crop2space) {
$truncatePosition = strpos($content, ' ');
}
$content = $truncatePosition ? $afterstring . substr($content, $truncatePosition) : $afterstring . $content;
} else {
$content = $charsetConverter->substr('utf-8', $content, 0, $chars);
$content = mb_substr($content, 0, $chars, 'utf-8');
if ($crop2space) {
$truncatePosition = strrpos($content, ' ');
}
......@@ -3947,13 +3943,11 @@ class ContentObjectRenderer
$strLen = 0;
// This is the offset of the content item which was cropped.
$croppedOffset = null;
/** @var CharsetConverter $charsetConverter */
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$countSplittedContent = count($splittedContent);
for ($offset = 0; $offset < $countSplittedContent; $offset++) {
if ($offset % 2 === 0) {
$tempContent = $splittedContent[$offset];
$thisStrLen = $charsetConverter->strlen('utf-8', html_entity_decode($tempContent, ENT_COMPAT, 'UTF-8'));
$thisStrLen = mb_strlen(html_entity_decode($tempContent, ENT_COMPAT, 'UTF-8'), 'utf-8');
if ($strLen + $thisStrLen > $absChars) {
$croppedOffset = $offset;
$cropPosition = $absChars - $strLen;
......@@ -6768,13 +6762,13 @@ class ContentObjectRenderer
$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
switch (strtolower($case)) {
case 'upper':
$theValue = $charsetConverter->conv_case('utf-8', $theValue, 'toUpper');
$theValue = mb_strtoupper($theValue, 'utf-8');
break;
case 'lower':
$theValue = $charsetConverter->conv_case('utf-8', $theValue, 'toLower');
$theValue = mb_strtolower($theValue, 'utf-8');
break;
case 'capitalize':
$theValue = $charsetConverter->convCapitalize('utf-8', $theValue);
$theValue = mb_convert_case($theValue, MB_CASE_TITLE, 'utf-8');
break;
case 'ucfirst':
$theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toUpper');
......
......@@ -583,7 +583,7 @@ class GifBuilder extends GraphicalFunctions
// Max length = 100 if automatic line breaks are not defined:
if (!isset($conf['breakWidth']) || !$conf['breakWidth']) {
$tlen = (int)$conf['textMaxLength'] ?: 100;
$conf['text'] = $this->csConvObj->substr('utf-8', $conf['text'], 0, $tlen);
$conf['text'] = mb_substr($conf['text'], 0, $tlen, 'utf-8');
}
if ((string)$conf['text'] != '') {
// Char range map thingie:
......
......@@ -4616,7 +4616,7 @@ class ContentObjectRendererTest extends UnitTestCase
return [
'empty string from ISO-8859-15' => [
'',
iconv('UTF-8', 'ISO-8859-15', ''),
mb_convert_encoding('', 'ISO-8859-15', 'UTF-8'),
['csConv' => 'ISO-8859-15']
],
'empty string from BIG-5' => [
......@@ -4626,7 +4626,7 @@ class ContentObjectRendererTest extends UnitTestCase
],
'"0" from ISO-8859-15' => [
'0',
iconv('UTF-8', 'ISO-8859-15', '0'),
mb_convert_encoding('0', 'ISO-8859-15', 'UTF-8'),
['csConv' => 'ISO-8859-15']
],
'"0" from BIG-5' => [
......@@ -4636,7 +4636,7 @@ class ContentObjectRendererTest extends UnitTestCase
],
'euro symbol from ISO-88859-15' => [
'€',
iconv('UTF-8', 'ISO-8859-15', ''),
mb_convert_encoding('€', 'ISO-8859-15', 'UTF-8'),
['csConv' => 'ISO-8859-15']
],
'good morning from BIG-5' => [
......
......@@ -758,7 +758,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
foreach ($parts as $k => $strP) {
if ($k % 2 == 0) {
// Find length of the summary part:
$strLen = $this->charsetConverter->strlen('utf-8', $parts[$k]);
$strLen = mb_strlen($parts[$k], 'utf-8');
$output[$k] = $parts[$k];
// Possibly shorten string:
if (!$k) {
......@@ -776,7 +776,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
$output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
}
}
$summaryLgd += $this->charsetConverter->strlen('utf-8', $output[$k]);
$summaryLgd += mb_strlen($output[$k], 'utf-8');
// Protect output:
$output[$k] = htmlspecialchars($output[$k]);
// If summary lgd is exceed, break the process:
......@@ -784,7 +784,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
break;
}
} else {
$summaryLgd += $this->charsetConverter->strlen('utf-8', $strP);
$summaryLgd += mb_strlen($strP, 'utf-8');
$output[$k] = '<strong class="tx-indexedsearch-redMarkup">' . htmlspecialchars($parts[$k]) . '</strong>';
}
}
......@@ -880,9 +880,9 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
['-', 'AND NOT'],
// Add operators for various languages
// Converts the operators to lowercase
[$this->charsetConverter->conv_case('utf-8', LocalizationUtility::translate('localizedOperandAnd', 'IndexedSearch'), 'toLower'), 'AND'],
[$this->charsetConverter->conv_case('utf-8', LocalizationUtility::translate('localizedOperandOr', 'IndexedSearch'), 'toLower'), 'OR'],
[$this->charsetConverter->conv_case('utf-8', LocalizationUtility::translate('localizedOperandNot', 'IndexedSearch'), 'toLower'), 'AND NOT']
[mb_strtolower(LocalizationUtility::translate('localizedOperandAnd', 'IndexedSearch'), 'utf-8'), 'AND'],
[mb_strtolower(LocalizationUtility::translate('localizedOperandOr', 'IndexedSearch'), 'utf-8'), 'OR'],
[mb_strtolower(LocalizationUtility::translate('localizedOperandNot', 'IndexedSearch'), 'utf-8'), 'AND NOT']
];
$swordArray = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::getExplodedSearchString($searchWords, $defaultOperator == 1 ? 'OR' : 'AND', $operatorTranslateTable);
if (is_array($swordArray)) {
......
......@@ -1324,7 +1324,7 @@ class Indexer
if ($maxL) {
$bodyDescription = preg_replace('/\s+/u', ' ', $contentArr['body']);
// Shorten the string:
$bodyDescription = $this->csObj->strtrunc('utf-8', $bodyDescription, $maxL);
$bodyDescription = mb_strcut($bodyDescription, 0, $maxL, 'utf-8');
}
return $bodyDescription;
}
......
......@@ -76,7 +76,7 @@ class Lexer
$this->debugString = '';
// Then convert the string to lowercase:
if (!$this->lexerConf['casesensitive']) {
$wordString = $this->csObj->conv_case('utf-8', $wordString, 'toLower');
$wordString = mb_strtolower($wordString, 'utf-8');
}
// Now, splitting words:
$len = 0;
......@@ -136,11 +136,11 @@ class Lexer
*/
if ($cType == 'cjk') {
// Find total string length:
$strlen = $this->csObj->strlen('utf-8', $theWord);
$strlen = mb_strlen($theWord, 'utf-8');
// Traverse string length and add words as pairs of two chars:
for ($a = 0; $a < $strlen; $a++) {
if ($strlen == 1 || $a < $strlen - 1) {
$words[] = $this->csObj->substr('utf-8', $theWord, $a, 2);
$words[] = mb_substr($theWord, $a, 2, 'utf-8');
}
}
} else {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment