Commit 3a50f21c authored by Kasper Skårhøj's avatar Kasper Skårhøj
Browse files

See four lines in changelog, 2004-05-18. Basically a lot of fixes for...

See four lines in changelog, 2004-05-18. Basically a lot of fixes for character set handling, especially in the frontend.


git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@311 709f56b5-9817-0410-a4d7-c38de5d9e867
parent 92ed0e38
2004-05-18 Kasper Skårhøj,,, <kasper@typo3.com>
* Added t3lib_cs::specCharsToASCII() for converting special chars (like umlauts) to their double-byte alternatives in ASCII (like au, oe etc...). Function is NOT finished at all, only added so I could use it for the conversion of filenames in simulateStaticDocuments.
* TypoScript charset compatibility extended: tslib_cObj::caseshift(), tslib_cObj::substring(), tslib_cObj::crop() uses functions in t3lib_cs now. Also stdWrap.strftime will automatically convert localized string from locale charset (guessed by t3lib_cs) to renderCharset.
* !!! website charset is now taken from "forceCharset" by default (if found) and in any case the http-header with text/html and charset is sent UNLESS you disable it with "config.disableCharsetHeader=1". You will probably have to disable this header if you are using TYPO3 for XML feeds or wap-pages.
* Added "config.renderCharset" option: This is the charset of the content while rendered in the frontend engine. If different from "metaCharset" a conversion must happen before output to browser. Both renderCharset and metaCharset take their default values from TYPO3_CONF_VARS[BE][forceCharset] if found, otherwise they default to "iso-8859-1". Also "metaCharset" takes "renderCharset" as default - and if metaCharset is different from renderCharset a conversion will happen on output.
2004-05-18 Kasper Skårhøj,,, <kasper@typo3.com>
* Created new function, t3lib_div::fixed_lgd_cs(), which should be used in the backend wherever a string is shortened for visual display. This function will truncate the string according to the backend charset (which should of course be set by forceCharset to a fixed value). This new function has also been substituted numerous places in the source of course (hence all the script updates).
......
......@@ -480,6 +480,20 @@ class t3lib_cs {
return $cs ? $cs : 'iso-8859-1';
}
/********************************************
*
* Charset Conversion functions
*
********************************************/
/**
* Convert from one charset to another charset.
*
......@@ -1230,7 +1244,32 @@ class t3lib_cs {
return $out;
}
/**
 * Converts special chars (like ae-ligature, o-slash, a-ring, umlauts etc.) to ASCII equivalents (usually two characters, like U+00E6 => ae etc.)
 * CURRENTLY IT IS NOT FULLY IMPLEMENTED!!!
 *
 * For the charset "utf-8" only the six letters in the map below are replaced.
 * Any other charset is passed on unchanged to t3lib_div::convUmlauts().
 *
 * @param	string		Character set of string
 * @param	string		Input string to convert
 * @return	string		The converted string
 */
function specCharsToASCII($charset,$string) {
	if ($charset == 'utf-8') {
			// UTF-8 byte sequence => ASCII replacement.
			// The keys are written as byte escapes so the map does not depend on the encoding this source file is saved in.
			// NOTE(review): the letters are inferred from the replacement list (ae, oe, aa / AE, OE, AA) - confirm against the intended character set.
		$map = array(
			"\xC3\xA6" => 'ae',	// U+00E6, ae ligature
			"\xC3\xB8" => 'oe',	// U+00F8, o with stroke
			"\xC3\xA5" => 'aa',	// U+00E5, a with ring
			"\xC3\x86" => 'AE',	// U+00C6, AE ligature
			"\xC3\x98" => 'OE',	// U+00D8, O with stroke
			"\xC3\x85" => 'AA',	// U+00C5, A with ring
		);
		$string = strtr($string, $map);
	} else {
		$string = t3lib_div::convUmlauts($string);
	}
	return $string;
}
......
......@@ -881,7 +881,7 @@ class t3lib_div {
/**
* Change umlaut characters to plain ASCII with normally two character target
* Only known characters will be converted, so don't expect a result for any character.
* Works only for western europe single-byte charsets!
* (DEPRECATED: Works only for western europe single-byte charsets! Use t3lib_cs::specCharsToASCII() instead!)
*
* ä => ae, Ö => Oe
*
......
......@@ -2992,7 +2992,13 @@ class tslib_cObj {
if ((string)$conf['char']!=''){$content=chr(intval($conf['char']));}
if ($conf['intval']){$content=intval($content);}
if ($conf['date']){$content=date($conf['date'], $content);}
if ($conf['strftime']){$content=strftime($conf['strftime'], $content);}
if ($conf['strftime']){
$content = strftime($conf['strftime'], $content);
$tmp_charset = $conf['strftime.']['charset'] ? $conf['strftime.']['charset'] : $GLOBALS['TSFE']->localeCharset;
if ($tmp_charset) {
$content = $GLOBALS['TSFE']->csConv($content,$tmp_charset);
}
}
if ($conf['age']){$content=$this->calcAge(time()-$content,$conf['age']);}
if ($conf['case']){$content=$this->HTMLcaseshift($content, $conf['case']);}
......@@ -3336,11 +3342,11 @@ class tslib_cObj {
do {
if (!$inside) {
$len = strcspn(substr($str,$pointer),'{');
$newVal.=substr($str,$pointer,$len);
$newVal.= substr($str,$pointer,$len);
$inside = 1;
} else {
$len = strcspn(substr($str,$pointer),'}')+1;
$newVal.=$this->getData(substr($str,$pointer+1,$len-2),$this->data);
$newVal.= $this->getData(substr($str,$pointer+1,$len-2),$this->data);
$inside = 0;
}
$pointer+=$len;
......@@ -3385,9 +3391,9 @@ class tslib_cObj {
function substring($content,$options) {
		// $options is a comma list; a comma is appended before it is split so that index 1 is always present.
	$options = t3lib_div::intExplode(',',$options.',');
		// Both branches delegate to the charset conversion object with the frontend rendering charset.
		// The former plain substr() returns are gone: they came first and made these calls unreachable.
	if ($options[1]) {
		return $GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,$options[0],$options[1]);
	} else {
			// No (or zero) second value: return everything from the start offset.
		return $GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,$options[0]);
	}
}
......@@ -3402,14 +3408,14 @@ class tslib_cObj {
*/
function crop($content,$options) {
$options = explode('|',$options);
$chars=intval($options[0]);
$afterstring=trim($options[1]);
$chars = intval($options[0]);
$afterstring = trim($options[1]);
if ($chars) {
if (strlen($content)>abs($chars)) {
if ($chars<0) {
$content= $afterstring.substr($content,$chars);
$content = $afterstring.$GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,$chars);
} else {
$content= substr($content,0,$chars).$afterstring;
$content = $GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,0,$chars).$afterstring;
}
}
}
......@@ -5319,12 +5325,14 @@ class tslib_cObj {
$case = strtolower($case);
switch($case) {
case 'upper':
$theValue = strtoupper($theValue);
$theValue = strtr($theValue, $this->caseConvStrings[0], $this->caseConvStrings[1]);
$theValue = $GLOBALS['TSFE']->csConvObj->conv_case($GLOBALS['TSFE']->renderCharset,$theValue,'toUpper');
#$theValue = strtoupper($theValue);
#$theValue = strtr($theValue, $this->caseConvStrings[0], $this->caseConvStrings[1]);
break;
case 'lower':
$theValue = strtolower($theValue);
$theValue = strtr($theValue, $this->caseConvStrings[1], $this->caseConvStrings[0]);
$theValue = $GLOBALS['TSFE']->csConvObj->conv_case($GLOBALS['TSFE']->renderCharset,$theValue,'toLower');
#$theValue = strtolower($theValue);
#$theValue = strtr($theValue, $this->caseConvStrings[1], $this->caseConvStrings[0]);
break;
}
return $theValue;
......@@ -5339,18 +5347,18 @@ class tslib_cObj {
* @see caseshift()
*/
function HTMLcaseshift($theValue, $case) {
$inside=0;
$newVal='';
$pointer=0;
$inside = 0;
$newVal = '';
$pointer = 0;
$totalLen = strlen($theValue);
do {
if (!$inside) {
$len = strcspn(substr($theValue,$pointer),'<');
$newVal.=$this->caseshift(substr($theValue,$pointer,$len),$case);
$newVal.= $this->caseshift(substr($theValue,$pointer,$len),$case);
$inside = 1;
} else {
$len = strcspn(substr($theValue,$pointer),'>')+1;
$newVal.=substr($theValue,$pointer,$len);
$newVal.= substr($theValue,$pointer,$len);
$inside = 0;
}
$pointer+=$len;
......
......@@ -292,10 +292,6 @@
// Page content render object
var $cObj =''; // is instantiated object of tslib_cObj
// Character set (charset) conversion object:
var $csConvObj; // An instance of the "t3lib_cs" class. May be used by any application.
var $defaultCharSet='iso-8859-1'; // The default charset used in the frontend if nothing else is set.
// CONTENT accumulation
var $content=''; // All page content is accumulated in this variable. See pagegen.php
......@@ -304,11 +300,17 @@
var $scriptParseTime=0;
var $TCAloaded = 0; // Set ONLY if the full TCA is loaded
// Character set (charset) conversion object:
var $csConvObj; // An instance of the "t3lib_cs" class. May be used by any application.
var $defaultCharSet = 'iso-8859-1'; // The default charset used in the frontend if nothing else is set.
var $renderCharset=''; // Internal charset of the frontend during rendering: Defaults to "forceCharset" and if that is not set, to ->defaultCharSet
var $metaCharset=''; // Output charset of the websites content. This is the charset found in the header, meta tag etc. If different from $renderCharset a conversion happens before output to browser. Defaults to ->renderCharset if not set.
var $localeCharset=''; // Assumed charset of locale strings.
// LANG:
var $lang=''; // Set to the system language key (used on the site)
var $langSplitIndex=0; // Set to the index number of the language key
var $labelsCharset=''; // Charset of the labels from locallang (based on $this->lang)
var $siteCharset=''; // Charset of the website.
var $convCharsetToFrom=''; // Set to the charsets to convert from/to IF there are any difference. Otherwise this stays a string
var $LL_labels_cache=array();
var $LL_files_cache=array();
......@@ -1301,6 +1303,10 @@
exit;
}
}
// Initialize charset settings etc.
$this->initLLvars();
// No cache
if ($this->config['config']['no_cache']) {$this->set_no_cache();} // Set $this->no_cache true if the config.no_cache value is set!
......@@ -1879,6 +1885,8 @@
setlocale(LC_CTYPE,$this->config['config']['locale_all']);
setlocale(LC_MONETARY,$this->config['config']['locale_all']);
setlocale(LC_TIME,$this->config['config']['locale_all']);
$this->localeCharset = $this->csConvObj->get_locale_charset($this->config['config']['locale_all']);
}
// Setting cache_timeout_default. May be overridden by PHP include scripts.
......@@ -2176,9 +2184,9 @@ if (version == "n3") {
$this->content = str_replace($this->getMethodUrlIdToken, $this->fe_user->get_URL_ID, $this->content);
}
// Set header for charset-encoding if set. Added by RL 17.10.03
if ($this->config['config']['metaCharset']) {
$headLine = 'Content-Type:text/html;charset='.trim($this->config['config']['metaCharset']);
// Set header for charset-encoding unless disabled
if (!$this->config['config']['disableCharsetHeader']) {
$headLine = 'Content-Type:text/html;charset='.trim($this->metaCharset);
header ($headLine);
}
......@@ -2436,10 +2444,10 @@ if (version == "n3") {
$titleChars = intval($this->config['config']['simulateStaticDocuments_addTitle']);
$out = '';
if ($titleChars) {
$out = t3lib_div::convUmlauts($inTitle);
$out= ereg_replace('[^[:alnum:]_-]','_',trim(substr($out,0,$titleChars)));
$out= ereg_replace('_*$','',$out);
$out= ereg_replace('^_*','',$out);
$out = $this->csConvObj->specCharsToASCII($this->renderCharset, $inTitle);
$out = ereg_replace('[^[:alnum:]_-]','_',trim(substr($out,0,$titleChars)));
$out = ereg_replace('_*$','',$out);
$out = ereg_replace('^_*','',$out);
if ($out) $out.='.';
}
$enc = '';
......@@ -2913,8 +2921,6 @@ if (version == "n3") {
* @return string Label value, if any.
*/
function sL($input) {
if (!$this->lang) $this->initLLvars();
if (strcmp(substr($input,0,4),'LLL:')) {
$t = explode('|',$input);
return $t[$this->langSplitIndex] ? $t[$this->langSplitIndex] : $t[0];
......@@ -2973,20 +2979,22 @@ if (version == "n3") {
* @return void
*/
function initLLvars() {
		// Setting language key and split index:
		// The language key comes from config.language and falls back to "default".
	$this->lang = $this->config['config']['language'] ? $this->config['config']['language'] : 'default';
		// langSplitIndex becomes the position of that key in the "|"-separated TYPO3_languages list.
	$ls = explode('|',TYPO3_languages);
	foreach($ls as $i => $v) {
		if ($v==$this->lang) {$this->langSplitIndex=$i; break;}
	}
		// Setting charsets:
		// Charset of the website: config.metaCharset, else ->defaultCharSet.
	$this->siteCharset = $this->csConvObj->parse_charset($GLOBALS['TSFE']->config['config']['metaCharset'] ? $GLOBALS['TSFE']->config['config']['metaCharset'] : $GLOBALS['TSFE']->defaultCharSet);
		// Rendering charset of HTML page: config.renderCharset, else TYPO3_CONF_VARS[BE][forceCharset], else ->defaultCharSet.
	$this->renderCharset = $this->csConvObj->parse_charset($this->config['config']['renderCharset'] ? $this->config['config']['renderCharset'] : ($this->TYPO3_CONF_VARS['BE']['forceCharset'] ? $this->TYPO3_CONF_VARS['BE']['forceCharset'] : $this->defaultCharSet));
		// Output charset of HTML page: config.metaCharset, else the rendering charset.
	$this->metaCharset = $this->csConvObj->parse_charset($this->config['config']['metaCharset'] ? $this->config['config']['metaCharset'] : $this->renderCharset);
		// Charset of the labels for the current language key, "iso-8859-1" if none is registered.
	$this->labelsCharset = $this->csConvObj->parse_charset($this->csConvObj->charSetArray[$this->lang] ? $this->csConvObj->charSetArray[$this->lang] : 'iso-8859-1');
		// Labels only need conversion when their charset differs from the rendering charset.
		// Otherwise ->convCharsetToFrom is left untouched (a string by default).
	if ($this->renderCharset != $this->labelsCharset) {
		$this->convCharsetToFrom = array(
			'from' => $this->labelsCharset,
			'to' => $this->renderCharset
		);
	}
}
......@@ -3001,13 +3009,11 @@ if (version == "n3") {
* @param string String to convert charset for
* @param string Optional "from" charset.
* @return string Output string, converted if needed.
* @see initLLvars(), t3lib_cs
* @see t3lib_cs
*/
function csConv($str,$from='') {
if (!$this->lang) $this->initLLvars();
if ($from) {
$output = $this->csConvObj->conv($str,$this->csConvObj->parse_charset($from),$this->siteCharset,1);
$output = $this->csConvObj->conv($str,$this->csConvObj->parse_charset($from),$this->renderCharset,1);
return $output ? $output : $str;
} elseif (is_array($this->convCharsetToFrom)) {
return $this->csConvObj->conv($str,$this->convCharsetToFrom['from'],$this->convCharsetToFrom['to'],1);
......
......@@ -314,7 +314,7 @@ function linkTo_UnCryptMailto(s) { //
} else $customContent='';
// Setting charset:
$theCharset = ($GLOBALS['TSFE']->config['config']['metaCharset'] ? $GLOBALS['TSFE']->config['config']['metaCharset'] : $GLOBALS['TSFE']->defaultCharSet);
$theCharset = $GLOBALS['TSFE']->metaCharset;
// Reset the content variables:
$GLOBALS['TSFE']->content='';
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment