See four lines in changelog, 2004-05-18. Basically a lot of fixes for character set...
author Kasper Skårhøj <kasper@typo3.org>
Tue, 18 May 2004 12:24:09 +0000 (12:24 +0000)
committer Kasper Skårhøj <kasper@typo3.org>
Tue, 18 May 2004 12:24:09 +0000 (12:24 +0000)
git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@311 709f56b5-9817-0410-a4d7-c38de5d9e867

ChangeLog
t3lib/class.t3lib_cs.php
t3lib/class.t3lib_div.php
typo3/sysext/cms/tslib/class.tslib_content.php
typo3/sysext/cms/tslib/class.tslib_fe.php
typo3/sysext/cms/tslib/class.tslib_pagegen.php

index ef0c1af..2f116e1 100755 (executable)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
 2004-05-18  Kasper Skårhøj,,,  <kasper@typo3.com>
+       * Added t3lib_cs::specCharsToASCII() for converting special chars (like umlauts) to their two-character ASCII alternatives (like ae, oe etc.). The function is NOT finished at all; it was only added so I could use it for the conversion of filenames in simulateStaticDocuments.
+       * TypoScript charset compatibility extended: tslib_cObj::caseshift(), tslib_cObj::substring() and tslib_cObj::crop() now use functions in t3lib_cs. Also, stdWrap.strftime will automatically convert the localized string from the locale charset (guessed by t3lib_cs) to renderCharset.
+       * !!! website charset is now taken from "forceCharset" by default (if found) and in any case the http-header with text/html and charset is sent UNLESS you disable it with "config.disableCharsetHeader=1". You will probably have to disable this header if you are using TYPO3 for XML feeds or wap-pages. 
+       * Added "config.renderCharset" option: This is the charset of the content while rendered in the frontend engine. If different from "metaCharset" a conversion must happen before output to browser. Both renderCharset and metaCharset take their default values from TYPO3_CONF_VARS[BE][forceCharset] if found, otherwise they default to "iso-8859-1". Also "metaCharset" takes "renderCharset" as default - and if metaCharset is different from renderCharset a conversion will happen on output.
+
+2004-05-18  Kasper Skårhøj,,,  <kasper@typo3.com>
 
        * Created new function, t3lib_div::fixed_lgd_cs(), which should be used in the backend wherever a string is shortened for visual display. This function will truncate the string according to the backend charset (which should of course be set by forceCharset to a fixed value). This new function has also been substituted in numerous places in the source of course (hence all the script updates).
 
index c253e25..aa0980a 100755 (executable)
@@ -480,6 +480,20 @@ class t3lib_cs {
                return $cs ? $cs : 'iso-8859-1';
        }
 
+
+
+
+
+
+
+
+
+       /********************************************
+        *
+        * Charset Conversion functions
+        *
+        ********************************************/
+
        /**
         * Convert from one charset to another charset.
         *
@@ -1230,7 +1244,32 @@ class t3lib_cs {
                return $out;
        }
 
+       /**
+        * Converts special chars (like ÆØÅæøå, umlauts etc) to ASCII equivalents (usually two characters, like æ => ae etc.)
+        * CURRENTLY IT IS NOT FULLY IMPLEMENTED!!!
+        *
+        * @param       string          Character set of string
+        * @param       string          Input string to convert
+        * @return      string          The converted string
+        */
+       function specCharsToASCII($charset,$string)     {
+               if ($charset == 'utf-8')        {
+                       $pat  = array (
+                               '/'.$this->utf8_encode('æ', 'iso-8859-1').'/',
+                               '/'.$this->utf8_encode('ø', 'iso-8859-1').'/',
+                               '/'.$this->utf8_encode('å', 'iso-8859-1').'/',
+                               '/'.$this->utf8_encode('Æ', 'iso-8859-1').'/',
+                               '/'.$this->utf8_encode('Ø', 'iso-8859-1').'/',
+                               '/'.$this->utf8_encode('Å', 'iso-8859-1').'/',
+                       );
+                       $repl = array ( 'ae',   'oe',   'aa', 'AE',     'OE',    'AA');
+                       $string = preg_replace($pat,$repl,$string);
+               } else {
+                       $string = t3lib_div::convUmlauts($string);
+               }
 
+               return $string;
+       }
 
 
 
index 6c2574f..c9112f1 100755 (executable)
@@ -881,7 +881,7 @@ class t3lib_div {
        /**
         * Change umlaut characters to plain ASCII with normally two character target
         * Only known characters will be converted, so don't expect a result for any character.
-        * Works only for western europe single-byte charsets!
+        * (DEPRECATED: Works only for western europe single-byte charsets! Use t3lib_cs::specCharsToASCII() instead!)
         *
         * ä => ae, Ö => Oe
         *
index 71b3e96..df6878a 100755 (executable)
@@ -2992,7 +2992,13 @@ class tslib_cObj {
                                if ((string)$conf['char']!=''){$content=chr(intval($conf['char']));}
                                if ($conf['intval']){$content=intval($content);}
                                if ($conf['date']){$content=date($conf['date'], $content);}
-                               if ($conf['strftime']){$content=strftime($conf['strftime'], $content);}
+                               if ($conf['strftime']){
+                                       $content = strftime($conf['strftime'], $content);
+                                       $tmp_charset = $conf['strftime.']['charset'] ? $conf['strftime.']['charset'] : $GLOBALS['TSFE']->localeCharset;
+                                       if ($tmp_charset)       {
+                                               $content = $GLOBALS['TSFE']->csConv($content,$tmp_charset);
+                                       }
+                               }
                                if ($conf['age']){$content=$this->calcAge(time()-$content,$conf['age']);}
 
                                if ($conf['case']){$content=$this->HTMLcaseshift($content, $conf['case']);}
@@ -3336,11 +3342,11 @@ class tslib_cObj {
                do      {
                        if (!$inside)   {
                                $len = strcspn(substr($str,$pointer),'{');
-                               $newVal.=substr($str,$pointer,$len);
+                               $newVal.= substr($str,$pointer,$len);
                                $inside = 1;
                        } else {
                                $len = strcspn(substr($str,$pointer),'}')+1;
-                               $newVal.=$this->getData(substr($str,$pointer+1,$len-2),$this->data);
+                               $newVal.= $this->getData(substr($str,$pointer+1,$len-2),$this->data);
                                $inside = 0;
                        }
                        $pointer+=$len;
@@ -3385,9 +3391,9 @@ class tslib_cObj {
        function substring($content,$options)   {
                $options = t3lib_div::intExplode(',',$options.',');
                if ($options[1])        {
-                       return substr($content,$options[0],$options[1]);
+                       return $GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,$options[0],$options[1]);
                } else {
-                       return substr($content,$options[0]);
+                       return $GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,$options[0]);
                }
        }
 
@@ -3402,14 +3408,14 @@ class tslib_cObj {
         */
        function crop($content,$options)        {
                $options = explode('|',$options);
-               $chars=intval($options[0]);
-               $afterstring=trim($options[1]);
+               $chars = intval($options[0]);
+               $afterstring = trim($options[1]);
                if ($chars)     {
                        if (strlen($content)>abs($chars))       {
                                if ($chars<0)   {
-                                       $content= $afterstring.substr($content,$chars);
+                                       $content = $afterstring.$GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,$chars);
                                } else {
-                                       $content= substr($content,0,$chars).$afterstring;
+                                       $content = $GLOBALS['TSFE']->csConvObj->substr($GLOBALS['TSFE']->renderCharset,$content,0,$chars).$afterstring;
                                }
                        }
                }
@@ -5319,12 +5325,14 @@ class tslib_cObj {
                $case = strtolower($case);
                switch($case)   {
                        case 'upper':
-                               $theValue = strtoupper($theValue);
-                               $theValue = strtr($theValue, $this->caseConvStrings[0], $this->caseConvStrings[1]);
+                               $theValue = $GLOBALS['TSFE']->csConvObj->conv_case($GLOBALS['TSFE']->renderCharset,$theValue,'toUpper');
+                               #$theValue = strtoupper($theValue);
+                               #$theValue = strtr($theValue, $this->caseConvStrings[0], $this->caseConvStrings[1]);
                        break;
                        case 'lower':
-                               $theValue = strtolower($theValue);
-                               $theValue = strtr($theValue, $this->caseConvStrings[1], $this->caseConvStrings[0]);
+                               $theValue = $GLOBALS['TSFE']->csConvObj->conv_case($GLOBALS['TSFE']->renderCharset,$theValue,'toLower');
+                               #$theValue = strtolower($theValue);
+                               #$theValue = strtr($theValue, $this->caseConvStrings[1], $this->caseConvStrings[0]);
                        break;
                }
                return $theValue;
@@ -5339,18 +5347,18 @@ class tslib_cObj {
         * @see caseshift()
         */
        function HTMLcaseshift($theValue, $case)        {
-               $inside=0;
-               $newVal='';
-               $pointer=0;
+               $inside = 0;
+               $newVal = '';
+               $pointer = 0;
                $totalLen = strlen($theValue);
                do      {
                        if (!$inside)   {
                                $len = strcspn(substr($theValue,$pointer),'<');
-                               $newVal.=$this->caseshift(substr($theValue,$pointer,$len),$case);
+                               $newVal.= $this->caseshift(substr($theValue,$pointer,$len),$case);
                                $inside = 1;
                        } else {
                                $len = strcspn(substr($theValue,$pointer),'>')+1;
-                               $newVal.=substr($theValue,$pointer,$len);
+                               $newVal.= substr($theValue,$pointer,$len);
                                $inside = 0;
                        }
                        $pointer+=$len;
index ebe6fd5..0f9cc7f 100755 (executable)
                // Page content render object
        var $cObj ='';                                          // is instantiated object of tslib_cObj
 
-               // Character set (charset) conversion object:
-       var $csConvObj;                                         // An instance of the "t3lib_cs" class. May be used by any application.
-       var $defaultCharSet='iso-8859-1';       // The default charset used in the frontend if nothing else is set.
-
                // CONTENT accumulation
        var $content='';                                        // All page content is accumulated in this variable. See pagegen.php
 
        var $scriptParseTime=0;
        var $TCAloaded = 0;                                     // Set ONLY if the full TCA is loaded
 
+               // Character set (charset) conversion object:
+       var $csConvObj;                                         // An instance of the "t3lib_cs" class. May be used by any application.
+       var $defaultCharSet = 'iso-8859-1';     // The default charset used in the frontend if nothing else is set.
+       var $renderCharset='';                          // Internal charset of the frontend during rendering: Defaults to "forceCharset" and if that is not set, to ->defaultCharSet
+       var $metaCharset='';                            // Output charset of the websites content. This is the charset found in the header, meta tag etc. If different from $renderCharset a conversion happens before output to browser. Defaults to ->renderCharset if not set.
+       var $localeCharset='';                          // Assumed charset of locale strings.
+
                // LANG:
        var $lang='';                                           // Set to the system language key (used on the site)
        var $langSplitIndex=0;                          // Set to the index number of the language key
        var $labelsCharset='';                          // Charset of the labels from locallang (based on $this->lang)
-       var $siteCharset='';                            // Charset of the website.
        var $convCharsetToFrom='';                      // Set to the charsets to convert from/to IF there are any difference. Otherwise this stays a string
        var $LL_labels_cache=array();
        var $LL_files_cache=array();
                                exit;
                        }
                }
+
+                       // Initialize charset settings etc.
+               $this->initLLvars();
+
                        // No cache
                if ($this->config['config']['no_cache'])        {$this->set_no_cache();}                // Set $this->no_cache true if the config.no_cache value is set!
 
                        setlocale(LC_CTYPE,$this->config['config']['locale_all']);
                        setlocale(LC_MONETARY,$this->config['config']['locale_all']);
                        setlocale(LC_TIME,$this->config['config']['locale_all']);
+
+                       $this->localeCharset = $this->csConvObj->get_locale_charset($this->config['config']['locale_all']);
                }
 
                        // Setting cache_timeout_default. May be overridden by PHP include scripts.
@@ -2176,9 +2184,9 @@ if (version == "n3") {
                        $this->content = str_replace($this->getMethodUrlIdToken, $this->fe_user->get_URL_ID, $this->content);
                }
 
-                       // Set header for charset-encoding if set. Added by RL 17.10.03
-               if ($this->config['config']['metaCharset'])     {
-                       $headLine = 'Content-Type:text/html;charset='.trim($this->config['config']['metaCharset']);
+                       // Set header for charset-encoding unless disabled
+               if (!$this->config['config']['disableCharsetHeader'])   {
+                       $headLine = 'Content-Type:text/html;charset='.trim($this->metaCharset);
                        header ($headLine);
                }
 
@@ -2436,10 +2444,10 @@ if (version == "n3") {
                $titleChars = intval($this->config['config']['simulateStaticDocuments_addTitle']);
                $out = '';
                if ($titleChars)        {
-                       $out = t3lib_div::convUmlauts($inTitle);
-                       $out= ereg_replace('[^[:alnum:]_-]','_',trim(substr($out,0,$titleChars)));
-                       $out= ereg_replace('_*$','',$out);
-                       $out= ereg_replace('^_*','',$out);
+                       $out = $this->csConvObj->specCharsToASCII($this->renderCharset, $inTitle);
+                       $out = ereg_replace('[^[:alnum:]_-]','_',trim(substr($out,0,$titleChars)));
+                       $out = ereg_replace('_*$','',$out);
+                       $out = ereg_replace('^_*','',$out);
                        if ($out)       $out.='.';
                }
                $enc = '';
@@ -2913,8 +2921,6 @@ if (version == "n3") {
         * @return      string          Label value, if any.
         */
        function sL($input)     {
-               if (!$this->lang)       $this->initLLvars();
-
                if (strcmp(substr($input,0,4),'LLL:'))  {
                        $t = explode('|',$input);
                        return $t[$this->langSplitIndex] ? $t[$this->langSplitIndex] : $t[0];
@@ -2973,20 +2979,22 @@ if (version == "n3") {
         * @return      void
         */
        function initLLvars()   {
-               $this->lang = $this->config['config']['language'] ? $this->config['config']['language'] : 'default';
 
+                       // Setting language key and split index:
+               $this->lang = $this->config['config']['language'] ? $this->config['config']['language'] : 'default';
                $ls = explode('|',TYPO3_languages);
                while(list($i,$v)=each($ls))    {
                        if ($v==$this->lang)    {$this->langSplitIndex=$i; break;}
                }
 
                        // Setting charsets:
-               $this->siteCharset = $this->csConvObj->parse_charset($GLOBALS['TSFE']->config['config']['metaCharset'] ? $GLOBALS['TSFE']->config['config']['metaCharset'] : $GLOBALS['TSFE']->defaultCharSet);
+               $this->renderCharset = $this->csConvObj->parse_charset($this->config['config']['renderCharset'] ? $this->config['config']['renderCharset'] : ($this->TYPO3_CONF_VARS['BE']['forceCharset'] ? $this->TYPO3_CONF_VARS['BE']['forceCharset'] : $this->defaultCharSet));    // REndering charset of HTML page.
+               $this->metaCharset = $this->csConvObj->parse_charset($this->config['config']['metaCharset'] ? $this->config['config']['metaCharset'] : $this->renderCharset);   // Output charset of HTML page.
                $this->labelsCharset = $this->csConvObj->parse_charset($this->csConvObj->charSetArray[$this->lang] ? $this->csConvObj->charSetArray[$this->lang] : 'iso-8859-1');
-               if ($this->siteCharset != $this->labelsCharset) {
+               if ($this->renderCharset != $this->labelsCharset)       {
                        $this->convCharsetToFrom = array(
                                'from' => $this->labelsCharset,
-                               'to' => $this->siteCharset
+                               'to' => $this->renderCharset
                        );
                }
        }
@@ -3001,13 +3009,11 @@ if (version == "n3") {
         * @param       string          String to convert charset for
         * @param       string          Optional "from" charset.
         * @return      string          Output string, converted if needed.
-        * @see initLLvars(), t3lib_cs
+        * @see t3lib_cs
         */
        function csConv($str,$from='')  {
-               if (!$this->lang)       $this->initLLvars();
-
                if ($from)      {
-                       $output = $this->csConvObj->conv($str,$this->csConvObj->parse_charset($from),$this->siteCharset,1);
+                       $output = $this->csConvObj->conv($str,$this->csConvObj->parse_charset($from),$this->renderCharset,1);
                        return $output ? $output : $str;
                } elseif (is_array($this->convCharsetToFrom))   {
                        return $this->csConvObj->conv($str,$this->convCharsetToFrom['from'],$this->convCharsetToFrom['to'],1);
index 6740180..0605c30 100755 (executable)
@@ -314,7 +314,7 @@ function linkTo_UnCryptMailto(s)    {       //
                } else $customContent='';
 
                        // Setting charset:
-               $theCharset = ($GLOBALS['TSFE']->config['config']['metaCharset'] ? $GLOBALS['TSFE']->config['config']['metaCharset'] : $GLOBALS['TSFE']->defaultCharSet);
+               $theCharset = $GLOBALS['TSFE']->metaCharset;
 
                        // Reset the content variables:
                $GLOBALS['TSFE']->content='';