Fixed bug #10086: Core uses deprecated function t3lib_db->sql()
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
index 9daee51..0d8d5a0 100755 (executable)
@@ -2,7 +2,7 @@
 /***************************************************************
 *  Copyright notice
 *
-*  (c) 1999-2004 Kasper Skaarhoj (kasperYYYY@typo3.com)
+*  (c) 1999-2008 Kasper Skaarhoj (kasperYYYY@typo3.com)
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
  *
  *
  *
- *  102: class t3lib_parsehtml_proc extends t3lib_parsehtml
- *  137:     function init($elRef='',$recPid=0)
- *  149:     function setRelPath($path)
- *  173:     function evalWriteFile($pArr,$currentRecord)
+ *  103: class t3lib_parsehtml_proc extends t3lib_parsehtml
+ *  138:     function init($elRef='',$recPid=0)
+ *  150:     function setRelPath($path)
+ *  174:     function evalWriteFile($pArr,$currentRecord)
  *
  *              SECTION: Main function
- *  231:     function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
+ *  232:     function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
  *
  *              SECTION: Specific RTE TRANSFORMATION functions
- *  397:     function TS_images_db($value)
- *  538:     function TS_images_rte($value)
- *  572:     function TS_reglinks($value,$direction)
- *  606:     function TS_links_db($value)
- *  654:     function TS_links_rte($value)
- *  735:     function TS_preserve_db($value)
- *  759:     function TS_preserve_rte($value)
- *  780:     function TS_transform_db($value,$css=FALSE)
- *  891:     function TS_transform_rte($value,$css=0)
- *  962:     function TS_strip_db($value)
+ *  398:     function TS_images_db($value)
+ *  550:     function TS_images_rte($value)
+ *  589:     function TS_reglinks($value,$direction)
+ *  626:     function TS_links_db($value)
+ *  675:     function TS_links_rte($value)
+ *  760:     function TS_preserve_db($value)
+ *  784:     function TS_preserve_rte($value)
+ *  805:     function TS_transform_db($value,$css=FALSE)
+ *  922:     function transformStyledATags($value)
+ *  948:     function TS_transform_rte($value,$css=0)
+ * 1019:     function TS_strip_db($value)
  *
  *              SECTION: Generic RTE transformation, analysis and helper functions
- *  993:     function getURL($url)
- * 1007:     function HTMLcleaner_db($content,$tagList='')
- * 1028:     function getKeepTags($direction='rte',$tagList='')
- * 1137:     function divideIntoLines($value,$count=5,$returnArray=FALSE)
- * 1241:     function setDivTags($value,$dT='p')
- * 1286:     function internalizeFontTags($value)
- * 1322:     function siteUrl()
- * 1332:     function rteImageStorageDir()
- * 1344:     function removeTables($value,$breakChar='<br />')
- * 1376:     function defaultTStagMapping($code,$direction='rte')
- * 1399:     function getWHFromAttribs($attribArray)
- * 1425:     function urlInfoForLinkTags($url)
- * 1484:     function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
+ * 1050:     function getURL($url)
+ * 1064:     function HTMLcleaner_db($content,$tagList='')
+ * 1091:     function getKeepTags($direction='rte',$tagList='')
+ * 1200:     function divideIntoLines($value,$count=5,$returnArray=FALSE)
+ * 1304:     function setDivTags($value,$dT='p')
+ * 1349:     function internalizeFontTags($value)
+ * 1385:     function siteUrl()
+ * 1395:     function rteImageStorageDir()
+ * 1407:     function removeTables($value,$breakChar='<br />')
+ * 1439:     function defaultTStagMapping($code,$direction='rte')
+ * 1462:     function getWHFromAttribs($attribArray)
+ * 1489:     function urlInfoForLinkTags($url)
+ * 1548:     function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
  *
- * TOTAL FUNCTIONS: 27
+ * TOTAL FUNCTIONS: 28
  * (This index is automatically created/updated by the extension "extdeveval")
  *
  */
@@ -102,13 +103,14 @@ require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
 class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
                // Static:
-       var $headListTags = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6';              // List of tags for header, pre and list containers
+       var $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,HR,ADDRESS,DL,DD'; // List of tags for these elements
 
                // Internal, static:
        var $recPid = 0;                                // Set this to the pid of the record manipulated by the class.
        var $elRef = '';                                // Element reference [table]:[field], eg. "tt_content:bodytext"
        var $relPath='';                                // Relative path
        var $relBackPath='';                    // Relative back-path
+       public $tsConfig = array();             // Current Page TSConfig
        var $procOptions = '';                  // Set to the TSconfig options coming from Page TSconfig
 
                // Internal, dynamic
@@ -231,9 +233,15 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
        function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())   {
 
                        // Init:
+               $this->tsConfig = $thisConfig;
                $this->procOptions = $thisConfig['proc.'];
                $this->preserveTags = strtoupper(implode(',',t3lib_div::trimExplode(',',$this->procOptions['preserveTags'])));
 
+                       // dynamic configuration of blockElementList
+               if ($this->procOptions['blockElementList']) {
+                       $this->blockElementList = $this->procOptions['blockElementList'];
+               }
+
                        // Get parameters for rte_transformation:
                $p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);
 
@@ -404,8 +412,18 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                        // Init
                                $attribArray = $this->get_tag_attributes_classic($v,1);
                                $siteUrl = $this->siteUrl();
+                               $sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
+
                                $absRef = trim($attribArray['src']);            // It's always a absolute URL coming from the RTE into the Database.
 
+                                       // make path absolute if it is relative and we have a site path which is not '/'
+                               $pI=pathinfo($absRef);
+                               if($sitePath AND !$pI['scheme'] && t3lib_div::isFirstPartOfStr($absRef,$sitePath)) {
+                                               // if site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
+                                       $absRef = substr($absRef,strlen($sitePath));
+                                       $absRef = $siteUrl.$absRef;
+                               }
+
                                        // External image from another URL? In that case, fetch image (unless disabled feature).
                                if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures'])      {
                                        $externalFile = $this->getUrl($absRef); // Get it
@@ -429,6 +447,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                }
                                        }
                                }
+
                                        // Check image as local file (siteURL equals the one of the image)
                                if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))      {
                                        $path = rawurldecode(substr($absRef,strlen($siteUrl))); // Rel-path, rawurldecoded for special characters.
@@ -462,10 +481,10 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                                        if ($imgI[3])   {
                                                                                $fI=pathinfo($imgI[3]);
                                                                                @copy($imgI[3],$filepath);      // Override the child file
-                                                                               unset($attribArray['style']);
+                                                                                       // Removing width and height from style attribute
+                                                                               $attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
                                                                                $attribArray['width']=$imgI[0];
                                                                                $attribArray['height']=$imgI[1];
-                                                                               if (!$attribArray['border'])    $attribArray['border']=0;
                                                                                $params = t3lib_div::implodeAttributes($attribArray,1);
                                                                                $imgSplit[$k]='<img '.$params.' />';
                                                                        }
@@ -474,14 +493,13 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
                                                } elseif ($this->procOptions['plainImageMode']) {       // If "plain image" has been configured:
 
-                                                               // Image dimensions as set in the image tag
+                                                               // Image dimensions as set in the image tag, if any
                                                        $curWH = $this->getWHFromAttribs($attribArray);
-                                                       $attribArray['width'] = $curWH[0];
-                                                       $attribArray['height'] = $curWH[1];
+                                                       if ($curWH[0]) $attribArray['width'] = $curWH[0];
+                                                       if ($curWH[1]) $attribArray['height'] = $curWH[1];
 
-                                                               // Forcing values for style and border:
-                                                       unset($attribArray['style']);
-                                                       if (!$attribArray['border'])    $attribArray['border'] = 0;
+                                                               // Removing width and height from style attribute
+                                                       $attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
 
                                                                // Finding dimensions of image file:
                                                        $fI = @getimagesize($filepath);
@@ -537,6 +555,9 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
         */
        function TS_images_rte($value)  {
 
+               $siteUrl = $this->siteUrl();
+               $sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
+
                        // Split content by <img> tags and traverse the resulting array for processing:
                $imgSplit = $this->splitTags('img',$value);
                foreach($imgSplit as $k => $v)  {
@@ -544,12 +565,14 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
                                        // Init
                                $attribArray=$this->get_tag_attributes_classic($v,1);
-                               $siteUrl = $this->siteUrl();
                                $absRef = trim($attribArray['src']);
 
                                        // Unless the src attribute is already pointing to an external URL:
                                if (strtolower(substr($absRef,0,4))!='http')    {
-                                       $attribArray['src'] = $siteUrl.substr($attribArray['src'],strlen($this->relBackPath));
+                                       $attribArray['src'] = substr($attribArray['src'],strlen($this->relBackPath));
+                                               // if site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
+                                       $attribArray['src'] = preg_replace('#^'.preg_quote($sitePath,'#').'#','',$attribArray['src']);
+                                       $attribArray['src'] = $siteUrl.$attribArray['src'];
                                        if (!isset($attribArray['alt']))        $attribArray['alt']='';
                                        $params = t3lib_div::implodeAttributes($attribArray);
                                        $imgSplit[$k]='<img '.$params.' />';
@@ -570,9 +593,11 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
         * @return      string          Content output
         */
        function TS_reglinks($value,$direction)         {
+               $retVal = '';
+
                switch($direction)      {
                        case 'rte':
-                               return $this->TS_AtagToAbs($value,1);
+                               $retVal = $this->TS_AtagToAbs($value,1);
                        break;
                        case 'db':
                                $siteURL = $this->siteUrl();
@@ -590,14 +615,15 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                $blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($blockSplit[$k]),$direction).$eTag;
                                        }
                                }
-                               return implode('',$blockSplit);
+                               $retVal = implode('',$blockSplit);
                        break;
                }
+               return $retVal;
        }
 
        /**
         * Transformation handler: 'ts_links' / direction: "db"
-        * Converting <A>-tags to <LINK tags>
+        * Converting <A>-tags to <link tags>
         *
         * @param       string          Content input
         * @return      string          Content output
@@ -617,14 +643,15 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                unset($attribArray_copy['href']);
                                unset($attribArray_copy['target']);
                                unset($attribArray_copy['class']);
+                               unset($attribArray_copy['title']);
                                if ($attribArray_copy['rteerror'])      {       // Unset "rteerror" and "style" attributes if "rteerror" is set!
                                        unset($attribArray_copy['style']);
                                        unset($attribArray_copy['rteerror']);
                                }
                                if (!count($attribArray_copy))  {       // Only if href, target and class are the only attributes, we can alter the link!
                                                // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes):
-                                       $bTag='<LINK '.$info['url'].($attribArray['target']?' '.$attribArray['target']:($attribArray['class']?' -':'')).($attribArray['class']?' '.$attribArray['class']:'').'>';
-                                       $eTag='</LINK>';
+                                       $bTag='<link '.$info['url'].($info['query']?',0,'.$info['query']:'').($attribArray['target']?' '.$attribArray['target']:(($attribArray['class'] || $attribArray['title'])?' -':'')).($attribArray['class']?' '.$attribArray['class']:($attribArray['title']?' -':'')).($attribArray['title']?' "'.$attribArray['title'].'"':'').'>';
+                                       $eTag='</link>';
                                        $blockSplit[$k] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
                                } else {        // ... otherwise store the link as a-tag.
                                                // Unsetting 'rtekeep' attribute if that had been set.
@@ -645,7 +672,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
        /**
         * Transformation handler: 'ts_links' / direction: "rte"
-        * Converting <LINK tags> to <A>-tags
+        * Converting <link tags> to <A>-tags
         *
         * @param       string          Content input
         * @return      string          Content output
@@ -654,12 +681,12 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
        function TS_links_rte($value)   {
                $value = $this->TS_AtagToAbs($value);
 
-                       // Split content by the TYPO3 pseudo tag "<LINK>":
+                       // Split content by the TYPO3 pseudo tag "<link>":
                $blockSplit = $this->splitIntoBlock('link',$value,1);
                foreach($blockSplit as $k => $v)        {
                        $error = '';
                        if ($k%2)       {       // block:
-                               $tagCode = t3lib_div::trimExplode(' ',trim(substr($this->getFirstTag($v),0,-1)),1);
+                               $tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v),0,-1)),true);
                                $link_param = $tagCode[1];
                                $href = '';
                                $siteUrl = $this->siteUrl();
@@ -686,9 +713,12 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                $link_params_parts = explode('#',$link_param);
                                                $idPart = trim($link_params_parts[0]);          // Link-data del
                                                if (!strcmp($idPart,''))        { $idPart=$this->recPid; }      // If no id or alias is given, set it to class record pid
-                                               if ($link_params_parts[1] && !$sectionMark)     {
-                                                       $sectionMark = '#'.trim($link_params_parts[1]);
-                                               }
+
+// FIXME commented because useless - what is it for?
+//                                             if ($link_params_parts[1] && !$sectionMark)     {
+//                                                     $sectionMark = '#'.trim($link_params_parts[1]);
+//                                             }
+
                                                        // Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
                                                $pairParts = t3lib_div::trimExplode(',',$idPart);
                                                if (count($pairParts)>1)        {
@@ -702,7 +732,11 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                }
                                                $page = t3lib_BEfunc::getRecord('pages', $idPart);
                                                if (is_array($page))    {       // Page must exist...
-                                                       $href = $siteUrl.'?id='.$link_param;
+                                                       $pairParts = t3lib_div::trimExplode(',',$link_param);
+                                                       $href = $siteUrl.'?id='.$pairParts[0].($pairParts[2]?$pairParts[2]:'');
+                                               } else if(strtolower(substr($link_param, 0, 7)) == 'record:') {
+                                                               // linkHandler - allowing links to start with "record:"
+                                                       $href = $link_param;
                                                } else {
                                                        #$href = '';
                                                        $href = $siteUrl.'?id='.$link_param;
@@ -714,7 +748,8 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                // Setting the A-tag:
                                $bTag = '<a href="'.htmlspecialchars($href).'"'.
                                                        ($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
-                                                       ($tagCode[3] ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
+                                                       ($tagCode[3]&&$tagCode[3]!='-' ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
+                                                       ($tagCode[4] ? ' title="'.htmlspecialchars($tagCode[4]).'"' : '').
                                                        ($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : '').       // Should be OK to add the style; the transformation back to databsae will remove it...
                                                        '>';
                                $eTag = '</a>';
@@ -784,11 +819,17 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                if ($this->TS_transform_db_safecounter<0)       return $value;
 
                        // Split the content from RTE by the occurence of these blocks:
-               $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.$this->headListTags,$value);
+               $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList,$value);
 
                $cc=0;
                $aC = count($blockSplit);
 
+                       // Avoid superfluous linebreaks by transform_db after ending headListTag
+               while($aC && !strcmp(trim($blockSplit[$aC-1]),''))      {
+                       unset($blockSplit[$aC-1]);
+                       $aC = count($blockSplit);
+               }
+
                        // Traverse the blocks
                foreach($blockSplit as $k => $v)        {
                        $cc++;
@@ -803,7 +844,9 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                        // Process based on the tag:
                                switch($tagName)        {
                                        case 'blockquote':      // Keep blockquotes, but clean the inside recursively in the same manner as the main code
-                                               $blockSplit[$k]='<'.$tagName.'>'.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
+                                       case 'dd' :             // Do the same on dd elements
+                                       case 'div':             // Do the same on div sections, if they were splitted
+                                               $blockSplit[$k]=$tag.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
                                        break;
                                        case 'ol':
                                        case 'ul':      // Transform lists into <typolist>-tags:
@@ -811,7 +854,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                        if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist'])   {
                                                                $parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
                                                                while(list($k2)=each($parts))   {
-                                                                       $parts[$k2]=ereg_replace(chr(10).'|'.chr(13),'',$parts[$k2]);   // remove all linesbreaks!
+                                                                       $parts[$k2]=preg_replace('/['.preg_quote(chr(10).chr(13)).']+/','',$parts[$k2]);        // remove all linesbreaks!
                                                                        $parts[$k2]=$this->defaultTStagMapping($parts[$k2],'db');
                                                                        $parts[$k2]=$this->cleanFontTags($parts[$k2],0,0,0);
                                                                        $parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
@@ -820,14 +863,14 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                                $blockSplit[$k]='<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
                                                        }
                                                } else {
-                                                       $blockSplit[$k].=$lastBR;
+                                                       $blockSplit[$k]=preg_replace('/['.preg_quote(chr(10).chr(13)).']+/',' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
                                                }
                                        break;
                                        case 'table':   // Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
                                                if (!$this->procOptions['preserveTables'] && !$css)     {
                                                        $blockSplit[$k]=$this->TS_transform_db($this->removeTables($blockSplit[$k]));
                                                } else {
-                                                       $blockSplit[$k]=str_replace(chr(10),'',$blockSplit[$k]).$lastBR;
+                                                       $blockSplit[$k]=preg_replace('/['.preg_quote(chr(10).chr(13)).']+/',' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
                                                }
                                        break;
                                        case 'h1':
@@ -861,16 +904,19 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                                                                                $lastBR;
                                                        }
                                                } else {
-                                                       $blockSplit[$k].=$lastBR;
+                                                               // Eliminate true linebreaks inside Hx tags
+                                                       $blockSplit[$k]=preg_replace('/['.preg_quote(chr(10).chr(13)).']+/',' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
                                                }
                                        break;
                                        default:
-                                               $blockSplit[$k].=$lastBR;
+                                                       // Eliminate true linebreaks inside other headlist tags and after hr tag
+                                               $blockSplit[$k]=preg_replace('/['.preg_quote(chr(10).chr(13)).']+/',' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
                                        break;
                                }
                        } else {        // NON-block:
                                if (strcmp(trim($blockSplit[$k]),''))   {
-                                       $blockSplit[$k]=$this->divideIntoLines($blockSplit[$k]).$lastBR;
+                                       $blockSplit[$k]=$this->divideIntoLines(preg_replace('/['.preg_quote(chr(10).chr(13)).']+/',' ',$blockSplit[$k])).$lastBR;
+                                       $blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]);
                                } else unset($blockSplit[$k]);
                        }
                }
@@ -880,6 +926,29 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
        }
 
        /**
+        * Wraps a-tags that contain a style attribute with a span-tag
+        *
+        * @param       string          Content input
+        * @return      string          Content output
+        */
+       function transformStyledATags($value)   {
+               $blockSplit = $this->splitIntoBlock('A',$value);        // Split content by <a> blocks; odd keys are treated as the <a>...</a> blocks (see $k%2 below)
+               foreach($blockSplit as $k => $v)        {
+                       if ($k%2)       {       // Odd key: an A-tag block was found
+                               $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);     // Attributes of the block's first tag (the opening <a>)
+                               if ($attribArray['style'])      {       // Only links carrying a non-empty "style" attribute are rewritten
+                                       $attribArray_copy['style'] = $attribArray['style'];     // The style value is carried over to the wrapping <span> ...
+                                       unset($attribArray['style']);   // ... and removed from the attributes written back on the <a> tag
+                                       $bTag='<span '.t3lib_div::implodeAttributes($attribArray_copy,1).'><a '.t3lib_div::implodeAttributes($attribArray,1).'>';
+                                       $eTag='</a></span>';
+                                       $blockSplit[$k] = $bTag.$this->removeFirstAndLastTag($blockSplit[$k]).$eTag;    // Rebuild the block: new opening/closing tags around the block's content without its original first and last tag
+                               }
+                       }
+               }
+               return implode('',$blockSplit); // Concatenate all parts (changed and unchanged) back into one string
+       }
+
+       /**
         * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
         * Set (->rte) for standard content elements (ts)
         *
@@ -891,7 +960,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
        function TS_transform_rte($value,$css=0)        {
 
                        // Split the content from Database by the occurence of these blocks:
-               $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags,$value);
+               $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList,$value);
 
                        // Traverse the blocks
                foreach($blockSplit as $k => $v)        {
@@ -904,10 +973,12 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
                                        // Based on tagname, we do transformations:
                                switch($tagName)        {
-                                       case 'blockquote':      // Keep blockquotes:
+                                       case 'blockquote':      // Keep blockquotes
+                                       case 'dd':              // Keep definitions
+                                       case 'div':             // Keep div sections, if they were splitted
                                                $blockSplit[$k] = $tag.
-                                                                                       $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
-                                                                                       '</'.$tagName.'>';
+                                                                       $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
+                                                                       '</'.$tagName.'>';
                                        break;
                                        case 'typolist':        // Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
                                                if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist'])   {
@@ -938,7 +1009,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                        } else {        // NON-block:
                                $nextFTN = $this->getFirstTagName($blockSplit[$k+1]);
                                $singleLineBreak = $blockSplit[$k]==chr(10);
-                               if (t3lib_div::inList('TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags,$nextFTN))      {       // Removing linebreak if typolist/typohead
+                               if (t3lib_div::inList('TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList,$nextFTN))    {       // Removing linebreak if typolist/typohead
                                        $blockSplit[$k] = ereg_replace(chr(10).'[ ]*$','',$blockSplit[$k]);
                                }
                                        // If $blockSplit[$k] is blank then unset the line. UNLESS the line happend to be a single line break.
@@ -1013,7 +1084,13 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                $kUknown = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;              // Default: remove unknown tags.
                $hSC = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;                                   // Default: re-convert literals to characters (that is &lt; to <)
 
-               return $this->HTMLcleaner($content,$keepTags,$kUknown,$hSC);
+                       // Create additional configuration in order to honor the setting RTE.default.proc.HTMLparser_db.xhtml_cleaning=1
+               $addConfig=array();
+               if ((is_array($this->procOptions['HTMLparser_db.']) && $this->procOptions['HTMLparser_db.']['xhtml_cleaning']) || (is_array($this->procOptions['entryHTMLparser_db.']) && $this->procOptions['entryHTMLparser_db.']['xhtml_cleaning']) || (is_array($this->procOptions['exitHTMLparser_db.']) && $this->procOptions['exitHTMLparser_db.']['xhtml_cleaning']))   {
+                       $addConfig['xhtml']=1;
+               }
+
+               return $this->HTMLcleaner($content,$keepTags,$kUknown,$hSC,$addConfig);
         }
 
        /**
@@ -1048,9 +1125,12 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
                                        // GOING from database to Rich Text Editor:
                                case 'rte':
-                                               // Transform bold/italics tags to strong/em
-                                       if (isset($keepTags['b']))      {$keepTags['b']=array('remap'=>'STRONG');}
-                                       if (isset($keepTags['i']))      {$keepTags['i']=array('remap'=>'EM');}
+
+                                       if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
+                                                       // Transform bold/italics tags to strong/em
+                                               if (isset($keepTags['b']))      {$keepTags['b']=array('remap'=>'STRONG');}
+                                               if (isset($keepTags['i']))      {$keepTags['i']=array('remap'=>'EM');}
+                                       }
 
                                                // Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
                                        list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);
@@ -1058,20 +1138,23 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 
                                        // GOING from RTE to database:
                                case 'db':
-                                               // Transform strong/em back to bold/italics:
-                                       if (isset($keepTags['strong'])) { $keepTags['strong']=array('remap'=>'b'); }
-                                       if (isset($keepTags['em']))             { $keepTags['em']=array('remap'=>'i'); }
+
+                                       if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
+                                                       // Transform strong/em back to bold/italics:
+                                               if (isset($keepTags['strong'])) { $keepTags['strong']=array('remap'=>'b'); }
+                                               if (isset($keepTags['em']))     { $keepTags['em']=array('remap'=>'i'); }
+                                       }
 
                                                // Setting up span tags if they are allowed:
                                        if (isset($keepTags['span']))           {
                                                $classes=array_merge(array(''),$this->allowedClasses);
                                                $keepTags['span']=array(
-                                                       'allowedAttribs'=>'class',
+                                                       'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir',
                                                        'fixAttrib' => Array(
                                                                'class' => Array (
                                                                        'list' => $classes,
                                                                        'removeIfFalse' => 1
-                                                               )
+                                                               ),
                                                        ),
                                                        'rmTagIfNoAttrib' => 1
                                                );
@@ -1216,8 +1299,12 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                        // Add the processed line(s)
                                $divSplit[$k] = implode(chr(10),$subLines);
 
-                                       // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank:
-                               if (trim(strip_tags($divSplit[$k]))=='&nbsp;')          $divSplit[$k]='';
+                                       // If it turns out the line is just blank (possibly containing only a &nbsp;) then make it completely blank.
+                                       // However, do not blank lines that are blank in the sense above but contain an img tag or a tag carrying attributes.
+                                       // Such attributes should have been filtered out earlier; if they are still present they must be treated as possible content.
+                               if (trim(strip_tags($divSplit[$k]))=='&nbsp;' && !preg_match('/\<(img)(\s[^>]*)?\/?>/si', $divSplit[$k]) && !preg_match('/\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
+                                       $divSplit[$k]='';
+                               }
                        } else {        // outside div:
                                        // Remove positions which are outside div/p tags and without content
                                $divSplit[$k]=trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
@@ -1401,6 +1488,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                if ($style)     {
                        $regex='[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
                                // Width
+                       $reg = array();
                        eregi('width'.$regex,$style,$reg);
                        $w = intval($reg[1]);
                                // Height
@@ -1431,7 +1519,7 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                } else {
                        $curURL = $this->siteUrl();     // 100502, removed this: 'http://'.t3lib_div::getThisUrl(); Reason: The url returned had typo3/ in the end - should be only the site's url as far as I see...
                        for($a=0;$a<strlen($url);$a++)  {
-                               if ($url[$a]!=$curURL[$a])      {
+                               if ($url{$a}!=$curURL{$a})      {
                                        break;
                                }
                        }
@@ -1455,12 +1543,14 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
                                        $info['type']='anchor';
                                } elseif (!trim($uP['path']) || !strcmp($uP['path'],'index.php'))       {
                                        $pp = explode('id=',$uP['query']);
-                                       $id = trim($pp[1]);
+                                       $parameters = explode('&', $pp[1]);
+                                       $id = array_shift($parameters);
                                        if ($id)        {
                                                $info['pageid']=$id;
                                                $info['cElement']=$uP['fragment'];
                                                $info['url']=$id.($info['cElement']?'#'.$info['cElement']:'');
                                                $info['type']='page';
+                                               $info['query'] = $parameters[0]?'&'.implode('&', $parameters):'';
                                        }
                                } else {
                                        $info['url']=$info['relUrl'];
@@ -1512,4 +1602,5 @@ class t3lib_parsehtml_proc extends t3lib_parsehtml {
 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php'])   {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
 }
-?>
+
+?>
\ No newline at end of file