Tiny bug with date function in t3lib_befunc. Casted timestamp to integer.
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml.php
index 1889ff2..566c663 100644 (file)
@@ -1,8 +1,8 @@
 <?php
 /***************************************************************
 *  Copyright notice
-*  
-*  (c) 1999-2004 Kasper Skaarhoj (kasper@typo3.com)
+*
+*  (c) 1999-2009 Kasper Skaarhoj (kasperYYYY@typo3.com)
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
  * $Id$
  * Revised for TYPO3 3.6 July/2003 by Kasper Skaarhoj
  *
- * @author     Kasper Skaarhoj <kasper@typo3.com>
+ * @author     Kasper Skaarhoj <kasperYYYY@typo3.com>
  */
 /**
  * [CLASS/FUNCTION INDEX of SCRIPT]
  *
  *
  *
- *  100: class t3lib_parsehtml 
- *  117:     function getSubpart($content, $marker)    
- *  145:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)   
- *  210:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)   
- *  269:     function splitTags($tag,$content) 
- *  303:     function getAllParts($parts,$tag_parts=1,$include_tag=1)  
- *  322:     function removeFirstAndLastTag($str)      
- *  341:     function getFirstTag($str)        
- *  355:     function getFirstTagName($str)    
- *  373:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') 
- *  447:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())        
- *  618:     function get_tag_attributes($tag,$deHSC=0)        
- *  659:     function split_tag_attributes($tag)       
- *  696:     function bidir_htmlspecialchars($value,$dir)      
- *  716:     function prefixResourcePath($main_prefix,$content,$alternatives=array())  
- *  784:     function prefixRelPath($prefix,$srcVal)   
- *  802:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)       
- *  833:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')   
- *  850:     function unprotectTags($content,$tagList='')      
- *  883:     function stripTagsExcept($value,$tagList) 
- *  906:     function caseShift($str,$flag,$cacheKey='')       
- *  930:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)       
- *  959:     function get_tag_attributes_classic($tag,$deHSC=0)        
- *  972:     function HTMLparserConfig($TSconfig,$keepTags=array())    
- * 1085:     function XHTML_clean($content)    
- * 1107:     function processTag($value,$conf,$endTag,$protected=0)    
- * 1153:     function processContent($value,$dir,$conf)        
+ *  106: class t3lib_parsehtml
+ *  123:     function getSubpart($content, $marker)
+ *  156:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
+ *
+ *              SECTION: Parsing HTML code
+ *  247:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
+ *  308:     function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
+ *  344:     function splitTags($tag,$content)
+ *  378:     function getAllParts($parts,$tag_parts=1,$include_tag=1)
+ *  396:     function removeFirstAndLastTag($str)
+ *  412:     function getFirstTag($str)
+ *  426:     function getFirstTagName($str,$preserveCase=FALSE)
+ *  445:     function get_tag_attributes($tag,$deHSC=0)
+ *  486:     function split_tag_attributes($tag)
+ *  524:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
+ *
+ *              SECTION: Clean HTML code
+ *  617:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
+ *  814:     function bidir_htmlspecialchars($value,$dir)
+ *  837:     function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')
+ *  919:     function prefixRelPath($prefix,$srcVal,$suffix='')
+ *  937:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
+ *  967:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
+ *  982:     function unprotectTags($content,$tagList='')
+ * 1015:     function stripTagsExcept($value,$tagList)
+ * 1038:     function caseShift($str,$flag,$cacheKey='')
+ * 1065:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
+ * 1093:     function get_tag_attributes_classic($tag,$deHSC=0)
+ * 1106:     function indentLines($content, $number=1, $indentChar="\t")
+ * 1123:     function HTMLparserConfig($TSconfig,$keepTags=array())
+ * 1247:     function XHTML_clean($content)
+ * 1269:     function processTag($value,$conf,$endTag,$protected=0)
+ * 1315:     function processContent($value,$dir,$conf)
  *
- * TOTAL FUNCTIONS: 26
+ * TOTAL FUNCTIONS: 28
  * (This index is automatically created/updated by the extension "extdeveval")
  *
  */
 
 
 /**
- * Functions for parsing HTML. 
+ * Functions for parsing HTML.
  * You are encouraged to use this class in your own applications
- * 
- * @author     Kasper Skaarhoj <kasper@typo3.com>
+ *
+ * @author     Kasper Skaarhoj <kasperYYYY@typo3.com>
  * @package TYPO3
  * @subpackage t3lib
  */
-class t3lib_parsehtml {
-       var $caseShift_cache=array();
-
+class t3lib_parsehtml  {
 
-       // *******************************************'
-       // COPY FROM class.tslib_content.php: / BEGIN
-       // substituteSubpart 
-       // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
-       // *******************************************'
+       protected $caseShift_cache = array();
 
        /**
-        * Returns the first subpart encapsulated in the marker, $marker (possibly present in $content as a HTML comment)
-        * 
+        * Returns the first subpart encapsulated in the marker, $marker
+        * (possibly present in $content as a HTML comment)
+        *
         * @param       string          Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
         * @param       string          Marker string, eg. "###CONTENT_PART###"
-        * @return      string          
+        * @return      string
         */
-       function getSubpart($content, $marker)  {
-               if ($marker && strstr($content,$marker))        {
-                       $start = strpos($content, $marker)+strlen($marker);
-                       $stop = @strpos($content, $marker, $start+1);
-                       $sub = substr($content, $start, $stop-$start);
-       
-                       $reg=Array();           
-                       ereg('^[^<]*-->',$sub,$reg);
-                       $start+=strlen($reg[0]);
-                       
-                       $reg=Array();           
-                       ereg('<!--[^>]*$',$sub,$reg);
-                       $stop-=strlen($reg[0]);
-                       
-                       return substr($content, $start, $stop-$start);
+       public static function getSubpart($content, $marker) {
+               $start = strpos($content, $marker);
+
+               if ($start === false) {
+                       return '';
+               }
+
+               $start += strlen($marker);
+               $stop   = strpos($content, $marker, $start);
+
+                       // Q: What shall get returned if no stop marker is given
+                       // /*everything till the end*/ or nothing?
+               if ($stop===false) {
+                       return ''; /*substr($content, $start)*/
+               }
+
+               $content = substr($content, $start, $stop-$start);
+
+               $matches = array();
+               if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches) === 1) {
+                       return $matches[2];
+               }
+
+               $matches = array(); // resetting $matches
+               if (preg_match('/(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches) === 1) {
+                       return $matches[1];
+               }
+
+               $matches = array(); // resetting $matches
+               if (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $content, $matches) === 1) {
+                       return $matches[2];
                }
+
+               return $content;
        }
 
        /**
         * Substitutes a subpart in $content with the content of $subpartContent.
-        * 
+        *
         * @param       string          Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
         * @param       string          Marker string, eg. "###CONTENT_PART###"
         * @param       array           If $subpartContent happens to be an array, it's [0] and [1] elements are wrapped around the content of the subpart (fetched by getSubpart())
@@ -142,65 +161,189 @@ class t3lib_parsehtml {
         * @param       boolean         If set, the marker around the subpart is not removed, but kept in the output
         * @return      string          Processed input content
         */
-       function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
+       public static function substituteSubpart($content, $marker, $subpartContent, $recursive = 1, $keepMarker = 0) {
                $start = strpos($content, $marker);
-               $stop = @strpos($content, $marker, $start+1)+strlen($marker);
-               if ($start && $stop>$start)     {
-                       // code before
-                       $before = substr($content, 0, $start);
-                       $reg=Array();           
-                       ereg('<!--[^>]*$',$before,$reg);
-                       $start-=strlen($reg[0]);
-                       if ($keepMarker)        {
-                               $reg_k=Array();
-                               if ($reg[0])    ereg('^[^>]*-->',substr($content,$start),$reg_k);
-                               $before_marker = substr($content, $start, strlen($reg_k[0]?$reg_k[0]:$marker));
+
+               if ($start === false) {
+                       return $content;
+               }
+
+               $startAM = $start + strlen($marker);
+               $stop    = strpos($content, $marker, $startAM);
+
+               if ($stop===false) {
+                       return $content;
+               }
+
+               $stopAM  = $stop + strlen($marker);
+               $before  = substr($content, 0, $start);
+               $after   = substr($content, $stopAM);
+               $between = substr($content, $startAM, $stop-$startAM);
+
+               if ($recursive) {
+                       $after = t3lib_parsehtml::substituteSubpart(
+                               $after,
+                               $marker,
+                               $subpartContent,
+                               $recursive,
+                               $keepMarker
+                       );
+               }
+
+               if ($keepMarker) {
+                       $matches = array();
+                       if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches) === 1) {
+                               $before  .= $marker.$matches[1];
+                               $between  = $matches[2];
+                               $after    = $matches[3] . $marker . $after;
+                       } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches) === 1) {
+                               $before  .= $marker;
+                               $between  = $matches[1];
+                               $after    = $matches[2] . $marker . $after;
+                       } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches) === 1) {
+                               $before  .= $marker . $matches[1];
+                               $between  = $matches[2];
+                               $after    = $marker . $after;
+                       } else  {
+                               $before .= $marker;
+                               $after   = $marker . $after;
                        }
-                       $before = substr($content, 0, $start);
-                               // code after
-                       $after = substr($content, $stop);
-                       $reg=Array();           
-                       ereg('^[^<]*-->',$after,$reg);
-                       $stop+=strlen($reg[0]);
-                       if ($keepMarker)        {
-                               $reg_k=Array();
-                               if ($reg[0])    ereg('<!--[^<]*$',substr($content,0,$stop),$reg_k);
-                               $sLen = strlen($reg_k[0]?$reg_k[0]:$marker);
-                               $after_marker = substr($content, $stop-$sLen,$sLen);
+
+               } else {
+                       $matches = array();
+                       if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $before, $matches) === 1) {
+                               $before = $matches[1];
                        }
-                       $after = substr($content, $stop);
 
-                       
-                               // replace?
-                       if (is_array($subpartContent))  {
-                               $substContent=$subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
-                       } else {
-                               $substContent=$subpartContent;
+                       if (is_array($subpartContent)) {
+                               $matches = array();
+                               if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches) === 1) {
+                                       $between = $matches[2];
+                               } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1) {
+                                       $between = $matches[1];
+                               } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1) {
+                                       $between = $matches[2];
+                               }
                        }
 
-                       if ($recursive && strpos($after, $marker))      {
-                               return $before.($keepMarker?$before_marker:'').$substContent.($keepMarker?$after_marker:'').$this->substituteSubpart($after,$marker,$subpartContent);
-                       } else {
-                               return $before.($keepMarker?$before_marker:'').$substContent.($keepMarker?$after_marker:'').$after;
+                       $matches = array(); // resetting $matches
+                       if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $after, $matches) === 1) {
+                               $after = $matches[1];
                        }
-               } else {
-                       return $content;
                }
+
+               if (is_array($subpartContent)) {
+                       $between = $subpartContent[0] . $between . $subpartContent[1];
+               } else  {
+                       $between = $subpartContent;
+               }
+
+               return $before . $between . $after;
        }
-       // *******************************************'
-       // COPY FROM class.tslib_content.php: / END
-       // *******************************************'
 
+       /**
+        * Substitutes multiple subparts at once
+        *
+        * @param       string          The content stream, typically HTML template content.
+        * @param       array           The array of key/value pairs being subpart/content values used in the substitution. For each element in this array the function will substitute a subpart in the content stream with the content.
+        * @return      string          The processed HTML content string.
+        */
+       public static function substituteSubpartArray($content, array $subpartsContent) {
+                       // Apply the substitutions one after another; each pass works on
+                       // the result of the previous one.
+               foreach ($subpartsContent as $marker => $replacement) {
+                       $content = t3lib_parsehtml::substituteSubpart($content, $marker, $replacement);
+               }
 
+               return $content;
+       }
 
 
+       /**
+        * Substitutes a marker string in the input content
+        * (by a simple str_replace())
+        *
+        * @param       string          The content stream, typically HTML template content.
+        * @param       string          The marker string, typically on the form "###[the marker string]###"
+        * @param       mixed           The content to insert instead of the marker string found.
+        * @return      string          The processed HTML content string.
+        * @see substituteSubpart()
+        */
+       public static function substituteMarker($content, $marker, $markContent) {
+                       // Plain string replacement: every occurrence of the marker is swapped.
+               $processedContent = str_replace($marker, $markContent, $content);
+
+               return $processedContent;
+       }
 
 
        /**
+        * Traverses the input $markContentArray array and for each key the marker
+        * by the same name (possibly wrapped and in upper case) will be
+        * substituted with the keys value in the array. This is very useful if you
+        * have a data-record to substitute in some content. In particular when you
+        * use the $wrap and $uppercase values to pre-process the markers. Eg. a
+        * key name like "myfield" could effectively be represented by the marker
+        * "###MYFIELD###" if the wrap value was "###|###" and the $uppercase
+        * boolean true.
+        *
+        * @param       string          The content stream, typically HTML template content.
+        * @param       array           The array of key/value pairs being marker/content values used in the substitution. For each element in this array the function will substitute a marker in the content stream with the content.
+        * @param       string          A wrap value - [part 1] | [part 2] - for the markers before substitution
+        * @param       boolean         If set, all marker string substitution is done with upper-case markers.
+        * @param       boolean         If set, all unused marker are deleted.
+        * @return      string          The processed output stream
+        * @see substituteMarker(), substituteMarkerInObject(), TEMPLATE()
+        */
+       public static function substituteMarkerArray($content, $markContentArray, $wrap = '', $uppercase = 0, $deleteUnused = 0) {
+               if (is_array($markContentArray)) {
+                       $wrapArr = t3lib_div::trimExplode('|', $wrap);
+                               // A wrap without "|" (or an empty wrap) does not provide both parts;
+                               // fall back to '' instead of reading an undefined array offset.
+                       $wrapBefore = isset($wrapArr[0]) ? $wrapArr[0] : '';
+                       $wrapAfter  = isset($wrapArr[1]) ? $wrapArr[1] : '';
+
+                       foreach ($markContentArray as $marker => $markContent) {
+                               if ($uppercase) {
+                                               // use strtr instead of strtoupper to avoid locale problems with Turkish
+                                       $marker = strtr(
+                                               $marker,
+                                               'abcdefghijklmnopqrstuvwxyz',
+                                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                                       );
+                               }
+
+                               $content = str_replace($wrapBefore . $marker . $wrapAfter, $markContent, $content);
+                       }
+
+                       if ($deleteUnused) {
+                               if (empty($wrap)) {
+                                               // No wrap given: unused markers are assumed to look like ###MARKER###
+                                       $wrapBefore = '###';
+                                       $wrapAfter  = '###';
+                               }
+
+                                       // The dash is escaped so it is a literal character and not the range
+                                       // "_" to "|"; the wrap parts are quoted for the "/" delimiter used here.
+                               $content = preg_replace(
+                                       '/' . preg_quote($wrapBefore, '/') . '([A-Z0-9_|\-]*)' . preg_quote($wrapAfter, '/') . '/is',
+                                       '',
+                                       $content
+                               );
+                       }
+               }
+
+               return $content;
+       }
+
+
+
+
+
+
+
+       /************************************
+        *
+        * Parsing HTML code
+        *
+        ************************************/
+
+       /**
         * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
-        * Even numbers in the array are outside the blocks, Odd numbers are block-content. 
+        * Even numbers in the array are outside the blocks, Odd numbers are block-content.
         * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
-        * 
+        *
         * @param       string          List of tags, comma separated.
         * @param       string          HTML-content
         * @param       boolean         If set, excessive end tags are ignored - you should probably set this in most cases.
@@ -209,10 +352,10 @@ class t3lib_parsehtml {
         */
        function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
                $tags=array_unique(t3lib_div::trimExplode(',',$tag,1));
-               $regexStr = '</?('.implode('|',$tags).')(>|[[:space:]][^>]*>)';
+               $regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';
+
+               $parts = preg_split($regexStr, $content);
 
-               $parts = spliti($regexStr,$content);
-               
                $newParts=array();
                $pointer=strlen($parts[0]);
                $buffer=$parts[0];
@@ -250,30 +393,68 @@ class t3lib_parsehtml {
                                $pointer+=strlen($mbuffer);
                                $buffer.=$mbuffer;
                        }
-                       
+
                }
                $newParts[]=$buffer;
                return $newParts;
        }
 
        /**
+        * Splitting content into blocks *recursively* and processing tags/content with call back functions.
+        *
+        * @param       string          Tag list, see splitIntoBlock()
+        * @param       string          Content, see splitIntoBlock()
+        * @param       object          Object where call back methods are.
+        * @param       string          Name of call back method for content; "function callBackContent($str,$level)"
+        * @param       string          Name of call back method for tags; "function callBackTags($tags,$level)"
+        * @param       integer         Indent level
+        * @return      string          Processed content
+        * @see splitIntoBlock()
+        */
+       function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)   {
+                       // Split with excessive end tags ignored (third argument TRUE)
+               $parts = $this->splitIntoBlock($tag,$content,TRUE);
+               foreach($parts as $k => $v)     {
+                               // Odd keys hold the tag blocks, even keys the content outside of them
+                       if ($k%2)       {
+                                       // Case of the tag name is preserved so the rebuilt end tag matches the input
+                               $firstTagName = $this->getFirstTagName($v, TRUE);
+                               $tagsArray = array();
+                               $tagsArray['tag_start'] = $this->getFirstTag($v);
+                               $tagsArray['tag_end'] = '</'.$firstTagName.'>';
+                               $tagsArray['tag_name'] = strtolower($firstTagName);
+                               $tagsArray['add_level'] = 1;
+                                       // The inner content is processed first, one level deeper.
+                                       // 'add_level' is read here, before the tag call back runs.
+                               $tagsArray['content'] = $this->splitIntoBlockRecursiveProc($tag,$this->removeFirstAndLastTag($v),$procObj,$callBackContent,$callBackTags,$level+$tagsArray['add_level']);
+
+                                       // The tag call back receives the whole array and its return value replaces it
+                               if ($callBackTags)      $tagsArray = $procObj->$callBackTags($tagsArray,$level);
+
+                                       // Re-assemble the block from the (possibly modified) parts
+                               $parts[$k] =
+                                       $tagsArray['tag_start'].
+                                       $tagsArray['content'].
+                                       $tagsArray['tag_end'];
+                       } else {
+                                       // Content outside the blocks is passed through the content call back, if one is given
+                               if ($callBackContent)   $parts[$k] = $procObj->$callBackContent($parts[$k],$level);
+                       }
+               }
+
+               return implode('',$parts);
+       }
+
+       /**
         * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
-        * Even numbers in the array are outside the blocks, Odd numbers are block-content. 
+        * Even numbers in the array are outside the blocks, Odd numbers are block-content.
         * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
-        * 
+        *
         * @param       string          List of tags
         * @param       string          HTML-content
         * @return      array           Even numbers in the array are outside the blocks, Odd numbers are block-content.
         * @see splitIntoBlock(), getAllParts(), removeFirstAndLastTag()
         */
        function splitTags($tag,$content)       {
-               $tags=t3lib_div::trimExplode(',',$tag,1);
-               $regexStr = '<('.implode('|',$tags).')(>|[[:space:]][^>]*>)';
-               $parts = spliti($regexStr,$content);
-               
-               $pointer=strlen($parts[0]);
-               $newParts=array();
-               $newParts[]=$parts[0];
+               $tags = t3lib_div::trimExplode(',',$tag,1);
+               $regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
+               $parts = preg_split($regexStr, $content);
+
+               $pointer = strlen($parts[0]);
+               $newParts = array();
+               $newParts[] = $parts[0];
                reset($parts);
                next($parts);
                while(list($k,$v)=each($parts)) {
@@ -281,19 +462,19 @@ class t3lib_parsehtml {
 
                                // Set tag:
                        $tag = substr($content,$pointer,$tagLen);       // New buffer set and pointer increased
-                       $newParts[]=$tag;
-                       $pointer+=strlen($tag);
+                       $newParts[] = $tag;
+                       $pointer+= strlen($tag);
 
                                // Set content:
-                       $newParts[]=$v;
-                       $pointer+=strlen($v);
+                       $newParts[] = $v;
+                       $pointer+= strlen($v);
                }
                return $newParts;
        }
 
        /**
         * Returns an array with either tag or non-tag content of the result from ->splitIntoBlock()/->splitTags()
-        * 
+        *
         * @param       array           Parts generated by ->splitIntoBlock() or >splitTags()
         * @param       boolean         Whether to return the tag-parts (default,true) or what was outside the tags.
         * @param       boolean         Whether to include the tags in the tag-parts (most useful for input made by ->splitIntoBlock())
@@ -301,9 +482,8 @@ class t3lib_parsehtml {
         * @see splitIntoBlock(), splitTags()
         */
        function getAllParts($parts,$tag_parts=1,$include_tag=1)        {
-               reset($parts);
                $newParts=array();
-               while(list($k,$v)=each($parts)) {
+               foreach ($parts as $k => $v)    {
                        if (($k+($tag_parts?0:1))%2)    {
                                if (!$include_tag)      $v=$this->removeFirstAndLastTag($v);
                                $newParts[]=$v;
@@ -314,52 +494,126 @@ class t3lib_parsehtml {
 
        /**
         * Removes the first and last tag in the string
-        * Anything before and after the first and last tags respectively is also removed
-        * 
+        * Anything before the first and after the last tags respectively is also removed
+        *
         * @param       string          String to process
-        * @return      string          
+        * @return      string
         */
        function removeFirstAndLastTag($str)    {
-                       // First:
-               $endLen = strcspn($str,'>')+1;
-               $str = substr($str,$endLen);
-                       // Last:
-               $str = strrev($str);
-               $endLen = strcspn($str,'<')+1;
-               $str = substr($str,$endLen);
+                       // End of first tag:
+               $start = strpos($str,'>');
+                       // Begin of last tag:
+               $end = strrpos($str,'<');
                        // return
-               return strrev($str);
+               return substr($str, $start+1, $end-$start-1);
        }
 
        /**
         * Returns the first tag in $str
         * Actually everything from the begining of the $str is returned, so you better make sure the tag is the first thing...
-        * 
+        *
         * @param       string          HTML string with tags
-        * @return      string          
+        * @return      string
         */
        function getFirstTag($str)      {
                        // First:
-               $endLen = strcspn($str,'>')+1;
-               $str = substr($str,0,$endLen);
-               return $str;
+               $endLen = strpos($str,'>')+1;
+               return substr($str,0,$endLen);
        }
 
        /**
         * Returns the NAME of the first tag in $str
-        * 
+        *
         * @param       string          HTML tag (The element name MUST be separated from the attributes by a space character! Just *whitespace* will not do)
         * @param       boolean         If set, then the tag is NOT converted to uppercase by case is preserved.
         * @return      string          Tag name in upper case
         * @see getFirstTag()
         */
        function getFirstTagName($str,$preserveCase=FALSE)      {
-               list($tag) = split('[[:space:]]',substr(trim($this->getFirstTag($str)),1,-1), 2);
-               if (!$preserveCase)     $tag = strtoupper($tag);
-                       
-               return trim($tag);
+               $matches = array();
+               if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $matches)===1)        {
+                       if (!$preserveCase)     {
+                               return strtoupper($matches[1]);
+                       }
+                       return $matches[1];
+               }
+               return '';
+       }
+
+       /**
+        * Returns an array with all attributes as keys. Attributes are only lowercase a-z
+        * If a attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
+        *
+        * @param       string          Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
+        * @param       boolean         If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
+        * @return      array           array(Tag attributes,Attribute meta-data)
+        */
+       function get_tag_attributes($tag,$deHSC=0)      {
+                       // $components: attribute names, "=" signs and values in order of appearance;
+                       // $metaC: the meta entry split_tag_attributes() returned for each component
+               list($components,$metaC) = $this->split_tag_attributes($tag);
+               $name = '';      // attribute name is stored here
+               $valuemode = false;
+               $attributes = array();
+               $attributesMeta = array();
+               if (is_array($components))      {
+                       foreach ($components as $key => $val)   {
+                               if ($val != '=')        {       // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned its value
+                                       if ($valuemode) {
+                                                       // Previous component was "=": this component is the value of $name
+                                               if ($name)      {
+                                                       $attributes[$name] = $deHSC?t3lib_div::htmlspecialchars_decode($val):$val;
+                                                       $attributesMeta[$name]['dashType']=$metaC[$key];
+                                                       $name = '';
+                                               }
+                                       } else {
+                                                       // Component is an attribute name: strip everything except
+                                                       // alphanumerics, "_", ":" and "-"; a name that ends up empty is skipped
+                                               if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val))     {
+                                                       $name = strtolower($namekey);
+                                                       $attributesMeta[$name]=array();
+                                                       $attributesMeta[$name]['origTag']=$namekey;
+                                                               // Stays empty if no "=value" follows (shorthand attribute)
+                                                       $attributes[$name] = '';
+                                               }
+                                       }
+                                       $valuemode = false;
+                               } else {
+                                               // "=" found: the next component is treated as a value
+                                       $valuemode = true;
+                               }
+                       }
+                       return array($attributes,$attributesMeta);
+               }
+                       // NOTE(review): nothing is returned (NULL) if $components is not an array
+       }
+
+       /**
+        * Returns an array with the 'components' from an attribute list. The result is normally analyzed by get_tag_attributes
+        * Removes tag-name if found
+        *
+        * @param       string          The tag or attributes
+        * @return      array
+        * @access private
+        * @see t3lib_div::split_tag_attributes()
+        */
+       function split_tag_attributes($tag)     {
+               $matches = array();
+                       // Pattern: optional leading tag name ("<name"), the attribute list (captured),
+                       // optional closing ">". The tag name is consumed even when no whitespace
+                       // follows it (eg. "<br>"), otherwise it would end up as the first component.
+               if (preg_match('/^\s*(?:\<[^\s\>]+)?\s*(.*?)\s*\>?\s*$/s', $tag, $matches)!==1)  {
+                       return array(array(), array());
+               }
+               $tag_tmp = $matches[1];
+
+               $metaValue = array();
+               $value = array();
+               $matches = array();
+                       // A component is a double quoted string, a single quoted string,
+                       // a run of characters without whitespace/quotes/"=", or a single "="
+               if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $tag_tmp, $matches)>0)     {
+                       foreach ($matches[1] as $part)  {
+                               $firstChar = substr($part, 0, 1);
+                               if ($firstChar=='"' || $firstChar=="'") {
+                                               // Quoted: remember the quote character and strip the quotes
+                                       $metaValue[] = $firstChar;
+                                       $value[] = substr($part, 1, -1);
+                               } else  {
+                                       $metaValue[] = '';
+                                       $value[] = $part;
+                               }
+                       }
+               }
+               return array($value,$metaValue);
+       }
-       
+
        /**
         * Checks whether block/solo tags are found in the correct amounts in HTML content
         * Block tags are tags which are required to have an equal amount of start and end tags, eg. "<table>...</table>"
@@ -367,7 +621,7 @@ class t3lib_parsehtml {
         * NOTICE: Correct XHTML might actually fail since "<br></br>" is allowed as well as "<br/>". However only the LATTER is accepted by this function (with "br" in the "solo-tag" list), the first example will result in a warning.
         * NOTICE: Correct XHTML might actually fail since "<p/>" is allowed as well as "<p></p>". However only the LATTER is accepted by this function (with "p" in the "block-tag" list), the first example will result in an ERROR!
         * NOTICE: Correct HTML version "something" allows eg. <p> and <li> to be NON-ended (implicitly ended by other tags). However this is NOT accepted by this function (with "p" and "li" in the block-tag list) and it will result in an ERROR!
-        * 
+        *
         * @param       string          HTML content to analyze
         * @param       string          Tag names for block tags (eg. table or div or p) in lowercase, commalist (eg. "table,div,p")
         * @param       string          Tag names for solo tags (eg. img, br or input) in lowercase, commalist ("img,br,input")
@@ -381,12 +635,12 @@ class t3lib_parsehtml {
                $analyzedOutput['warnings']=array();    // Lists warnings.
                $analyzedOutput['blocks']=array();      // Lists stats for block-tags
                $analyzedOutput['solo']=array();        // Lists stats for solo-tags
-               
+
                        // Block tags, must have endings...
                $blockTags = explode(',',$blockTags);
                foreach($blockTags as $tagName) {
-                       $countBegin = count(split('<'.$tagName.'[^[:alnum:]]',$content))-1;
-                       $countEnd = count(split('<\/'.$tagName.'[^[:alnum:]]',$content))-1;
+                       $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
+                       $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
                        $analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd);
                        if ($countBegin)        $analyzedOutput['counts'][$tagName]=$countBegin;
                        if ($countBegin-$countEnd)      {
@@ -397,37 +651,56 @@ class t3lib_parsehtml {
                                }
                        }
                }
-               
+
                        // Solo tags, must NOT have endings...
                $soloTags = explode(',',$soloTags);
                foreach($soloTags as $tagName)  {
-                       $countBegin = count(split('<'.$tagName.'[^[:alnum:]]',$content))-1;
-                       $countEnd = count(split('<\/'.$tagName.'[^[:alnum:]]',$content))-1;
+                       $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
+                       $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
                        $analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd);
                        if ($countBegin)        $analyzedOutput['counts'][$tagName]=$countBegin;
                        if ($countEnd)  {
                                $analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
                        }
                }
-               
+
                return $analyzedOutput;
-       }       
+       }
+
+
+
+
+
+
+
+
+
+
+
+
+       /*********************************
+        *
+        * Clean HTML code
+        *
+        *********************************/
 
        /**
         * Function that can clean up HTML content according to configuration given in the $tags array.
-        * 
-        * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:            $tags = array_flip(explode(',','b,a,i,u'))     
+        *
+        * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:            $tags = array_flip(explode(',','b,a,i,u'))
         * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
-        *         
+        *
         *      $tags[$tagname] = Array(
         *              'overrideAttribs' => ''         If set, this string is preset as the attributes of the tag
         *              'allowedAttribs' =>   '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
         *              'fixAttrib' => Array(
         *                      '[attribute name]' => Array (
+        *                              'set' => Force the attribute value to this value.
+        *                              'unset' => Boolean: If set, the attribute is unset.
         *                              'default' =>    If no attribute exists by this name, this value is set as default value (if this value is not blank)
         *                              'always' =>     Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
         *                              'trim,intval,lower,upper' =>    All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
-        *                              'range' => Array ('[low limit]','[high limit, optional]')               Setting integer range. 
+        *                              'range' => Array ('[low limit]','[high limit, optional]')               Setting integer range.
         *                              'list' => Array ('[value1/default]','[value2]','[value3]')              Attribute must be in this list. If not, the value is set to the first element.
         *                              'removeIfFalse' =>      Boolean/'blank'.        If set, then the attribute is removed if it is 'false'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
         *                              'removeIfEquals' =>     [value] If the attribute value matches the value set here, then it is removed.
@@ -439,7 +712,7 @@ class t3lib_parsehtml {
         *              'rmTagIfNoAttrib' => '',        Boolean. If set, then the tag is removed if no attributes happen to be there.
         *              'nesting' => '',        Boolean/'global'. If set true, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
         *      )
-        * 
+        *
         * @param       string          $content; is the HTML-content being processed. This is also the result being returned.
         * @param       array           $tags; is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
         * @param       string          $keepAll; boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
@@ -450,31 +723,54 @@ class t3lib_parsehtml {
        function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())      {
                $newContent = array();
                $tokArr = explode('<',$content);
-               $newContent[]=$this->processContent(current($tokArr),$hSC,$addConfig);
+               $newContent[] = $this->processContent(current($tokArr),$hSC,$addConfig);
                next($tokArr);
 
-               $c=1;
-               $tagRegister=array();
-               $tagStack=array();
+               $c = 1;
+               $tagRegister = array();
+               $tagStack = array();
+               $inComment = false; $skipTag = false;
                while(list(,$tok)=each($tokArr))        {
+                       if ($inComment) {
+                               if (($eocPos = strpos($tok, '-->')) === false) {
+                                       // End of comment is not found in the token. Go further until end of comment is found in other tokens.
+                                       $newContent[$c++] = '<' . $tok;
+                                       continue;
+                               }
+                               // Comment ends in the middle of the token: add comment and proceed with rest of the token
+                               $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
+                               $tok = substr($tok, $eocPos + 3);
+                               $inComment = false; $skipTag = true;
+                       }
+                       elseif (substr($tok, 0, 3) == '!--') {
+                               if (($eocPos = strpos($tok, '-->')) === false) {
+                                       // Comment started in this token but it does not end in the same token. Set a flag to skip till the end of comment
+                                       $newContent[$c++] = '<' . $tok;
+                                       $inComment = true;
+                                       continue;
+                               }
+                               // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
+                               $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
+                               $tok = substr($tok, $eocPos + 3);
+                               $skipTag = true;
+                       }
                        $firstChar = substr($tok,0,1);
-#                      if (strcmp(trim($firstChar),''))        {               // It is a tag...
-                       if (ereg('[[:alnum:]\/]',$firstChar))   {               // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004)
-                               $tagEnd = strcspn($tok,'>');
-                               if (strlen($tok)!=$tagEnd)      {       // If there is and end-bracket...
+                       if (!$skipTag && preg_match('/[[:alnum:]\/]/',$firstChar)==1)   {               // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
+                               $tagEnd = strpos($tok,'>');
+                               if ($tagEnd)    {       // If there is and end-bracket...       tagEnd can't be 0 as the first character can't be a >
                                        $endTag = $firstChar=='/' ? 1 : 0;
                                        $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
-                                       $tagParts = split('[[:space:]]',$tagContent,2);
+                                       $tagParts = preg_split('/\s+/s',$tagContent,2);
                                        $tagName = strtolower($tagParts[0]);
                                        if (isset($tags[$tagName]))     {
                                                if (is_array($tags[$tagName]))  {       // If there is processing to do for the tag:
-                                               
+
                                                        if (!$endTag)   {       // If NOT an endtag, do attribute processing (added dec. 2003)
                                                                        // Override attributes
                                                                if (strcmp($tags[$tagName]['overrideAttribs'],''))      {
                                                                        $tagParts[1]=$tags[$tagName]['overrideAttribs'];
                                                                }
-                                                       
+
                                                                        // Allowed tags
                                                                if (strcmp($tags[$tagName]['allowedAttribs'],''))       {
                                                                        if (!strcmp($tags[$tagName]['allowedAttribs'],'0'))     {       // No attribs allowed
@@ -483,20 +779,25 @@ class t3lib_parsehtml {
                                                                                $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                                                                $tagParts[1]='';
                                                                                $newTagAttrib = array();
-                                                                               $tList = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
-                                                                               while(list(,$allowTag)=each($tList))    {
+                                                                               if (!($tList = $tags[$tagName]['_allowedAttribs']))     {
+                                                                                               // Just explode attributes for tag once
+                                                                                       $tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
+                                                                               }
+                                                                               foreach ($tList as $allowTag)   {
                                                                                        if (isset($tagAttrib[0][$allowTag]))    $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
                                                                                }
                                                                                $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
                                                                        }
                                                                }
-                                                                       
+
                                                                        // Fixed attrib values
                                                                if (is_array($tags[$tagName]['fixAttrib']))     {
                                                                        $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                                                        $tagParts[1]='';
                                                                        reset($tags[$tagName]['fixAttrib']);
                                                                        while(list($attr,$params)=each($tags[$tagName]['fixAttrib']))   {
+                                                                               if (strlen($params['set']))     $tagAttrib[0][$attr] = $params['set'];
+                                                                               if (strlen($params['unset']))   unset($tagAttrib[0][$attr]);
                                                                                if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))      $tagAttrib[0][$attr]=$params['default'];
                                                                                if ($params['always'] || isset($tagAttrib[0][$attr]))   {
                                                                                        if ($params['trim'])    {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
@@ -557,7 +858,7 @@ class t3lib_parsehtml {
                                                                // rmTagIfNoAttrib
                                                        if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib'])       {
                                                                $setTag=1;
-                                                               
+
                                                                if ($tags[$tagName]['nesting']) {
                                                                        if (!is_array($tagRegister[$tagName]))  $tagRegister[$tagName]=array();
 
@@ -566,7 +867,7 @@ class t3lib_parsehtml {
                                                                                        $lastEl = end($tagStack);
                                                                                        $correctTag = !strcmp($tagName,$lastEl);
                                                                                } else $correctTag=1;
-       */                                                                      
+       */
                                                                                $correctTag=1;
                                                                                if ($tags[$tagName]['nesting']=='global')       {
                                                                                        $lastEl = end($tagStack);
@@ -575,13 +876,13 @@ class t3lib_parsehtml {
                                                                                                        while(count($tagStack) && strcmp($tagName,$lastEl))     {
                                                                                                                $elPos = end($tagRegister[$lastEl]);
                                                                                                                unset($newContent[$elPos]);
-                                                                                                               
+
                                                                                                                array_pop($tagRegister[$lastEl]);
                                                                                                                array_pop($tagStack);
                                                                                                                $lastEl = end($tagStack);
                                                                                                        }
                                                                                                } else {
-                                                                                                       $correctTag=0;  // In this case the 
+                                                                                                       $correctTag=0;  // In this case the
                                                                                                }
                                                                                        }
                                                                                }
@@ -596,7 +897,7 @@ class t3lib_parsehtml {
                                                                                if ($tags[$tagName]['nesting']=='global')       {array_push($tagStack,$tagName);}
                                                                        }
                                                                }
-                                                               
+
                                                                if ($setTag)    {
                                                                                // Setting the tag
                                                                        $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
@@ -618,15 +919,14 @@ class t3lib_parsehtml {
                                        $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // There were not end-bracket, so no tag...
                                }
                        } else {
-                               $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // It was not a tag anyways
+                               $newContent[$c++]=$this->processContent(($skipTag ? '' : '<') . $tok, $hSC, $addConfig);        // It was not a tag anyways
+                               $skipTag = false;
                        }
                }
-               
+
                        // Unsetting tags:
-               reset($tagRegister);
-               while(list($tag,$positions)=each($tagRegister)) {
-                       reset($positions);
-                       while(list(,$pKey)=each($positions))    {
+               foreach ($tagRegister as $tag => $positions)    {
+                       foreach ($positions as $pKey)   {
                                unset($newContent[$pKey]);
                        }
                }
@@ -635,88 +935,8 @@ class t3lib_parsehtml {
        }
 
        /**
-        * Returns an array with all attributes as keys. Attributes are only lowercase a-z
-        * If a attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
-        * 
-        * @param       string          Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
-        * @param       boolean         If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
-        * @return      array           array(Tag attributes,Attribute meta-data)
-        */
-       function get_tag_attributes($tag,$deHSC=0)      {
-               list($components,$metaC) = $this->split_tag_attributes($tag);
-               $name = '';      // attribute name is stored here
-               $valuemode = '';
-               $attributes=array();
-               $attributesMeta=array();
-               if (is_array($components))      {
-                       while (list($key,$val) = each ($components))    {
-                               if ($val != '=')        {       // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value
-                                       if ($valuemode) {
-                                               if ($name)      {
-                                                       $attributes[$name] = $deHSC?t3lib_div::htmlspecialchars_decode($val):$val;
-                                                       $attributesMeta[$name]['dashType']=$metaC[$key];
-                                                       $name = '';
-                                               }
-                                       } else {
-                                               if ($namekey = ereg_replace('[^a-zA-Z0-9_-]','',$val))  {
-                                                       $name = strtolower($namekey);
-                                                       $attributesMeta[$name]=array();
-                                                       $attributesMeta[$name]['origTag']=$namekey;
-                                                       $attributes[$name] = '';
-                                               }
-                                       }
-                                       $valuemode = '';
-                               } else {
-                                       $valuemode = 'on';
-                               }
-                       }
-                       if (is_array($attributes))      reset($attributes);
-                       return array($attributes,$attributesMeta);
-               }
-       }
-       
-       /**
-        * Returns an array with the 'components' from an attribute list. The result is normally analyzed by get_tag_attributes
-        * Removes tag-name if found
-        * 
-        * @param       string          The tag or attributes
-        * @return      array           
-        * @access private
-        * @see t3lib_div::split_tag_attributes()
-        */
-       function split_tag_attributes($tag)     {
-               $tag_tmp = trim(eregi_replace ('^<[^[:space:]]*','',trim($tag)));
-                       // Removes any > in the end of the string
-               $tag_tmp = trim(eregi_replace ('>$','',$tag_tmp));
-
-               $metaValue = array();
-               $value = array();
-               while (strcmp($tag_tmp,''))     {       // Compared with empty string instead , 030102
-                       $firstChar=substr($tag_tmp,0,1);
-                       if (!strcmp($firstChar,'"') || !strcmp($firstChar,"'")) {
-                               $reg=explode($firstChar,$tag_tmp,3);
-                               $value[]=$reg[1];
-                               $metaValue[]=$firstChar;
-                               $tag_tmp=trim($reg[2]);
-                       } elseif (!strcmp($firstChar,'=')) {
-                               $value[] = '=';
-                               $metaValue[]='';
-                               $tag_tmp = trim(substr($tag_tmp,1));            // Removes = chars.
-                       } else {
-                                       // There are '' around the value. We look for the next ' ' or '>'
-                               $reg = split('[[:space:]=]',$tag_tmp,2);
-                               $value[] = trim($reg[0]);
-                               $metaValue[]='';
-                               $tag_tmp = trim(substr($tag_tmp,strlen($reg[0]),1).$reg[1]);
-                       }
-               }
-               if (is_array($value))   reset($value);
-               return array($value,$metaValue);
-       }
-       
-       /**
         * Converts htmlspecialchars forth ($dir=1) AND back ($dir=-1)
-        * 
+        *
         * @param       string          Input value
         * @param       integer         Direction: forth ($dir=1, dir=2 for preserving entities) AND back ($dir=-1)
         * @return      string          Output value
@@ -737,16 +957,17 @@ class t3lib_parsehtml {
 
        /**
         * Prefixes the relative paths of hrefs/src/action in the tags [td,table,body,img,input,form,link,script,a] in the $content with the $main_prefix or and alternative given by $alternatives
-        * 
+        *
         * @param       string          Prefix string
         * @param       string          HTML content
         * @param       array           Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase
+        * @param       string          Suffix string (put after the resource).
         * @return      string          Processed HTML content
         */
-       function prefixResourcePath($main_prefix,$content,$alternatives=array())        {
-               $parts = $this->splitTags('td,table,body,img,input,form,link,script,a',$content);
-               reset($parts);
-               while(list($k,$v)=each($parts)) {
+       function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')     {
+
+               $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
+               foreach ($parts as $k => $v)    {
                        if ($k%2)       {
                                $params = $this->get_tag_attributes($v,1);
                                $tagEnd = substr($v,-2)=='/>' ? ' />' : '>';    // Detect tag-ending so that it is re-applied correctly.
@@ -760,7 +981,7 @@ class t3lib_parsehtml {
                                        case 'table':
                                                $src = $params[0]['background'];
                                                if ($src)       {
-                                                       $params[0]['background'] = $this->prefixRelPath($prefix,$params[0]['background']);
+                                                       $params[0]['background'] = $this->prefixRelPath($prefix,$params[0]['background'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
@@ -768,9 +989,10 @@ class t3lib_parsehtml {
                                        case 'img':
                                        case 'input':
                                        case 'script':
+                                       case 'embed':
                                                $src = $params[0]['src'];
                                                if ($src)       {
-                                                       $params[0]['src'] = $this->prefixRelPath($prefix,$params[0]['src']);
+                                                       $params[0]['src'] = $this->prefixRelPath($prefix,$params[0]['src'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
@@ -778,7 +1000,7 @@ class t3lib_parsehtml {
                                        case 'a':
                                                $src = $params[0]['href'];
                                                if ($src)       {
-                                                       $params[0]['href'] = $this->prefixRelPath($prefix,$params[0]['href']);
+                                                       $params[0]['href'] = $this->prefixRelPath($prefix,$params[0]['href'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
@@ -786,43 +1008,56 @@ class t3lib_parsehtml {
                                        case 'form':
                                                $src = $params[0]['action'];
                                                if ($src)       {
-                                                       $params[0]['action'] = $this->prefixRelPath($prefix,$params[0]['action']);
+                                                       $params[0]['action'] = $this->prefixRelPath($prefix,$params[0]['action'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
                                }
                                if ($somethingDone)     {
-                                       $tagParts = split('[[:space:]]',$v,2);
+                                       $tagParts = preg_split('/\s+/s',$v,2);
                                        $tagParts[1]=$this->compileTagAttribs($params[0],$params[1]);
-                                       $parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).
-                                                                       $tagEnd;
+                                       $parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).$tagEnd;
                                }
                        }
                }
-               
-               return implode('',$parts);
-       }               
-       
+               $content = implode('',$parts);
+
+                       // Fix <style> section:
+               $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
+               if (strlen($prefix))    {
+                       $parts = $this->splitIntoBlock('style',$content);
+                       foreach($parts as $k => $v)     {
+                               if ($k%2)       {
+                                       $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i','\1'.$prefix.'\2'.$suffix.'\3',$parts[$k]);
+                               }
+                       }
+                       $content = implode('',$parts);
+               }
+
+               return $content;
+       }
+
        /**
         * Internal sub-function for ->prefixResourcePath()
-        * 
+        *
         * @param       string          Prefix string
         * @param       string          Relative path/URL
+        * @param       string          Suffix string
         * @return      string          Output path, prefixed if no scheme in input string
         * @access private
         */
-       function prefixRelPath($prefix,$srcVal) {
+       function prefixRelPath($prefix,$srcVal,$suffix='')      {
                $pU = parse_url($srcVal);
-               if (!$pU['scheme'])             {       // If not an absolute URL.
-                       $srcVal = $prefix.$srcVal;
+               if (!$pU['scheme'] && substr($srcVal, 0, 1)!='/')       { // Only if the value has no URL scheme and does not start with "/" (i.e. is neither an absolute URL nor a root-relative path).
+                       $srcVal = $prefix.$srcVal.$suffix;      // Prefix and suffix are applied together; otherwise the value is returned untouched.
                }
                return $srcVal;
        }
 
        /**
-        * Cleans up the input $value for fonttags. 
+        * Cleans up the input $value for fonttags.
         * If keepFace,-Size and -Color is set then font-tags with an allowed property is kept. Else deleted.
-        * 
+        *
         * @param       string          HTML content with font-tags inside to clean up.
         * @param       boolean         If set, keep "face" attribute
         * @param       boolean         If set, keep "size" attribute
@@ -831,15 +1066,14 @@ class t3lib_parsehtml {
         */
        function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)     {
                $fontSplit = $this->splitIntoBlock('font',$value);      // ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set!
-               reset($fontSplit);
-               while(list($k,$v)=each($fontSplit))     {
+               foreach ($fontSplit as $k => $v)        {
                        if ($k%2)       {       // font:
                                $attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v));
                                $newAttribs=array();
                                if ($keepFace && $attribArray['face'])  $newAttribs[]='face="'.$attribArray['face'].'"';
                                if ($keepSize && $attribArray['size'])  $newAttribs[]='size="'.$attribArray['size'].'"';
                                if ($keepColor && $attribArray['color'])        $newAttribs[]='color="'.$attribArray['color'].'"';
-                                       
+
                                $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor);
                                if (count($newAttribs)) {
                                        $fontSplit[$k]='<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>';
@@ -850,10 +1084,10 @@ class t3lib_parsehtml {
                }
                return implode('',$fontSplit);
        }
-       
+
        /**
         * This is used to map certain tag-names into other names.
-        * 
+        *
         * @param       string          HTML content
         * @param       array           Array with tag key=>value pairs where key is from-tag and value is to-tag
         * @param       string          Alternative less-than char to search for (search regex string)
@@ -861,18 +1095,16 @@ class t3lib_parsehtml {
         * @return      string          Processed HTML content
         */
        function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
-       
+
                foreach($tags as $from => $to)  {
-                       $value = eregi_replace($ltChar.$from.'>',$ltChar2.$to.'>',$value);
-                       $value = eregi_replace($ltChar.$from.'[[:space:]]([^>]*)>',$ltChar2.$to.' \\1>',$value);
-                       $value = eregi_replace($ltChar.'\/'.$from.'[^>]*>',$ltChar2.'/'.$to.'>',$value);
+                       $value = preg_replace('/'.preg_quote($ltChar).'(\/)?'.$from.'\s([^\>])*(\/)?\>/', $ltChar2.'$1'.$to.' $2$3>', $value);
                }
                return $value;
        }
 
        /**
         * This converts htmlspecialchar()'ed tags (from $tagList) back to real tags. Eg. '&lt;strong&gt' would be converted back to '<strong>' if found in $tagList
-        * 
+        *
         * @param       string          HTML content
         * @param       string          Tag list, separated by comma. Lowercase!
         * @return      string          Processed HTML content
@@ -889,7 +1121,7 @@ class t3lib_parsehtml {
                                if (strlen($tok)!=$tagEnd)      {
                                        $endTag = $firstChar=='/' ? 1 : 0;
                                        $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
-                                       $tagParts = split('[[:space:]]',$tagContent,2);
+                                       $tagParts = preg_split('/\s+/s',$tagContent,2);
                                        $tagName = strtolower($tagParts[0]);
                                        if (!strcmp($tagList,'') || in_array($tagName,$tagsArray))      {
                                                $contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
@@ -897,14 +1129,14 @@ class t3lib_parsehtml {
                                } else $contentParts[$k] = '&lt;'.$tok;
                        } else $contentParts[$k] = '&lt;'.$tok;
                }
-               
+
                return implode('',$contentParts);
        }
-       
+
        /**
         * Strips tags except the tags in the list, $tagList
         * OBSOLETE - use PHP function strip_tags()
-        * 
+        *
         * @param       string          Value to process
         * @param       string          List of tags
         * @return      string          Output value
@@ -914,43 +1146,46 @@ class t3lib_parsehtml {
                $tags=t3lib_div::trimExplode(',',$tagList,1);
                $forthArr=array();
                $backArr=array();
-               while(list(,$theTag)=each($tags))       {
+               foreach ($tags as $theTag)      {
                        $forthArr[$theTag]=md5($theTag);
                        $backArr[md5($theTag)]=$theTag;
                }
-                       $value = $this->mapTags($value,$forthArr,'<','_');
-                       $value=strip_tags($value);
-                       $value = $this->mapTags($value,$backArr,'_','<');
+               $value = $this->mapTags($value,$forthArr,'<','_');
+               $value=strip_tags($value);
+               $value = $this->mapTags($value,$backArr,'_','<');
                return $value;
        }
-       
+
        /**
         * Internal function for case shifting of a string or whole array
-        * 
+        *
         * @param       mixed           Input string/array
-        * @param       boolean         If $str is a string AND this boolean is true, the string is returned in uppercase
+        * @param       boolean         Case-sensitivity flag: if false, $str (a string, or every value of an array) is returned in uppercase; if true, $str is returned unchanged
         * @param       string          Key string used for internal caching of the results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
         * @return      string          Output string, processed
         * @access private
         */
        function caseShift($str,$flag,$cacheKey='')     {
+               $cacheKey .= $flag?1:0;
                if (is_array($str))     {
                        if (!$cacheKey || !isset($this->caseShift_cache[$cacheKey]))    {
                                reset($str);
-                               while(list($k)=each($str))      {
-                                       $str[$k] = strtoupper($str[$k]);
+                               foreach ($str as $k => $v)      {
+                                       if (!$flag)     {
+                                               $str[$k] = strtoupper($v);
+                                       }
                                }
                                if ($cacheKey)  $this->caseShift_cache[$cacheKey]=$str;
                        } else {
                                $str = $this->caseShift_cache[$cacheKey];
                        }
-               } elseif (!$flag)       $str = strtoupper($str);
+               } elseif (!$flag)       { $str = strtoupper($str); }
                return $str;
        }
-       
+
        /**
         * Compiling an array with tag attributes into a string
-        * 
+        *
         * @param       array           Tag attributes
         * @param       array           Meta information about these attributes (like if they were quoted)
         * @param       boolean         If set, then the attribute names will be set in lower case, value quotes in double-quotes and the value will be htmlspecialchar()'ed
@@ -959,8 +1194,7 @@ class t3lib_parsehtml {
         */
        function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)     {
                $accu=array();
-               reset($tagAttrib);
-               while(list($k,$v)=each($tagAttrib))     {
+               foreach ($tagAttrib as $k =>$v) {
                        if ($xhtmlClean)        {
                                $attr=strtolower($k);
                                if (strcmp($v,'') || isset($meta[$k]['dashType']))      {
@@ -977,13 +1211,13 @@ class t3lib_parsehtml {
                }
                return implode(' ',$accu);
        }
-       
+
        /**
         * Get tag attributes, the classic version (which had some limitations?)
-        * 
+        *
         * @param       string          The tag
         * @param       boolean         De-htmlspecialchar flag.
-        * @return      array           
+        * @return      array
         * @access private
         */
        function get_tag_attributes_classic($tag,$deHSC=0)      {
@@ -992,11 +1226,28 @@ class t3lib_parsehtml {
        }
 
        /**
+        * Indents input content with $number instances of $indentChar
+        *
+        * @param       string          Content string, multiple lines.
+        * @param       integer         Number of indents
+        * @param       string          Indent character/string
+        * @return      string          Indented code (typ. HTML)
+        */
+       function indentLines($content, $number=1, $indentChar="\t")     {
+               $preTab = str_pad('', $number*strlen($indentChar), $indentChar);        // Indent prefix: $indentChar repeated $number times
+               $lines = explode(chr(10),str_replace(chr(13),'',$content));     // Remove all CR characters, then split on LF
+               foreach ($lines as $k => $v)    {
+                       $lines[$k] = $preTab.$v;        // Every line gets the prefix; empty lines are not skipped
+               }
+               return implode(chr(10), $lines);        // Lines are joined with LF only
+       }
+
+       /**
         * Converts TSconfig into an array for the HTMLcleaner function.
-        * 
+        *
         * @param       array           TSconfig for HTMLcleaner
         * @param       array           Array of tags to keep (?)
-        * @return      array           
+        * @return      array
         * @access private
         */
        function HTMLparserConfig($TSconfig,$keepTags=array())  {
@@ -1015,7 +1266,7 @@ class t3lib_parsehtml {
                        }
 
                        reset($TSconfig['tags.']);
-                       while(list($key,$tagC)=each($TSconfig['tags.']))        {
+                       foreach ($TSconfig['tags.'] as $key => $tagC)   {
                                if (is_array($tagC) && $key==strtolower($key))  {
                                        $key=substr($key,0,-1);
                                        if (!is_array($keepTags[$key])) $keepTags[$key]=array();
@@ -1083,25 +1334,25 @@ class t3lib_parsehtml {
                                $keepTags[$tn]['rmTagIfNoAttrib']=1;
                        }
                }
-               
+
                        // Create additional configuration:
                $addConfig=array();
                if ($TSconfig['xhtml_cleaning'])        {
                        $addConfig['xhtml']=1;
-               }               
-               
+               }
+
                return array(
-                       $keepTags, 
-                       ''.$TSconfig['keepNonMatchedTags'], 
+                       $keepTags,
+                       ''.$TSconfig['keepNonMatchedTags'],
                        intval($TSconfig['htmlSpecialChars']),
                        $addConfig
                );
        }
-       
+
        /**
         * Tries to convert the content to be XHTML compliant and other stuff like that.
         * STILL EXPERIMENTAL. See comments below.
-        * 
+        *
         *                      What it does NOT do (yet) according to XHTML specs.:
         *                      - Wellformedness: Nesting is NOT checked
         *                      - name/id attribute issue is not observed at this point.
@@ -1112,13 +1363,13 @@ class t3lib_parsehtml {
         *                      - stylesheet element and attribute names are NOT converted to lowercase
         *                      - ampersands (and entities in general I think) MUST be converted to an entity reference! (&amps;). This may mean further conversion of non-tag content before output to page. May be related to the charset issue as a whole.
         *                      - Minimized values not allowed: Must do this: selected="selected"
-        *                      
+        *
         *                      What it does at this point:
         *                      - All tags (frame,base,meta,link + img,br,hr,area,input) is ended with "/>" - others?
         *                      - Lowercase for elements and attributes
         *                      - All attributes in quotes
         *                      - Add "alt" attribute to img-tags if it's not there already.
-        * 
+        *
         * @param       string          Content to clean up
         * @return      string          Cleaned up content returned.
         * @access private
@@ -1131,14 +1382,13 @@ class t3lib_parsehtml {
                        0,                      // All content is htmlspecialchar()'ed (or ??) - if we do, <script> content will break...
                        array('xhtml' => 1)
                );
-
                return $content;
        }
 
        /**
         * Processing all tags themselves
         * (Some additions by Sacha Vorbeck)
-        * 
+        *
         * @param       string          Tag to process
         * @param       array           Configuration array passing instructions for processing. If count()==0, function will return value unprocessed. See source code for details
         * @param       boolean         Is endtag, then set this.
@@ -1149,17 +1399,16 @@ class t3lib_parsehtml {
        function processTag($value,$conf,$endTag,$protected=0)  {
                        // Return immediately if protected or no parameters
                if ($protected || !count($conf))        return $value;
-
                        // OK then, begin processing for XHTML output:
                        // STILL VERY EXPERIMENTAL!!
                if ($conf['xhtml'])     {
                        if ($endTag)    {       // Endtags are just set lowercase right away
                                $value = strtolower($value);
-                       } elseif (substr($value,0,2)!='<!') {   // ... and comments are ignored.
+                       } elseif (substr($value,0,4)!='<!--') { // ... and comments are ignored.
                                $inValue = substr($value,1,(substr($value,-2)=='/>'?-2:-1));    // Finding inner value with out < >
-                               list($tagName,$tagP)=split('[[:space:]]',$inValue,2);   // Separate attributes and tagname
+                               list($tagName,$tagP)=preg_split('/\s+/s',$inValue,2);   // Separate attributes and tagname
                                $tagName = strtolower($tagName);
-                               
+
                                        // Process attributes
                                $tagAttrib = $this->get_tag_attributes($tagP);
                                if (!strcmp($tagName,'img') && !isset($tagAttrib[0]['alt']))            $tagAttrib[0]['alt']='';        // Set alt attribute for all images (not XHTML though...)
@@ -1168,11 +1417,11 @@ class t3lib_parsehtml {
                                reset($tagAttrib[0]);
                                while(list($attrib_name,$attrib_value)=each($tagAttrib[0]))     {
                                                // Set attributes: lowercase, always in quotes, with htmlspecialchars converted.
-                                       $outA[]=$attrib_name.'="'.htmlspecialchars($this->bidir_htmlspecialchars($attrib_value,-1)).'"';
+                                       $outA[]=$attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
                                }
                                $newTag='<'.trim($tagName.' '.implode(' ',$outA));
                                        // All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
-                               if (t3lib_div::inList('img,br,hr,meta,link,base,area,input',$tagName) || substr($value,-2)=='/>')       {
+                               if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || substr($value,-2)=='/>')     {
                                        $newTag.=' />';
                                } else {
                                        $newTag.='>';
@@ -1180,13 +1429,13 @@ class t3lib_parsehtml {
                                $value = $newTag;
                        }
                }
-               
+
                return $value;
        }
-       
+
        /**
         * Processing content between tags for HTML_cleaner
-        * 
+        *
         * @param       string          The value
         * @param       integer         Direction, either -1 or +1. 0 (zero) means no change to input value.
         * @param       mixed           Not used, ignore.
@@ -1204,4 +1453,5 @@ class t3lib_parsehtml {
 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])        {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
 }
+
 ?>
\ No newline at end of file