Tiny bug with date function in t3lib_befunc. Casted timestamp to integer.
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml.php
index f89ad31..566c663 100644 (file)
@@ -2,7 +2,7 @@
 /***************************************************************
 *  Copyright notice
 *
-*  (c) 1999-2004 Kasper Skaarhoj (kasper@typo3.com)
+*  (c) 1999-2009 Kasper Skaarhoj (kasperYYYY@typo3.com)
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
@@ -30,7 +30,7 @@
  * $Id$
  * Revised for TYPO3 3.6 July/2003 by Kasper Skaarhoj
  *
- * @author     Kasper Skaarhoj <kasper@typo3.com>
+ * @author     Kasper Skaarhoj <kasperYYYY@typo3.com>
  */
 /**
  * [CLASS/FUNCTION INDEX of SCRIPT]
  *
  *  106: class t3lib_parsehtml
  *  123:     function getSubpart($content, $marker)
- *  151:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
+ *  156:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
  *
  *              SECTION: Parsing HTML code
- *  223:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
- *  284:     function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
- *  320:     function splitTags($tag,$content)
- *  354:     function getAllParts($parts,$tag_parts=1,$include_tag=1)
- *  373:     function removeFirstAndLastTag($str)
- *  392:     function getFirstTag($str)
- *  407:     function getFirstTagName($str,$preserveCase=FALSE)
- *  422:     function get_tag_attributes($tag,$deHSC=0)
- *  464:     function split_tag_attributes($tag)
- *  507:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
+ *  247:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
+ *  308:     function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
+ *  344:     function splitTags($tag,$content)
+ *  378:     function getAllParts($parts,$tag_parts=1,$include_tag=1)
+ *  396:     function removeFirstAndLastTag($str)
+ *  412:     function getFirstTag($str)
+ *  426:     function getFirstTagName($str,$preserveCase=FALSE)
+ *  445:     function get_tag_attributes($tag,$deHSC=0)
+ *  486:     function split_tag_attributes($tag)
+ *  524:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
  *
  *              SECTION: Clean HTML code
- *  598:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
- *  792:     function bidir_htmlspecialchars($value,$dir)
- *  814:     function prefixResourcePath($main_prefix,$content,$alternatives=array())
- *  882:     function prefixRelPath($prefix,$srcVal)
- *  900:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
- *  931:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
- *  948:     function unprotectTags($content,$tagList='')
- *  981:     function stripTagsExcept($value,$tagList)
- * 1004:     function caseShift($str,$flag,$cacheKey='')
- * 1028:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
- * 1057:     function get_tag_attributes_classic($tag,$deHSC=0)
- * 1070:     function indentLines($content, $number=1, $indentChar="\t")
- * 1087:     function HTMLparserConfig($TSconfig,$keepTags=array())
- * 1211:     function XHTML_clean($content)
- * 1234:     function processTag($value,$conf,$endTag,$protected=0)
- * 1281:     function processContent($value,$dir,$conf)
+ *  617:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
+ *  814:     function bidir_htmlspecialchars($value,$dir)
+ *  837:     function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')
+ *  919:     function prefixRelPath($prefix,$srcVal,$suffix='')
+ *  937:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
+ *  967:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
+ *  982:     function unprotectTags($content,$tagList='')
+ * 1015:     function stripTagsExcept($value,$tagList)
+ * 1038:     function caseShift($str,$flag,$cacheKey='')
+ * 1065:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
+ * 1093:     function get_tag_attributes_classic($tag,$deHSC=0)
+ * 1106:     function indentLines($content, $number=1, $indentChar="\t")
+ * 1123:     function HTMLparserConfig($TSconfig,$keepTags=array())
+ * 1247:     function XHTML_clean($content)
+ * 1269:     function processTag($value,$conf,$endTag,$protected=0)
+ * 1315:     function processContent($value,$dir,$conf)
  *
  * TOTAL FUNCTIONS: 28
  * (This index is automatically created/updated by the extension "extdeveval")
  * Functions for parsing HTML.
  * You are encouraged to use this class in your own applications
  *
- * @author     Kasper Skaarhoj <kasper@typo3.com>
+ * @author     Kasper Skaarhoj <kasperYYYY@typo3.com>
  * @package TYPO3
  * @subpackage t3lib
  */
-class t3lib_parsehtml {
-       var $caseShift_cache=array();
+class t3lib_parsehtml  {
 
-
-       // *******************************************'
-       // COPY FROM class.tslib_content.php: / BEGIN
-       // substituteSubpart
-       // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
-       // *******************************************'
+       protected $caseShift_cache = array();
 
        /**
-        * Returns the first subpart encapsulated in the marker, $marker (possibly present in $content as a HTML comment)
+        * Returns the first subpart encapsulated in the marker, $marker
+        * (possibly present in $content as a HTML comment)
         *
         * @param       string          Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
         * @param       string          Marker string, eg. "###CONTENT_PART###"
         * @return      string
         */
-       function getSubpart($content, $marker)  {
-               if ($marker && strstr($content,$marker))        {
-                       $start = strpos($content, $marker)+strlen($marker);
-                       $stop = @strpos($content, $marker, $start+1);
-                       $sub = substr($content, $start, $stop-$start);
+       public static function getSubpart($content, $marker) {
+               $start = strpos($content, $marker);
+
+               if ($start === false) {
+                       return '';
+               }
+
+               $start += strlen($marker);
+               $stop   = strpos($content, $marker, $start);
 
-                       $reg=Array();
-                       ereg('^[^<]*-->',$sub,$reg);
-                       $start+=strlen($reg[0]);
+                       // Q: What shall get returned if no stop marker is given
+                       // /*everything till the end*/ or nothing?
+               if ($stop===false) {
+                       return ''; /*substr($content, $start)*/
+               }
 
-                       $reg=Array();
-                       ereg('<!--[^>]*$',$sub,$reg);
-                       $stop-=strlen($reg[0]);
+               $content = substr($content, $start, $stop-$start);
 
-                       return substr($content, $start, $stop-$start);
+               $matches = array();
+               if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches) === 1) {
+                       return $matches[2];
                }
+
+               $matches = array(); // resetting $matches
+               if (preg_match('/(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches) === 1) {
+                       return $matches[1];
+               }
+
+               $matches = array(); // resetting $matches
+               if (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $content, $matches) === 1) {
+                       return $matches[2];
+               }
+
+               return $content;
        }
 
        /**
@@ -148,54 +161,171 @@ class t3lib_parsehtml {
         * @param       boolean         If set, the marker around the subpart is not removed, but kept in the output
         * @return      string          Processed input content
         */
-       function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
+       public static function substituteSubpart($content, $marker, $subpartContent, $recursive = 1, $keepMarker = 0) {
                $start = strpos($content, $marker);
-               $stop = @strpos($content, $marker, $start+1)+strlen($marker);
-               if ($start && $stop>$start)     {
-                       // code before
-                       $before = substr($content, 0, $start);
-                       $reg=Array();
-                       ereg('<!--[^>]*$',$before,$reg);
-                       $start-=strlen($reg[0]);
-                       if ($keepMarker)        {
-                               $reg_k=Array();
-                               if ($reg[0])    ereg('^[^>]*-->',substr($content,$start),$reg_k);
-                               $before_marker = substr($content, $start, strlen($reg_k[0]?$reg_k[0]:$marker));
+
+               if ($start === false) {
+                       return $content;
+               }
+
+               $startAM = $start + strlen($marker);
+               $stop    = strpos($content, $marker, $startAM);
+
+               if ($stop===false) {
+                       return $content;
+               }
+
+               $stopAM  = $stop + strlen($marker);
+               $before  = substr($content, 0, $start);
+               $after   = substr($content, $stopAM);
+               $between = substr($content, $startAM, $stop-$startAM);
+
+               if ($recursive) {
+                       $after = t3lib_parsehtml::substituteSubpart(
+                               $after,
+                               $marker,
+                               $subpartContent,
+                               $recursive,
+                               $keepMarker
+                       );
+               }
+
+               if ($keepMarker) {
+                       $matches = array();
+                       if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches) === 1) {
+                               $before  .= $marker.$matches[1];
+                               $between  = $matches[2];
+                               $after    = $matches[3] . $marker . $after;
+                       } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches) === 1) {
+                               $before  .= $marker;
+                               $between  = $matches[1];
+                               $after    = $matches[2] . $marker . $after;
+                       } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches) === 1) {
+                               $before  .= $marker . $matches[1];
+                               $between  = $matches[2];
+                               $after    = $marker . $after;
+                       } else  {
+                               $before .= $marker;
+                               $after   = $marker . $after;
                        }
-                       $before = substr($content, 0, $start);
-                               // code after
-                       $after = substr($content, $stop);
-                       $reg=Array();
-                       ereg('^[^<]*-->',$after,$reg);
-                       $stop+=strlen($reg[0]);
-                       if ($keepMarker)        {
-                               $reg_k=Array();
-                               if ($reg[0])    ereg('<!--[^<]*$',substr($content,0,$stop),$reg_k);
-                               $sLen = strlen($reg_k[0]?$reg_k[0]:$marker);
-                               $after_marker = substr($content, $stop-$sLen,$sLen);
+
+               } else {
+                       $matches = array();
+                       if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $before, $matches) === 1) {
+                               $before = $matches[1];
                        }
-                       $after = substr($content, $stop);
 
+                       if (is_array($subpartContent)) {
+                               $matches = array();
+                               if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches) === 1) {
+                                       $between = $matches[2];
+                               } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1) {
+                                       $between = $matches[1];
+                               } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1) {
+                                       $between = $matches[2];
+                               }
+                       }
 
-                               // replace?
-                       if (is_array($subpartContent))  {
-                               $substContent=$subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
-                       } else {
-                               $substContent=$subpartContent;
+                       $matches = array(); // resetting $matches
+                       if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $after, $matches) === 1) {
+                               $after = $matches[1];
                        }
+               }
 
-                       if ($recursive && strpos($after, $marker))      {
-                               return $before.($keepMarker?$before_marker:'').$substContent.($keepMarker?$after_marker:'').$this->substituteSubpart($after,$marker,$subpartContent);
-                       } else {
-                               return $before.($keepMarker?$before_marker:'').$substContent.($keepMarker?$after_marker:'').$after;
+               if (is_array($subpartContent)) {
+                       $between = $subpartContent[0] . $between . $subpartContent[1];
+               } else  {
+                       $between = $subpartContent;
+               }
+
+               return $before . $between . $after;
+       }
+
+       /**
+        * Substitutes multiple subparts at once
+        *
+        * @param       string          The content stream, typically HTML template content.
+        * @param       array           The array of key/value pairs being subpart/content values used in the substitution. For each element in this array the function will substitute a subpart in the content stream with the content.
+        * @return      string          The processed HTML content string.
+        */
+       public static function substituteSubpartArray($content, array $subpartsContent) {
+               foreach ($subpartsContent as $subpartMarker => $subpartContent) {
+                       $content = t3lib_parsehtml::substituteSubpart(
+                               $content,
+                               $subpartMarker,
+                               $subpartContent
+                       );
+               }
+
+               return $content;
+       }
+
+
+       /**
+        * Substitutes a marker string in the input content
+        * (by a simple str_replace())
+        *
+        * @param       string          The content stream, typically HTML template content.
+        * @param       string          The marker string, typically on the form "###[the marker string]###"
+        * @param       mixed           The content to insert instead of the marker string found.
+        * @return      string          The processed HTML content string.
+        * @see substituteSubpart()
+        */
+       public static function substituteMarker($content, $marker, $markContent) {
+               return str_replace($marker, $markContent, $content);
+       }
+
+
+       /**
+        * Traverses the input $markContentArray array and for each key the marker
+        * by the same name (possibly wrapped and in upper case) will be
+        * substituted with the key's value in the array. This is very useful if you
+        * have a data-record to substitute in some content. In particular when you
+        * use the $wrap and $uppercase values to pre-process the markers. Eg. a
+        * key name like "myfield" could effectively be represented by the marker
+        * "###MYFIELD###" if the wrap value was "###|###" and the $uppercase
+        * boolean true.
+        *
+        * @param       string          The content stream, typically HTML template content.
+        * @param       array           The array of key/value pairs being marker/content values used in the substitution. For each element in this array the function will substitute a marker in the content stream with the content.
+        * @param       string          A wrap value - [part 1] | [part 2] - for the markers before substitution
+        * @param       boolean         If set, all marker string substitution is done with upper-case markers.
+        * @param       boolean         If set, all unused markers are deleted (with an empty wrap value, "###" is assumed on both sides).
+        * @return      string          The processed output stream
+        * @see substituteMarker(), substituteMarkerInObject(), TEMPLATE()
+        */
+       public static function substituteMarkerArray($content, $markContentArray, $wrap = '', $uppercase = 0, $deleteUnused = 0) {
+               if (is_array($markContentArray)) {
+                       $wrapArr = array_pad(t3lib_div::trimExplode('|', $wrap), 2, '');
+
+                       foreach ($markContentArray as $marker => $markContent) {
+                               if ($uppercase) {
+                                               // use strtr instead of strtoupper to avoid locale problems with Turkish
+                                       $marker = strtr(
+                                               $marker,
+                                               'abcdefghijklmnopqrstuvwxyz',
+                                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                                       );
+                               }
+
+                               if (count($wrapArr) > 0) {
+                                       $marker = $wrapArr[0] . $marker . $wrapArr[1];
+                               }
+
+                               $content = str_replace($marker, $markContent, $content);
+                       }
+
+                       if ($deleteUnused) {
+                               if (empty($wrap)) {
+                                       $wrapArr = array('###', '###');
+                               }
+                                       // "\-" keeps the hyphen literal (a bare "_-|" would be a character range); "/" is quoted as it is the delimiter
+                               $content = preg_replace('/' . preg_quote($wrapArr[0], '/') . '([A-Z0-9_|\-]*)' . preg_quote($wrapArr[1], '/') . '/is', '', $content);
+                       }
-               } else {
-                       return $content;
-               }
+
+               return $content;
+       }
-       // *******************************************'
-       // COPY FROM class.tslib_content.php: / END
-       // *******************************************'
 
 
 
@@ -222,9 +352,9 @@ class t3lib_parsehtml {
         */
        function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
                $tags=array_unique(t3lib_div::trimExplode(',',$tag,1));
-               $regexStr = '</?('.implode('|',$tags).')(>|[[:space:]][^>]*>)';
+               $regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';
 
-               $parts = spliti($regexStr,$content);
+               $parts = preg_split($regexStr, $content);
 
                $newParts=array();
                $pointer=strlen($parts[0]);
@@ -319,8 +449,8 @@ class t3lib_parsehtml {
         */
        function splitTags($tag,$content)       {
                $tags = t3lib_div::trimExplode(',',$tag,1);
-               $regexStr = '<('.implode('|',$tags).')(>|\/>|[[:space:]][^>]*>)';
-               $parts = spliti($regexStr,$content);
+               $regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
+               $parts = preg_split($regexStr, $content);
 
                $pointer = strlen($parts[0]);
                $newParts = array();
@@ -352,9 +482,8 @@ class t3lib_parsehtml {
         * @see splitIntoBlock(), splitTags()
         */
        function getAllParts($parts,$tag_parts=1,$include_tag=1)        {
-               reset($parts);
                $newParts=array();
-               while(list($k,$v)=each($parts)) {
+               foreach ($parts as $k => $v)    {
                        if (($k+($tag_parts?0:1))%2)    {
                                if (!$include_tag)      $v=$this->removeFirstAndLastTag($v);
                                $newParts[]=$v;
@@ -365,21 +494,18 @@ class t3lib_parsehtml {
 
        /**
         * Removes the first and last tag in the string
-        * Anything before and after the first and last tags respectively is also removed
+        * Anything before the first and after the last tags respectively is also removed
         *
         * @param       string          String to process
         * @return      string
         */
        function removeFirstAndLastTag($str)    {
-                       // First:
-               $endLen = strcspn($str,'>')+1;
-               $str = substr($str,$endLen);
-                       // Last:
-               $str = strrev($str);
-               $endLen = strcspn($str,'<')+1;
-               $str = substr($str,$endLen);
+                       // End of first tag:
+               $start = strpos($str,'>');
+                       // Begin of last tag:
+               $end = strrpos($str,'<');
                        // return
-               return strrev($str);
+               return substr($str, $start+1, $end-$start-1);
        }
 
        /**
@@ -391,9 +517,8 @@ class t3lib_parsehtml {
         */
        function getFirstTag($str)      {
                        // First:
-               $endLen = strcspn($str,'>')+1;
-               $str = substr($str,0,$endLen);
-               return $str;
+               $endLen = strpos($str,'>')+1;
+               return substr($str,0,$endLen);
        }
 
        /**
@@ -405,10 +530,14 @@ class t3lib_parsehtml {
         * @see getFirstTag()
         */
        function getFirstTagName($str,$preserveCase=FALSE)      {
-               list($tag) = split('[[:space:]]',substr(trim($this->getFirstTag($str)),1,-1), 2);
-               if (!$preserveCase)     $tag = strtoupper($tag);
-
-               return trim($tag);
+               $matches = array();
+               if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $matches)===1)        {
+                       if (!$preserveCase)     {
+                               return strtoupper($matches[1]);
+                       }
+                       return $matches[1];
+               }
+               return '';
        }
 
        /**
@@ -422,11 +551,11 @@ class t3lib_parsehtml {
        function get_tag_attributes($tag,$deHSC=0)      {
                list($components,$metaC) = $this->split_tag_attributes($tag);
                $name = '';      // attribute name is stored here
-               $valuemode = '';
+               $valuemode = false;
                $attributes = array();
                $attributesMeta = array();
                if (is_array($components))      {
-                       while (list($key,$val) = each ($components))    {
+                       foreach ($components as $key => $val)   {
                                if ($val != '=')        {       // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value
                                        if ($valuemode) {
                                                if ($name)      {
@@ -435,19 +564,18 @@ class t3lib_parsehtml {
                                                        $name = '';
                                                }
                                        } else {
-                                               if ($namekey = ereg_replace('[^a-zA-Z0-9_:-]','',$val)) {
+                                               if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val))     {
                                                        $name = strtolower($namekey);
                                                        $attributesMeta[$name]=array();
                                                        $attributesMeta[$name]['origTag']=$namekey;
                                                        $attributes[$name] = '';
                                                }
                                        }
-                                       $valuemode = '';
+                                       $valuemode = false;
                                } else {
-                                       $valuemode = 'on';
+                                       $valuemode = true;
                                }
                        }
-                       if (is_array($attributes))      reset($attributes);
                        return array($attributes,$attributesMeta);
                }
        }
@@ -462,32 +590,27 @@ class t3lib_parsehtml {
         * @see t3lib_div::split_tag_attributes()
         */
        function split_tag_attributes($tag)     {
-               $tag_tmp = trim(eregi_replace ('^<[^[:space:]]*','',trim($tag)));
-                       // Removes any > in the end of the string
-               $tag_tmp = trim(eregi_replace ('>$','',$tag_tmp));
+               $matches = array();
+               if (preg_match('/(\<[^\s]+\s+)?(.*?)\s*(\>)?$/s', $tag, $matches)!==1)  {
+                       return array(array(), array());
+               }
+               $tag_tmp = $matches[2];
 
                $metaValue = array();
                $value = array();
-               while (strcmp($tag_tmp,''))     {       // Compared with empty string instead , 030102
-                       $firstChar=substr($tag_tmp,0,1);
-                       if (!strcmp($firstChar,'"') || !strcmp($firstChar,"'")) {
-                               $reg=explode($firstChar,$tag_tmp,3);
-                               $value[]=$reg[1];
-                               $metaValue[]=$firstChar;
-                               $tag_tmp=trim($reg[2]);
-                       } elseif (!strcmp($firstChar,'=')) {
-                               $value[] = '=';
-                               $metaValue[]='';
-                               $tag_tmp = trim(substr($tag_tmp,1));            // Removes = chars.
-                       } else {
-                                       // There are '' around the value. We look for the next ' ' or '>'
-                               $reg = split('[[:space:]=]',$tag_tmp,2);
-                               $value[] = trim($reg[0]);
-                               $metaValue[]='';
-                               $tag_tmp = trim(substr($tag_tmp,strlen($reg[0]),1).$reg[1]);
+               $matches = array();
+               if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $tag_tmp, $matches)>0)     {
+                       foreach ($matches[1] as $part)  {
+                               $firstChar = substr($part, 0, 1);
+                               if ($firstChar=='"' || $firstChar=="'") {
+                                       $metaValue[] = $firstChar;
+                                       $value[] = substr($part, 1, -1);
+                               } else  {
+                                       $metaValue[] = '';
+                                       $value[] = $part;
+                               }
                        }
                }
-               if (is_array($value))   reset($value);
                return array($value,$metaValue);
        }
 
@@ -516,8 +639,8 @@ class t3lib_parsehtml {
                        // Block tags, must have endings...
                $blockTags = explode(',',$blockTags);
                foreach($blockTags as $tagName) {
-                       $countBegin = count(split('<'.$tagName.'[^[:alnum:]]',$content))-1;
-                       $countEnd = count(split('<\/'.$tagName.'[^[:alnum:]]',$content))-1;
+                       $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
+                       $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
                        $analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd);
                        if ($countBegin)        $analyzedOutput['counts'][$tagName]=$countBegin;
                        if ($countBegin-$countEnd)      {
@@ -532,8 +655,8 @@ class t3lib_parsehtml {
                        // Solo tags, must NOT have endings...
                $soloTags = explode(',',$soloTags);
                foreach($soloTags as $tagName)  {
-                       $countBegin = count(split('<'.$tagName.'[^[:alnum:]]',$content))-1;
-                       $countEnd = count(split('<\/'.$tagName.'[^[:alnum:]]',$content))-1;
+                       $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
+                       $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
                        $analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd);
                        if ($countBegin)        $analyzedOutput['counts'][$tagName]=$countBegin;
                        if ($countEnd)  {
@@ -572,8 +695,8 @@ class t3lib_parsehtml {
         *              'allowedAttribs' =>   '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
         *              'fixAttrib' => Array(
         *                      '[attribute name]' => Array (
-        *                              'set' => Force the attribute value to this value.
-        *                              'unset' => Boolean: If set, the attribute is unset.
+        *                              'set' => Force the attribute value to this value.
+        *                              'unset' => Boolean: If set, the attribute is unset.
         *                              'default' =>    If no attribute exists by this name, this value is set as default value (if this value is not blank)
         *                              'always' =>     Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
         *                              'trim,intval,lower,upper' =>    All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
@@ -606,15 +729,38 @@ class t3lib_parsehtml {
                $c = 1;
                $tagRegister = array();
                $tagStack = array();
+               $inComment = false; $skipTag = false;
                while(list(,$tok)=each($tokArr))        {
+                       if ($inComment) {
+                               if (($eocPos = strpos($tok, '-->')) === false) {
+                                       // End of comment is not found in this token. Go further until the end of the comment is found in a later token.
+                                       $newContent[$c++] = '<' . $tok;
+                                       continue;
+                               }
+                               // Comment ends in the middle of the token: add comment and proceed with rest of the token
+                               $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
+                               $tok = substr($tok, $eocPos + 3);
+                               $inComment = false; $skipTag = true;
+                       }
+                       elseif (substr($tok, 0, 3) == '!--') {
+                               if (($eocPos = strpos($tok, '-->')) === false) {
+                                       // Comment started in this token but does not end in the same token. Set a flag to skip until the end of the comment
+                                       $newContent[$c++] = '<' . $tok;
+                                       $inComment = true;
+                                       continue;
+                               }
+                               // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
+                               $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
+                               $tok = substr($tok, $eocPos + 3);
+                               $skipTag = true;
+                       }
                        $firstChar = substr($tok,0,1);
-#                      if (strcmp(trim($firstChar),''))        {               // It is a tag...
-                       if (ereg('[[:alnum:]\/]',$firstChar))   {               // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
-                               $tagEnd = strcspn($tok,'>');
-                               if (strlen($tok)!=$tagEnd)      {       // If there is and end-bracket...
+                       if (!$skipTag && preg_match('/[[:alnum:]\/]/',$firstChar)==1)   {               // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
+                               $tagEnd = strpos($tok,'>');
+                               if ($tagEnd)    {       // If there is and end-bracket...       tagEnd can't be 0 as the first character can't be a >
                                        $endTag = $firstChar=='/' ? 1 : 0;
                                        $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
-                                       $tagParts = split('[[:space:]]',$tagContent,2);
+                                       $tagParts = preg_split('/\s+/s',$tagContent,2);
                                        $tagName = strtolower($tagParts[0]);
                                        if (isset($tags[$tagName]))     {
                                                if (is_array($tags[$tagName]))  {       // If there is processing to do for the tag:
@@ -633,8 +779,11 @@ class t3lib_parsehtml {
                                                                                $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                                                                $tagParts[1]='';
                                                                                $newTagAttrib = array();
-                                                                               $tList = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
-                                                                               while(list(,$allowTag)=each($tList))    {
+                                                                               if (!($tList = $tags[$tagName]['_allowedAttribs']))     {
+                                                                                               // Explode the allowed-attributes list only once per tag
+                                                                                       $tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
+                                                                               }
+                                                                               foreach ($tList as $allowTag)   {
                                                                                        if (isset($tagAttrib[0][$allowTag]))    $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
                                                                                }
                                                                                $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
@@ -770,15 +919,14 @@ class t3lib_parsehtml {
                                        $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // There were not end-bracket, so no tag...
                                }
                        } else {
-                               $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // It was not a tag anyways
+                               $newContent[$c++]=$this->processContent(($skipTag ? '' : '<') . $tok, $hSC, $addConfig);        // It was not a tag anyways
+                               $skipTag = false;
                        }
                }
 
                        // Unsetting tags:
-               reset($tagRegister);
-               while(list($tag,$positions)=each($tagRegister)) {
-                       reset($positions);
-                       while(list(,$pKey)=each($positions))    {
+               foreach ($tagRegister as $tag => $positions)    {
+                       foreach ($positions as $pKey)   {
                                unset($newContent[$pKey]);
                        }
                }
@@ -813,12 +961,13 @@ class t3lib_parsehtml {
         * @param       string          Prefix string
         * @param       string          HTML content
         * @param       array           Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase
+        * @param       string          Suffix string (put after the resource).
         * @return      string          Processed HTML content
         */
-       function prefixResourcePath($main_prefix,$content,$alternatives=array())        {
+       function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')     {
 
-               $parts = $this->splitTags('td,table,body,img,input,form,link,script,a',$content);
-               foreach($parts as $k => $v)     {
+               $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
+               foreach ($parts as $k => $v)    {
                        if ($k%2)       {
                                $params = $this->get_tag_attributes($v,1);
                                $tagEnd = substr($v,-2)=='/>' ? ' />' : '>';    // Detect tag-ending so that it is re-applied correctly.
@@ -832,7 +981,7 @@ class t3lib_parsehtml {
                                        case 'table':
                                                $src = $params[0]['background'];
                                                if ($src)       {
-                                                       $params[0]['background'] = $this->prefixRelPath($prefix,$params[0]['background']);
+                                                       $params[0]['background'] = $this->prefixRelPath($prefix,$params[0]['background'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
@@ -840,9 +989,10 @@ class t3lib_parsehtml {
                                        case 'img':
                                        case 'input':
                                        case 'script':
+                                       case 'embed':
                                                $src = $params[0]['src'];
                                                if ($src)       {
-                                                       $params[0]['src'] = $this->prefixRelPath($prefix,$params[0]['src']);
+                                                       $params[0]['src'] = $this->prefixRelPath($prefix,$params[0]['src'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
@@ -850,7 +1000,7 @@ class t3lib_parsehtml {
                                        case 'a':
                                                $src = $params[0]['href'];
                                                if ($src)       {
-                                                       $params[0]['href'] = $this->prefixRelPath($prefix,$params[0]['href']);
+                                                       $params[0]['href'] = $this->prefixRelPath($prefix,$params[0]['href'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
@@ -858,16 +1008,15 @@ class t3lib_parsehtml {
                                        case 'form':
                                                $src = $params[0]['action'];
                                                if ($src)       {
-                                                       $params[0]['action'] = $this->prefixRelPath($prefix,$params[0]['action']);
+                                                       $params[0]['action'] = $this->prefixRelPath($prefix,$params[0]['action'],$suffix);
                                                        $somethingDone=1;
                                                }
                                        break;
                                }
                                if ($somethingDone)     {
-                                       $tagParts = split('[[:space:]]',$v,2);
+                                       $tagParts = preg_split('/\s+/s',$v,2);
                                        $tagParts[1]=$this->compileTagAttribs($params[0],$params[1]);
-                                       $parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).
-                                                                       $tagEnd;
+                                       $parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).$tagEnd;
                                }
                        }
                }
@@ -879,7 +1028,7 @@ class t3lib_parsehtml {
                        $parts = $this->splitIntoBlock('style',$content);
                        foreach($parts as $k => $v)     {
                                if ($k%2)       {
-                                       $parts[$k] = eregi_replace('(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))','\1'.$prefix.'\2\3',$parts[$k]);
+                                       $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i','\1'.$prefix.'\2'.$suffix.'\3',$parts[$k]);
                                }
                        }
                        $content = implode('',$parts);
@@ -893,13 +1042,14 @@ class t3lib_parsehtml {
         *
         * @param       string          Prefix string
         * @param       string          Relative path/URL
+        * @param       string          Suffix string
         * @return      string          Output path, prefixed if no scheme in input string
         * @access private
         */
-       function prefixRelPath($prefix,$srcVal) {
+       function prefixRelPath($prefix,$srcVal,$suffix='')      {
                $pU = parse_url($srcVal);
                if (!$pU['scheme'] && substr($srcVal, 0, 1)!='/')       { // If not an absolute URL.
-                       $srcVal = $prefix.$srcVal;
+                       $srcVal = $prefix.$srcVal.$suffix;
                }
                return $srcVal;
        }
@@ -916,8 +1066,7 @@ class t3lib_parsehtml {
         */
        function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)     {
                $fontSplit = $this->splitIntoBlock('font',$value);      // ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set!
-               reset($fontSplit);
-               while(list($k,$v)=each($fontSplit))     {
+               foreach ($fontSplit as $k => $v)        {
                        if ($k%2)       {       // font:
                                $attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v));
                                $newAttribs=array();
@@ -948,9 +1097,7 @@ class t3lib_parsehtml {
        function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
 
                foreach($tags as $from => $to)  {
-                       $value = eregi_replace($ltChar.$from.'>',$ltChar2.$to.'>',$value);
-                       $value = eregi_replace($ltChar.$from.'[[:space:]]([^>]*)>',$ltChar2.$to.' \\1>',$value);
-                       $value = eregi_replace($ltChar.'\/'.$from.'[^>]*>',$ltChar2.'/'.$to.'>',$value);
+                       $value = preg_replace('/'.preg_quote($ltChar).'(\/)?'.$from.'\s([^\>])*(\/)?\>/', $ltChar2.'$1'.$to.' $2$3>', $value);
                }
                return $value;
        }
@@ -974,7 +1121,7 @@ class t3lib_parsehtml {
                                if (strlen($tok)!=$tagEnd)      {
                                        $endTag = $firstChar=='/' ? 1 : 0;
                                        $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
-                                       $tagParts = split('[[:space:]]',$tagContent,2);
+                                       $tagParts = preg_split('/\s+/s',$tagContent,2);
                                        $tagName = strtolower($tagParts[0]);
                                        if (!strcmp($tagList,'') || in_array($tagName,$tagsArray))      {
                                                $contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
@@ -999,13 +1146,13 @@ class t3lib_parsehtml {
                $tags=t3lib_div::trimExplode(',',$tagList,1);
                $forthArr=array();
                $backArr=array();
-               while(list(,$theTag)=each($tags))       {
+               foreach ($tags as $theTag)      {
                        $forthArr[$theTag]=md5($theTag);
                        $backArr[md5($theTag)]=$theTag;
                }
-                       $value = $this->mapTags($value,$forthArr,'<','_');
-                       $value=strip_tags($value);
-                       $value = $this->mapTags($value,$backArr,'_','<');
+               $value = $this->mapTags($value,$forthArr,'<','_');
+               $value=strip_tags($value);
+               $value = $this->mapTags($value,$backArr,'_','<');
                return $value;
        }
 
@@ -1013,23 +1160,26 @@ class t3lib_parsehtml {
         * Internal function for case shifting of a string or whole array
         *
         * @param       mixed           Input string/array
-        * @param       boolean         If $str is a string AND this boolean is true, the string is returned in uppercase
+        * @param       boolean         Case-sensitive flag: if this boolean is false, the input string (or every value of the input array) is returned in uppercase; if true, the input is left unchanged
         * @param       string          Key string used for internal caching of the results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
         * @return      string          Output string, processed
         * @access private
         */
        function caseShift($str,$flag,$cacheKey='')     {
+               $cacheKey .= $flag?1:0;
                if (is_array($str))     {
                        if (!$cacheKey || !isset($this->caseShift_cache[$cacheKey]))    {
                                reset($str);
-                               while(list($k)=each($str))      {
-                                       $str[$k] = strtoupper($str[$k]);
+                               foreach ($str as $k => $v)      {
+                                       if (!$flag)     {
+                                               $str[$k] = strtoupper($v);
+                                       }
                                }
                                if ($cacheKey)  $this->caseShift_cache[$cacheKey]=$str;
                        } else {
                                $str = $this->caseShift_cache[$cacheKey];
                        }
-               } elseif (!$flag)       $str = strtoupper($str);
+               } elseif (!$flag)       { $str = strtoupper($str); }
                return $str;
        }
 
@@ -1044,8 +1194,7 @@ class t3lib_parsehtml {
         */
        function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)     {
                $accu=array();
-               reset($tagAttrib);
-               while(list($k,$v)=each($tagAttrib))     {
+               foreach ($tagAttrib as $k =>$v) {
                        if ($xhtmlClean)        {
                                $attr=strtolower($k);
                                if (strcmp($v,'') || isset($meta[$k]['dashType']))      {
@@ -1087,7 +1236,7 @@ class t3lib_parsehtml {
        function indentLines($content, $number=1, $indentChar="\t")     {
                $preTab = str_pad('', $number*strlen($indentChar), $indentChar);
                $lines = explode(chr(10),str_replace(chr(13),'',$content));
-               while(list($k,$v) = each($lines))       {
+               foreach ($lines as $k => $v)    {
                        $lines[$k] = $preTab.$v;
                }
                return implode(chr(10), $lines);
@@ -1117,7 +1266,7 @@ class t3lib_parsehtml {
                        }
 
                        reset($TSconfig['tags.']);
-                       while(list($key,$tagC)=each($TSconfig['tags.']))        {
+                       foreach ($TSconfig['tags.'] as $key => $tagC)   {
                                if (is_array($tagC) && $key==strtolower($key))  {
                                        $key=substr($key,0,-1);
                                        if (!is_array($keepTags[$key])) $keepTags[$key]=array();
@@ -1255,9 +1404,9 @@ class t3lib_parsehtml {
                if ($conf['xhtml'])     {
                        if ($endTag)    {       // Endtags are just set lowercase right away
                                $value = strtolower($value);
-                       } elseif (substr($value,0,2)!='<!') {   // ... and comments are ignored.
+                       } elseif (substr($value,0,4)!='<!--') { // ... and comments are ignored.
                                $inValue = substr($value,1,(substr($value,-2)=='/>'?-2:-1));    // Finding inner value with out < >
-                               list($tagName,$tagP)=split('[[:space:]]',$inValue,2);   // Separate attributes and tagname
+                               list($tagName,$tagP)=preg_split('/\s+/s',$inValue,2);   // Separate attributes and tagname
                                $tagName = strtolower($tagName);
 
                                        // Process attributes
@@ -1268,11 +1417,11 @@ class t3lib_parsehtml {
                                reset($tagAttrib[0]);
                                while(list($attrib_name,$attrib_value)=each($tagAttrib[0]))     {
                                                // Set attributes: lowercase, always in quotes, with htmlspecialchars converted.
-                                       $outA[]=$attrib_name.'="'.htmlspecialchars($this->bidir_htmlspecialchars($attrib_value,-1)).'"';
+                                       $outA[]=$attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
                                }
                                $newTag='<'.trim($tagName.' '.implode(' ',$outA));
                                        // All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
-                               if (t3lib_div::inList('img,br,hr,meta,link,base,area,input',$tagName) || substr($value,-2)=='/>')       {
+                               if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || substr($value,-2)=='/>')     {
                                        $newTag.=' />';
                                } else {
                                        $newTag.='>';
@@ -1304,4 +1453,5 @@ class t3lib_parsehtml {
 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])        {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
 }
-?>
+
+?>
\ No newline at end of file