Added <span title=""> to path in backend modules in Web main module; you can now...
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2004 Kasper Skaarhoj (kasper@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Contains class with functions for parsing HTML code.
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 July/2003 by Kasper Skaarhoj
32 *
33 * @author Kasper Skaarhoj <kasper@typo3.com>
34 */
35 /**
36 * [CLASS/FUNCTION INDEX of SCRIPT]
37 *
38 *
39 *
40 * 106: class t3lib_parsehtml
41 * 123: function getSubpart($content, $marker)
42 * 151: function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
43 *
44 * SECTION: Parsing HTML code
45 * 223: function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
46 * 284: function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
47 * 319: function splitTags($tag,$content)
48 * 353: function getAllParts($parts,$tag_parts=1,$include_tag=1)
49 * 372: function removeFirstAndLastTag($str)
50 * 391: function getFirstTag($str)
51 * 406: function getFirstTagName($str,$preserveCase=FALSE)
52 * 421: function get_tag_attributes($tag,$deHSC=0)
53 * 463: function split_tag_attributes($tag)
54 * 506: function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
55 *
56 * SECTION: Clean HTML code
57 * 597: function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
58 * 791: function bidir_htmlspecialchars($value,$dir)
59 * 813: function prefixResourcePath($main_prefix,$content,$alternatives=array())
60 * 881: function prefixRelPath($prefix,$srcVal)
61 * 899: function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
62 * 930: function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
63 * 947: function unprotectTags($content,$tagList='')
64 * 980: function stripTagsExcept($value,$tagList)
65 * 1003: function caseShift($str,$flag,$cacheKey='')
66 * 1027: function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
67 * 1056: function get_tag_attributes_classic($tag,$deHSC=0)
68 * 1069: function indentLines($content, $number=1, $indentChar="\t")
69 * 1086: function HTMLparserConfig($TSconfig,$keepTags=array())
70 * 1210: function XHTML_clean($content)
71 * 1233: function processTag($value,$conf,$endTag,$protected=0)
72 * 1280: function processContent($value,$dir,$conf)
73 *
74 * TOTAL FUNCTIONS: 28
75 * (This index is automatically created/updated by the extension "extdeveval")
76 *
77 */
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98 /**
99 * Functions for parsing HTML.
100 * You are encouraged to use this class in your own applications
101 *
102 * @author Kasper Skaarhoj <kasper@typo3.com>
103 * @package TYPO3
104 * @subpackage t3lib
105 */
106 class t3lib_parsehtml {
107 var $caseShift_cache=array();
108
109
110 // *******************************************'
111 // COPY FROM class.tslib_content.php: / BEGIN
112 // substituteSubpart
113 // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
114 // *******************************************'
115
/**
 * Returns the first subpart encapsulated in the marker, $marker (possibly present in $content as a HTML comment)
 *
 * The subpart is the text between the first two occurrences of $marker. If the opening marker is
 * followed by the rest of an HTML comment ("... -->") or the closing marker is preceded by the start
 * of one ("<!-- ..."), those comment fragments are excluded from the returned subpart.
 *
 * @param	string		Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
 * @param	string		Marker string, eg. "###CONTENT_PART###"
 * @return	string		The subpart content. An empty string if the closing marker is missing. Nothing (NULL) is returned if $marker is empty or does not occur in $content.
 */
function getSubpart($content, $marker) {
	if (!$marker || strpos($content, $marker) === FALSE) {
		return;
	}

		// $start points right after the opening marker, $stop at the closing marker.
		// Searching from $start (not $start+1) also finds a closing marker directly adjacent to the opening one (empty subpart).
	$start = strpos($content, $marker) + strlen($marker);
	$stop = strpos($content, $marker, $start);
	if ($stop === FALSE) {
		return '';
	}
	$sub = (string)substr($content, $start, $stop - $start);

		// Skip the tail of the comment that wraps the opening marker ("... -->")
	$reg = array();
	if (preg_match('/^[^<]*-->/', $sub, $reg)) {
		$start += strlen($reg[0]);
	}

		// Cut off the head of the comment that wraps the closing marker ("<!-- ...")
	$reg = array();
	if (preg_match('/<!--[^>]*\z/', $sub, $reg)) {
		$stop -= strlen($reg[0]);
	}

	return (string)substr($content, $start, $stop - $start);
}
140
/**
 * Substitutes a subpart in $content with the content of $subpartContent.
 *
 * The subpart is delimited by two occurrences of $marker. If a marker is wrapped in an HTML comment
 * ("<!-- ###MARKER### -->"), the whole comment is treated as part of the marker.
 * If the marker does not occur twice in $content, $content is returned unchanged.
 *
 * @param	string		Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
 * @param	string		Marker string, eg. "###CONTENT_PART###"
 * @param	array		If $subpartContent happens to be an array, its [0] and [1] elements are wrapped around the content of the subpart (fetched by getSubpart()). Otherwise it is used as the replacement string.
 * @param	boolean		If $recursive is set, the function calls itself with the content set to the remaining part of the content after the second marker. This means that succeeding subparts are ALSO substituted!
 * @param	boolean		If set, the marker around the subpart is not removed, but kept in the output
 * @return	string		Processed input content
 */
function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
		// An empty marker can never delimit anything
	if (!strcmp($marker,'')) {
		return $content;
	}

		// Locate both markers. Explicit FALSE checks so that a marker at offset 0 is handled
		// and a missing second marker is not mistaken for a found one.
	$start = strpos($content, $marker);
	if ($start === FALSE) {
		return $content;
	}
	$stop = strpos($content, $marker, $start + strlen($marker));
	if ($stop === FALSE) {
		return $content;
	}
	$stop += strlen($marker);	// $stop now points right after the closing marker

		// Code before: extend $start backwards over a comment opening ("<!-- ...") in front of the marker
	$reg = array();
	$openComment = preg_match('/<!--[^>]*\z/', (string)substr($content, 0, $start), $reg) ? $reg[0] : '';
	$start -= strlen($openComment);
	$before_marker = '';
	if ($keepMarker) {
		$reg_k = array();
		if (strcmp($openComment,'') && preg_match('/^[^>]*-->/', (string)substr($content, $start), $reg_k)) {
			$before_marker = $reg_k[0];	// The complete comment holding the opening marker
		} else {
			$before_marker = (string)substr($content, $start, strlen($marker));
		}
	}
	$before = (string)substr($content, 0, $start);

		// Code after: extend $stop forwards over a comment ending ("... -->") behind the marker
	$reg = array();
	$closeComment = preg_match('/^[^<]*-->/', (string)substr($content, $stop), $reg) ? $reg[0] : '';
	$stop += strlen($closeComment);
	$after_marker = '';
	if ($keepMarker) {
		$reg_k = array();
		if (strcmp($closeComment,'') && preg_match('/<!--[^<]*\z/', (string)substr($content, 0, $stop), $reg_k)) {
			$sLen = strlen($reg_k[0]);	// The complete comment holding the closing marker
		} else {
			$sLen = strlen($marker);
		}
		$after_marker = (string)substr($content, $stop-$sLen, $sLen);
	}
	$after = (string)substr($content, $stop);

		// Replacement: either a plain string or a wrap around the existing subpart content
	if (is_array($subpartContent)) {
		$substContent = $subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
	} else {
		$substContent = $subpartContent;
	}

	$result = $before.($keepMarker?$before_marker:'').$substContent.($keepMarker?$after_marker:'');

		// Process following subparts with the same settings ($keepMarker is passed on as well)
	if ($recursive && strpos($after, $marker) !== FALSE) {
		return $result.$this->substituteSubpart($after,$marker,$subpartContent,$recursive,$keepMarker);
	}
	return $result.$after;
}
196 // *******************************************'
197 // COPY FROM class.tslib_content.php: / END
198 // *******************************************'
199
200
201
202
203
204
205
206 /************************************
207 *
208 * Parsing HTML code
209 *
210 ************************************/
211
/**
 * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
 * Even numbers in the array are outside the blocks, Odd numbers are block-content.
 * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
 *
 * Tag names are matched case-insensitively and literally (they are escaped before being put into the regular expression).
 * Nested blocks of the listed tags are kept together inside the outermost block.
 *
 * @param	string		List of tags, comma separated.
 * @param	string		HTML-content
 * @param	boolean		If set, excessive end tags are ignored - you should probably set this in most cases.
 * @return	array		Even numbers in the array are outside the blocks, Odd numbers are block-content.
 * @see splitTags(), getAllParts(), removeFirstAndLastTag()
 */
function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
	$tags = array_unique(t3lib_div::trimExplode(',',$tag,1));
	$quotedTags = array();
	foreach ($tags as $tagName) {
		$quotedTags[] = preg_quote($tagName,'/');
	}
		// Matches start- and end-tags of the listed elements: "<tag>", "<tag ...>", "</tag>"
	$regexStr = '/<\/?(?:'.implode('|',$quotedTags).')(?:>|[[:space:]][^>]*>)/i';

	$parts = preg_split($regexStr,$content);
	if (!is_array($parts)) {	// Regex failure: treat everything as content outside blocks
		$parts = array($content);
	}

	$newParts = array();
	$pointer = strlen($parts[0]);	// Position in $content of the tag that precedes the next part
	$buffer = $parts[0];
	$nested = 0;
	$partCount = count($parts);
	for ($k=1; $k<$partCount; $k++) {
		$v = $parts[$k];	// Text following the tag found at $pointer
		$isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
		$tagLen = strcspn(substr($content,$pointer),'>')+1;

		if (!$isEndTag) {	// We meet a start-tag:
			if (!$nested) {	// Ground level:
				$newParts[] = $buffer;	// previous buffer stored
				$buffer = '';
			}
			$nested++;	// We are inside now!
			$mbuffer = substr($content,$pointer,strlen($v)+$tagLen);	// New buffer set and pointer increased
			$pointer += strlen($mbuffer);
			$buffer .= $mbuffer;
		} else {	// If we meet an endtag:
			$nested--;	// decrease nested-level
			$eliminated = 0;
			if ($eliminateExtraEndTags && $nested<0) {
				$nested = 0;
				$eliminated = 1;	// The surplus end-tag is dropped from the output
			} else {
				$buffer .= substr($content,$pointer,$tagLen);	// Add the endtag to current buffer
			}
			$pointer += $tagLen;
			if (!$nested && !$eliminated) {	// if we're back on ground level (and not by eliminating tags)...
				$newParts[] = $buffer;
				$buffer = '';
			}
			$mbuffer = substr($content,$pointer,strlen($v));	// New buffer set and pointer increased
			$pointer += strlen($mbuffer);
			$buffer .= $mbuffer;
		}
	}
	$newParts[] = $buffer;
	return $newParts;
}
271
/**
 * Splits content into blocks *recursively* and lets call back methods process the tags and the content in between.
 *
 * For every block found by splitIntoBlock() the surrounding start/end tag is handed to the tag call back (if given)
 * and the inner content is processed by a recursive call. Content outside blocks is handed to the content call back (if given).
 *
 * @param	string		Tag list, see splitIntoBlock()
 * @param	string		Content, see splitIntoBlock()
 * @param	object		Object where call back methods are.
 * @param	string		Name of call back method for content; "function callBackContent($str,$level)"
 * @param	string		Name of call back method for tags; "function callBackTags($tags,$level)". Receives and returns an array with the keys "tag_start", "tag_end", "tag_name" and "add_level".
 * @param	integer		Indent level
 * @return	string		Processed content
 * @see splitIntoBlock()
 */
function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0) {
	$output = '';
	$blocks = $this->splitIntoBlock($tag,$content,TRUE);

	foreach ($blocks as $index => $block) {
			// Even index: content outside the blocks
		if (!($index%2)) {
			$output .= $callBackContent ? $procObj->$callBackContent($block,$level) : $block;
			continue;
		}

			// Odd index: a block. Describe its wrapping tag...
		$rawTagName = $this->getFirstTagName($block, TRUE);
		$tagInfo = array(
			'tag_start' => $this->getFirstTag($block),
			'tag_end' => '</'.$rawTagName.'>',
			'tag_name' => strtolower($rawTagName),
			'add_level' => 1
		);

			// ... let the call back adjust it ...
		if ($callBackTags) {
			$tagInfo = $procObj->$callBackTags($tagInfo,$level);
		}

			// ... and process the inner content one level (or "add_level" levels) deeper.
		$inner = $this->splitIntoBlockRecursiveProc(
			$tag,
			$this->removeFirstAndLastTag($block),
			$procObj,
			$callBackContent,
			$callBackTags,
			$level+$tagInfo['add_level']
		);
		$output .= $tagInfo['tag_start'].$inner.$tagInfo['tag_end'];
	}

	return $output;
}
308
/**
 * Returns an array with the $content divided by the (single) tags specified with the list of tags, $tag
 * Even numbers in the array are the content outside the tags, Odd numbers are the tags themselves.
 * Only start-tags ("<tag>" / "<tag ...>") are matched; tag names are matched case-insensitively and literally.
 * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
 *
 * @param	string		List of tags, comma separated.
 * @param	string		HTML-content
 * @return	array		Even numbers in the array are outside the tags, Odd numbers are the tags.
 * @see splitIntoBlock(), getAllParts(), removeFirstAndLastTag()
 */
function splitTags($tag,$content) {
	$tags = t3lib_div::trimExplode(',',$tag,1);
	$quotedTags = array();
	foreach ($tags as $tagName) {
		$quotedTags[] = preg_quote($tagName,'/');
	}
	$regexStr = '/<(?:'.implode('|',$quotedTags).')(?:>|[[:space:]][^>]*>)/i';

	$parts = preg_split($regexStr,$content);
	if (!is_array($parts)) {	// Regex failure: everything is non-tag content
		$parts = array($content);
	}

	$pointer = strlen($parts[0]);	// Position in $content of the next matched tag
	$newParts = array();
	$newParts[] = $parts[0];
	$partCount = count($parts);
	for ($k=1; $k<$partCount; $k++) {
		$tagLen = strcspn(substr($content,$pointer),'>')+1;

			// Set tag (taken from the original content, since preg_split() drops the delimiters):
		$tagString = substr($content,$pointer,$tagLen);
		$newParts[] = $tagString;
		$pointer += strlen($tagString);

			// Set content:
		$newParts[] = $parts[$k];
		$pointer += strlen($parts[$k]);
	}
	return $newParts;
}
343
/**
 * Returns an array with either tag or non-tag content of the result from ->splitIntoBlock()/->splitTags()
 *
 * Tag-parts are the entries with odd keys in $parts, non-tag-parts the entries with even keys.
 *
 * @param	array		Parts generated by ->splitIntoBlock() or ->splitTags()
 * @param	boolean		Whether to return the tag-parts (default,true) or what was outside the tags.
 * @param	boolean		Whether to include the tags in the tag-parts (most useful for input made by ->splitIntoBlock()). If not set, each returned part is passed through ->removeFirstAndLastTag()
 * @return	array		Tag-parts/Non-tag-parts depending on input argument settings
 * @see splitIntoBlock(), splitTags()
 */
function getAllParts($parts,$tag_parts=1,$include_tag=1) {
	$newParts = array();
		// Shifting the key by one swaps which parity (odd/even) is selected
	$offset = $tag_parts ? 0 : 1;
	foreach ($parts as $k => $v) {
		if (($k+$offset)%2) {
			if (!$include_tag) {
				$v = $this->removeFirstAndLastTag($v);
			}
			$newParts[] = $v;
		}
	}
	return $newParts;
}
364
/**
 * Strips the first and the last tag from the string.
 * Everything up to and including the first ">" is removed, and everything from the last "<" onwards.
 * If either of these characters is missing in the respective remainder, an empty string is the result.
 *
 * @param	string		String to process
 * @return	string		The content between the first and the last tag
 */
function removeFirstAndLastTag($str) {
		// First: cut away everything through the first ">"
	$firstClose = strpos($str,'>');
	if ($firstClose === FALSE) {
		return '';
	}
	$inner = (string)substr($str,$firstClose+1);

		// Last: cut away everything from the last "<"
	$lastOpen = strrpos($inner,'<');
	if ($lastOpen === FALSE) {
		return '';
	}
	return (string)substr($inner,0,$lastOpen);
}
383
/**
 * Returns the first tag in $str
 * In fact everything from the beginning of $str up to and including the first ">" is returned,
 * so make sure the tag is the first thing in the string. Without any ">" the whole string is returned.
 *
 * @param	string		HTML string with tags
 * @return	string		The leading part of $str through the first ">"
 */
function getFirstTag($str) {
	$closePos = strpos($str,'>');
		// No ">" at all: ask for one char more than the string holds, i.e. the complete string
	$length = ($closePos === FALSE) ? strlen($str)+1 : $closePos+1;
	return substr($str,0,$length);
}
397
/**
 * Returns the NAME of the first tag in $str
 *
 * The name is everything between the "<" and the first whitespace character (or the closing ">") of the first tag.
 *
 * @param	string		HTML tag (The element name must be separated from the attributes by a whitespace character)
 * @param	boolean		If set, then the tag is NOT converted to uppercase but case is preserved.
 * @return	string		Tag name in upper case (or in original case if $preserveCase is set)
 * @see getFirstTag()
 */
function getFirstTagName($str,$preserveCase=FALSE) {
		// Content of the first tag without the enclosing "<" and ">"
	$tagInner = (string)substr(trim($this->getFirstTag($str)),1,-1);
	$pieces = preg_split('/[[:space:]]/',$tagInner,2);
	$tag = is_array($pieces) ? $pieces[0] : $tagInner;
	if (!$preserveCase) {
		$tag = strtoupper($tag);
	}

	return trim($tag);
}
412
/**
 * Returns an array with all attributes as keys. Attribute names are reduced to the characters a-z, 0-9, "_" and "-" and converted to lowercase.
 * If an attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
 *
 * @param	string		Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
 * @param	boolean		If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
 * @return	array		array(Tag attributes,Attribute meta-data). The meta-data holds per attribute "origTag" (name in original case) and, if a value was found, "dashType" (the quote character used around the value, or blank).
 */
function get_tag_attributes($tag,$deHSC=0) {
	list($components,$metaC) = $this->split_tag_attributes($tag);
	$name = '';	// attribute name is stored here
	$valuemode = FALSE;	// Set when a "=" was just seen, meaning the next component is a value
	$attributes = array();
	$attributesMeta = array();
	if (is_array($components)) {
		foreach ($components as $key => $val) {
			if ($val != '=') {
				if ($valuemode) {
						// Only if $name is set (an attribute is waiting for a value) the value is assigned
					if ($name) {
						$attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
						$attributesMeta[$name]['dashType'] = $metaC[$key];
						$name = '';
					}
				} else {
						// A new attribute name; it is registered with a blank value until a value turns up
					$namekey = preg_replace('/[^a-zA-Z0-9_-]/','',$val);
					if ($namekey) {
						$name = strtolower($namekey);
						$attributesMeta[$name] = array();
						$attributesMeta[$name]['origTag'] = $namekey;
						$attributes[$name] = '';
					}
				}
				$valuemode = FALSE;
			} else {
				$valuemode = TRUE;
			}
		}
	}
	return array($attributes,$attributesMeta);
}
453
/**
 * Returns an array with the 'components' from an attribute list. The result is normally analyzed by get_tag_attributes
 * Removes tag-name if found
 *
 * Components are attribute names, "=" signs and values, in the order they appear. Quoted values are returned without their quotes.
 *
 * @param	string		The tag or attributes
 * @return	array		array(Components, Meta values). The meta value of a component is the quote character (" or ') that surrounded it, or blank.
 * @access private
 * @see t3lib_div::split_tag_attributes()
 */
function split_tag_attributes($tag) {
		// Removes the leading "<tagname" if present
	$tag_tmp = trim((string)preg_replace('/^<[^[:space:]]*/','',trim($tag)));
		// Removes any > in the end of the string
	$tag_tmp = trim((string)preg_replace('/>\z/','',$tag_tmp));

	$metaValue = array();
	$value = array();
	while (strcmp($tag_tmp,'')) {	// Compared with empty string so that a "0" is not taken as the end
		$firstChar = substr($tag_tmp,0,1);
		if (!strcmp($firstChar,'"') || !strcmp($firstChar,"'")) {
				// Quoted value: everything up to the matching quote (or the rest, if the quote is never closed)
			$reg = explode($firstChar,$tag_tmp,3);
			$value[] = $reg[1];
			$metaValue[] = $firstChar;
			$tag_tmp = isset($reg[2]) ? trim($reg[2]) : '';
		} elseif (!strcmp($firstChar,'=')) {
			$value[] = '=';
			$metaValue[] = '';
			$tag_tmp = trim(substr($tag_tmp,1));	// Removes = chars.
		} else {
				// There are NO quotes around the word. It ends at the next whitespace or '='
			$reg = preg_split('/[[:space:]=]/',$tag_tmp,2);
			$wordLen = strlen($reg[0]);
			if ($wordLen == 0) {
					// A leading whitespace char which trim() does not strip (form feed). Skip it, otherwise the loop would never advance.
				$tag_tmp = trim(substr($tag_tmp,1));
				continue;
			}
			$value[] = trim($reg[0]);
			$metaValue[] = '';
				// The delimiter itself is kept in front of the remainder, so that a "=" is seen in the next round
			$tag_tmp = isset($reg[1]) ? trim(substr($tag_tmp,$wordLen,1).$reg[1]) : '';
		}
	}
	return array($value,$metaValue);
}
492
/**
 * Checks whether block/solo tags are found in the correct amounts in HTML content
 * Block tags are tags which are required to have an equal amount of start and end tags, eg. "<table>...</table>"
 * Solo tags are tags which are required to have ONLY start tags (possibly with an XHTML ending like ".../>")
 * NOTICE: Correct XHTML might actually fail since "<br></br>" is allowed as well as "<br/>". However only the LATTER is accepted by this function (with "br" in the "solo-tag" list), the first example will result in a warning.
 * NOTICE: Correct XHTML might actually fail since "<p/>" is allowed as well as "<p></p>". However only the LATTER is accepted by this function (with "p" in the "block-tag" list), the first example will result in an ERROR!
 * NOTICE: Correct HTML version "something" allows eg. <p> and <li> to be NON-ended (implicitly ended by other tags). However this is NOT accepted by this function (with "p" and "li" in the block-tag list) and it will result in an ERROR!
 *
 * The content is lowercased before counting. A tag is counted when "<name" (or "</name") is followed by a non-alphanumeric character.
 *
 * @param	string		HTML content to analyze
 * @param	string		Tag names for block tags (eg. table or div or p) in lowercase, commalist (eg. "table,div,p")
 * @param	string		Tag names for solo tags (eg. img, br or input) in lowercase, commalist ("img,br,input")
 * @return	array		Analyse data: "counts" (start-tag count per tag found), "errors", "warnings", "blocks" (array(start,end,difference) per block tag) and "solo" (array(start,end) per solo tag)
 */
function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') {
	$content = strtolower($content);
	$analyzedOutput = array();
	$analyzedOutput['counts'] = array();	// Counts appearances of start-tags
	$analyzedOutput['errors'] = array();	// Lists ERRORS
	$analyzedOutput['warnings'] = array();	// Lists warnings.
	$analyzedOutput['blocks'] = array();	// Lists stats for block-tags
	$analyzedOutput['solo'] = array();	// Lists stats for solo-tags
	$matches = array();	// Required by preg_match_all() in older PHP versions; not used otherwise

		// Block tags, must have endings...
	$blockTags = explode(',',$blockTags);
	foreach($blockTags as $tagName) {
		$quotedName = preg_quote($tagName,'/');
		$countBegin = (int)preg_match_all('/<'.$quotedName.'[^[:alnum:]]/',$content,$matches);
		$countEnd = (int)preg_match_all('/<\/'.$quotedName.'[^[:alnum:]]/',$content,$matches);
		$analyzedOutput['blocks'][$tagName] = array($countBegin,$countEnd,$countBegin-$countEnd);
		if ($countBegin)	$analyzedOutput['counts'][$tagName] = $countBegin;
		if ($countBegin-$countEnd) {
			if ($countBegin-$countEnd > 0) {
				$analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
			} else {
				$analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
			}
		}
	}

		// Solo tags, must NOT have endings...
	$soloTags = explode(',',$soloTags);
	foreach($soloTags as $tagName) {
		$quotedName = preg_quote($tagName,'/');
		$countBegin = (int)preg_match_all('/<'.$quotedName.'[^[:alnum:]]/',$content,$matches);
		$countEnd = (int)preg_match_all('/<\/'.$quotedName.'[^[:alnum:]]/',$content,$matches);
		$analyzedOutput['solo'][$tagName] = array($countBegin,$countEnd);
		if ($countBegin)	$analyzedOutput['counts'][$tagName] = $countBegin;
		if ($countEnd) {
			$analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
		}
	}

	return $analyzedOutput;
}
545
546
547
548
549
550
551
552
553
554
555
556
557 /*********************************
558 *
559 * Clean HTML code
560 *
561 *********************************/
562
/**
 * Function that can clean up HTML content according to configuration given in the $tags array.
 *
 * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:		 $tags = array_flip(explode(',','b,a,i,u'))
 * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
 *
 * 	$tags[$tagname] = Array(
 * 		'overrideAttribs' => ''		If set, this string is preset as the attributes of the tag
 * 		'allowedAttribs' =>   '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
 * 		'fixAttrib' => Array(
 * 			'[attribute name]' => Array (
 * 				'default' => If no attribute exists by this name, this value is set as default value (if this value is not blank)
 * 				'always' => Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
 * 				'trim,intval,lower,upper' => All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
 * 				'range' => Array ('[low limit]','[high limit, optional]')		Setting integer range.
 * 				'list' => Array ('[value1/default]','[value2]','[value3]')		Attribute must be in this list. If not, the value is set to the first element.
 * 				'removeIfFalse' =>	Boolean/'blank'.	If set, then the attribute is removed if it is 'false'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
 * 				'removeIfEquals' =>	[value]	If the attribute value matches the value set here, then it is removed.
 * 				'casesensitiveComp' => 1	If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
 * 				'prefixLocalAnchors' => Boolean/2. If set and the value starts with "#", the value is prefixed with the current request URL. If the value is 2 and the request URL starts with the site URL, the site URL part is stripped off again.
 * 				'prefixRelPathWith' => String. If set and the value is a relative URL (no scheme, not starting with "/"), the value is prefixed with this string.
 * 				'userFunc' => String. If set, the value is replaced by the return value of this user function (called through t3lib_div::callUserFunction())
 * 			)
 * 		),
 * 		'protect' => '',	Boolean. If set, the tag <> is converted to &lt; and &gt;
 * 		'remap' => '',		String. If set, the tagname is remapped to this tagname
 * 		'rmTagIfNoAttrib' => '',	Boolean. If set, then the tag is removed if no attributes happened to be there.
 * 		'nesting' => '',	Boolean/'global'. If set true, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
 * 	)
 *
 * @param	string		$content; is the HTML-content being processed. This is also the result being returned.
 * @param	array		$tags; is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
 * @param	string		$keepAll; boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
 * @param	integer		$hSC; Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
 * @param	array		Configuration array send along as $conf to the internal functions ->processContent() and ->processTag()
 * @return	string		Processed HTML content
 */
function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array()) {
	$newContent = array();
		// Every element after the first starts with what followed a "<" in the content
	$tokArr = explode('<',$content);
	$newContent[] = $this->processContent($tokArr[0],$hSC,$addConfig);

	$c = 1;	// Next key in $newContent
	$tagRegister = array();	// For tags with nesting control: positions (keys in $newContent) of start-tags still open
	$tagStack = array();	// For tags with 'global' nesting control: names of the start-tags still open
	$tokCount = count($tokArr);
	for ($tokIdx=1; $tokIdx<$tokCount; $tokIdx++) {
		$tok = $tokArr[$tokIdx];
		$firstChar = substr($tok,0,1);
		if (preg_match('/[[:alnum:]\/]/',$firstChar)) {		// It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004)
			$tagEnd = strcspn($tok,'>');
			if (strlen($tok)!=$tagEnd) {	// If there is an end-bracket...
				$endTag = $firstChar=='/' ? 1 : 0;
				$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
				$tagParts = preg_split('/[[:space:]]/',$tagContent,2);	// [0] = tag name, [1] = attribute string
				if (!isset($tagParts[1]))	$tagParts[1] = '';	// Tag without attributes
				$tagName = strtolower($tagParts[0]);
				if (isset($tags[$tagName])) {
					if (is_array($tags[$tagName])) {	// If there is processing to do for the tag:

						if (!$endTag) {	// If NOT an endtag, do attribute processing (added dec. 2003)
								// Override attributes
							if (strcmp($tags[$tagName]['overrideAttribs'],'')) {
								$tagParts[1]=$tags[$tagName]['overrideAttribs'];
							}

								// Allowed attributes
							if (strcmp($tags[$tagName]['allowedAttribs'],'')) {
								if (!strcmp($tags[$tagName]['allowedAttribs'],'0')) {	// No attribs allowed
									$tagParts[1]='';
								} elseif (trim($tagParts[1])) {
									$tagAttrib = $this->get_tag_attributes($tagParts[1]);
									$tagParts[1]='';
									$newTagAttrib = array();
									$tList = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
									foreach ($tList as $allowTag) {
										if (isset($tagAttrib[0][$allowTag]))	$newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
									}
									$tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
								}
							}

								// Fixed attrib values
							if (is_array($tags[$tagName]['fixAttrib'])) {
								$tagAttrib = $this->get_tag_attributes($tagParts[1]);
								$tagParts[1]='';
								foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
									if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))	$tagAttrib[0][$attr]=$params['default'];
									if ($params['always'] || isset($tagAttrib[0][$attr])) {
										if ($params['trim'])	{$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
										if ($params['intval'])	{$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
										if ($params['lower'])	{$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
										if ($params['upper'])	{$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
										if ($params['range']) {
											if (isset($params['range'][1])) {
												$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
											} else {
												$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
											}
										}
										if (is_array($params['list'])) {
											if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName)))	$tagAttrib[0][$attr]=$params['list'][0];
										}
										if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],''))) {
											unset($tagAttrib[0][$attr]);
										}
										if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp']))) {
											unset($tagAttrib[0][$attr]);
										}
										if ($params['prefixLocalAnchors']) {
											if (substr($tagAttrib[0][$attr],0,1)=='#') {
												$prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
												$tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
												if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) {
													$tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
												}
											}
										}
										if ($params['prefixRelPathWith']) {
											$urlParts = parse_url($tagAttrib[0][$attr]);
											if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') {	// If it is NOT an absolute URL (by http: or starting "/")
												$tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
											}
										}
										if ($params['userFunc']) {
											$tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
										}
									}
								}
								$tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
							}
						} else {	// If endTag, remove any possible attributes:
							$tagParts[1]='';
						}

							// Protecting the tag by converting < and > to &lt; and &gt; ??
						if ($tags[$tagName]['protect']) {
							$lt = '&lt;';	$gt = '&gt;';
						} else {
							$lt = '<';	$gt = '>';
						}
							// Remapping tag name?
						if ($tags[$tagName]['remap'])	$tagParts[0] = $tags[$tagName]['remap'];

							// rmTagIfNoAttrib
						if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) {
							$setTag=1;

							if ($tags[$tagName]['nesting']) {
								if (!is_array($tagRegister[$tagName]))	$tagRegister[$tagName]=array();

								if ($endTag) {
									$correctTag=1;
									if ($tags[$tagName]['nesting']=='global') {
										$lastEl = end($tagStack);
										if (strcmp($tagName,$lastEl)) {
											if (in_array($tagName,$tagStack)) {
													// The matching start-tag is further down the stack: discard the start-tags opened after it
												while(count($tagStack) && strcmp($tagName,$lastEl)) {
													$elPos = end($tagRegister[$lastEl]);
													unset($newContent[$elPos]);

													array_pop($tagRegister[$lastEl]);
													array_pop($tagStack);
													$lastEl = end($tagStack);
												}
											} else {
												$correctTag=0;	// In this case the end-tag has no open start-tag on the stack and is discarded
											}
										}
									}
									if (!count($tagRegister[$tagName]) || !$correctTag) {
										$setTag=0;
									} else {
										array_pop($tagRegister[$tagName]);
										if ($tags[$tagName]['nesting']=='global')	{array_pop($tagStack);}
									}
								} else {
									array_push($tagRegister[$tagName],$c);
									if ($tags[$tagName]['nesting']=='global')	{array_push($tagStack,$tagName);}
								}
							}

							if ($setTag) {
									// Setting the tag
								$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
							}
						}
					} else {
						$newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
					}
				} elseif ($keepAll) {	// This is if the tag was not defined in the array for processing:
					if (!strcmp($keepAll,'protect')) {
						$lt = '&lt;';	$gt = '&gt;';
					} else {
						$lt = '<';	$gt = '>';
					}
					$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
				}
				$newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
			} else {
				$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// There was no end-bracket, so no tag...
			}
		} else {
			$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// It was not a tag anyways
		}
	}

		// Unsetting tags: start-tags under nesting control which were never closed are removed again
	foreach ($tagRegister as $positions) {
		foreach ($positions as $pKey) {
			unset($newContent[$pKey]);
		}
	}

	return implode('',$newContent);
}
783
/**
 * Encodes HTML special characters in a value, or converts them back, depending on $dir.
 *
 * @param string Input value
 * @param integer Direction: 1 = htmlspecialchars(); 2 = htmlspecialchars() followed by t3lib_div::deHSCentities(); -1 = turn "&gt;", "&lt;", "&quot;" and "&amp;" back into characters. Any other value returns the input unchanged.
 * @return string Output value
 */
function bidir_htmlspecialchars($value,$dir) {
	switch ($dir) {
		case 1:
			return htmlspecialchars($value);
		case 2:
			return t3lib_div::deHSCentities(htmlspecialchars($value));
		case -1:
				// The pairs are applied in the listed order; "&amp;" comes last so that "&amp;lt;" ends up as "&lt;" and not as "<"
			return str_replace(
				array('&gt;', '&lt;', '&quot;', '&amp;'),
				array('>', '<', '"', '&'),
				$value
			);
	}
	return $value;
}
804
/**
 * Prefixes the resource reference (background/src/href/action attribute) of the tags
 * [td,table,body,img,input,form,link,script,a] in $content with $main_prefix, or with a
 * tag specific alternative from $alternatives. Whether a value is actually prefixed is
 * decided by ->prefixRelPath().
 *
 * @param string Prefix string
 * @param string HTML content
 * @param array Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase
 * @return string Processed HTML content
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array()) {
		// The attribute that holds the resource reference for each of the handled tags:
	$resourceAttributes = array(
		'td' => 'background',
		'body' => 'background',
		'table' => 'background',
		'img' => 'src',
		'input' => 'src',
		'script' => 'src',
		'link' => 'href',
		'a' => 'href',
		'form' => 'action'
	);

	$parts = $this->splitTags('td,table,body,img,input,form,link,script,a',$content);
	foreach ($parts as $k => $v) {
			// Only the parts with odd keys are processed as tags; the others are passed through untouched
		if (!($k%2)) continue;

		$params = $this->get_tag_attributes($v,1);
		$tagEnd = substr($v,-2)=='/>' ? ' />' : '>';	// Detect tag-ending so that it is re-applied correctly.
		$firstTagName = $this->getFirstTagName($v);	// The 'name' of the first tag
		$tagKey = strtolower($firstTagName);

		if (!isset($resourceAttributes[$tagKey])) continue;
		$attribute = $resourceAttributes[$tagKey];

			// Missing or empty attribute: leave the tag exactly as it was found
		if (empty($params[0][$attribute])) continue;

		$prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;
		$params[0][$attribute] = $this->prefixRelPath($prefix,$params[0][$attribute]);

			// Rebuild the tag: lowercase tag name, recompiled attributes, original tag ending
		$parts[$k] = '<'.trim($tagKey.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
	}

	return implode('',$parts);
}
872
/**
 * Internal sub-function for ->prefixResourcePath()
 * Puts $prefix in front of $srcVal unless parse_url() finds a scheme in $srcVal.
 *
 * @param string Prefix string
 * @param string Relative path/URL
 * @return string Output path, prefixed if no scheme in input string
 * @access private
 */
function prefixRelPath($prefix,$srcVal) {
	$urlParts = parse_url($srcVal);
		// parse_url() returns FALSE for seriously malformed URLs and has no 'scheme' key for relative ones; both are treated as "no scheme"
	$hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);

	return $hasScheme ? $srcVal : $prefix.$srcVal;
}
888
/**
 * Cleans up the font-tags in the input $value.
 * A font-tag is kept (reduced to its allowed attributes) if at least one of the attributes enabled
 * through $keepFace/$keepSize/$keepColor has a value; otherwise the tag is removed and only its
 * content is kept. The content of each font-tag is processed recursively.
 *
 * @param string HTML content with font-tags inside to clean up.
 * @param boolean If set, keep "face" attribute
 * @param boolean If set, keep "size" attribute
 * @param boolean If set, keep "color" attribute
 * @return string Processed HTML content
 */
function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0) {
		// Attribute name => "keep it" flag, in the order the attributes are written out
	$keepFlags = array(
		'face' => $keepFace,
		'size' => $keepSize,
		'color' => $keepColor
	);

		// NOTE (from the original author): this depends on end-tags being properly set; splitTags() might be more stable.
	$fontSplit = $this->splitIntoBlock('font',$value);
	foreach ($fontSplit as $k => $v) {
		if ($k%2) {	// font:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));

			$newAttribs = array();
			foreach ($keepFlags as $attribName => $keep) {
				if ($keep && !empty($attribArray[$attribName])) {
					$newAttribs[] = $attribName.'="'.$attribArray[$attribName].'"';
				}
			}

			$innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor);
			$fontSplit[$k] = count($newAttribs)
				? '<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>'
				: $innerContent;
		}
	}
	return implode('',$fontSplit);
}
920
/**
 * This is used to map certain tag-names into other names.
 * Matching is case-insensitive. For every from=>to pair three forms are rewritten: the tag without
 * attributes, the tag with attributes (attributes are carried over) and the end tag.
 * The end tag is only matched when the tag name is followed by ">" or whitespace, so mapping "b"
 * does not touch "</body>".
 *
 * $ltChar and the keys of $tags are inserted as-is into PCRE patterns that use "#" as delimiter,
 * so they must not contain an unescaped "#". $ltChar2 and the values of $tags are inserted as-is
 * into the preg_replace() replacement string.
 *
 * @param string HTML content
 * @param array Array with tag key=>value pairs where key is from-tag and value is to-tag
 * @param string Alternative less-than char to search for (search regex string)
 * @param string Alternative less-than char to replace with (replace regex string)
 * @return string Processed HTML content
 */
function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
	foreach ($tags as $from => $to) {
			// preg_replace() applies the pattern/replacement pairs one after the other, in this order:
		$patterns = array(
			'#'.$ltChar.$from.'>#i',							// <from>
			'#'.$ltChar.$from.'[[:space:]]([^>]*)>#i',			// <from attributes...>
			'#'.$ltChar.'/'.$from.'([[:space:]][^>]*)?>#i'		// </from>
		);
		$replacements = array(
			$ltChar2.$to.'>',
			$ltChar2.$to.' \\1>',
			$ltChar2.'/'.$to.'>'
		);
		$value = preg_replace($patterns,$replacements,$value);
	}
	return $value;
}
939
/**
 * This converts htmlspecialchar()'ed tags (from $tagList) back to real tags. Eg. '&lt;strong&gt;' would be converted back to '<strong>' if found in $tagList
 * If $tagList is empty, every protected tag is converted back.
 *
 * @param string HTML content
 * @param string Tag list, separated by comma. Lowercase!
 * @return string Processed HTML content
 */
function unprotectTags($content,$tagList='') {
	$tagsArray = t3lib_div::trimExplode(',',$tagList,1);
	$contentParts = explode('&lt;',$content);
	$partCount = count($contentParts);

		// Part 0 is the text before the first "&lt;" and is never changed
	for ($k=1; $k<$partCount; $k++) {
		$tok = $contentParts[$k];
		$restored = '&lt;'.$tok;	// Default: put back the "&lt;" that explode() removed

		$firstChar = substr($tok,0,1);
		if (strcmp(trim($firstChar),'')) {	// A tag name (or "/") must follow directly after "&lt;"
			$subparts = explode('&gt;',$tok,2);
			if (count($subparts)==2) {	// ... and there must be a closing "&gt;"
				$endTag = $firstChar=='/' ? 1 : 0;
				$tagParts = preg_split('/[[:space:]]/',substr($subparts[0],$endTag),2);
				$tagName = strtolower($tagParts[0]);
				if (!strcmp($tagList,'') || in_array($tagName,$tagsArray)) {
					$restored = '<'.$subparts[0].'>'.$subparts[1];
				}
			}
		}
		$contentParts[$k] = $restored;
	}

	return implode('',$contentParts);
}
970
/**
 * Strips tags except the tags in the list, $tagList
 * OBSOLETE - use PHP function strip_tags()
 *
 * Works by mapping the tags to keep into "_[md5 hash of tag name]" placeholders with ->mapTags(),
 * running strip_tags() and then mapping the placeholders back to the tags.
 *
 * @param string Value to process
 * @param string List of tags
 * @return string Output value
 * @ignore
 */
function stripTagsExcept($value,$tagList) {
	$forthArr = array();
	$backArr = array();
	foreach (t3lib_div::trimExplode(',',$tagList,1) as $theTag) {
		$hash = md5($theTag);
		$forthArr[$theTag] = $hash;
		$backArr[$hash] = $theTag;
	}

	$value = $this->mapTags($value,$forthArr,'<','_');
	$value = strip_tags($value);

	return $this->mapTags($value,$backArr,'_','<');
}
993
/**
 * Internal function for case shifting of a string or whole array
 *
 * A string is returned in uppercase unless $flag is set. For an array every value is
 * converted to uppercase; with a $cacheKey the converted array is stored in
 * $this->caseShift_cache and returned from there on later calls with the same key.
 *
 * NOTE(review): $flag is not consulted for arrays, they are always uppercased. Confirm against the callers whether that is intended.
 *
 * @param mixed Input string/array
 * @param boolean If $str is a string AND this boolean is true, the string is returned unchanged; if false, it is returned in uppercase
 * @param string Key string used for internal caching of the results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
 * @return mixed Output string or array, processed
 * @access private
 */
function caseShift($str,$flag,$cacheKey='') {
	if (is_array($str)) {
		if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
			return $this->caseShift_cache[$cacheKey];
		}
		foreach ($str as $k => $v) {
			$str[$k] = strtoupper($v);
		}
		if ($cacheKey) $this->caseShift_cache[$cacheKey] = $str;
		return $str;
	}

	return $flag ? $str : strtoupper($str);
}
1017
/**
 * Compiling an array with tag attributes into a string
 *
 * An attribute is written with a value if the value is not the empty string or if $meta
 * has a 'dashType' entry for it; otherwise only the attribute name is written.
 * Without $xhtmlClean the name is taken from $meta[attribute]['origTag'] (if set) and the value is
 * wrapped in $meta[attribute]['dashType'] (if set); lacking that, values accepted by
 * t3lib_div::testInt() are left unquoted and all others are double-quoted.
 *
 * @param array Tag attributes
 * @param array Meta information about these attributes (like if they were quoted)
 * @param boolean If set, then the attribute names will be set in lower case, value quotes in double-quotes and the value will be htmlspecialchar()'ed
 * @return string Imploded attributes, eg: 'attribute="value" attrib2="value2"'
 * @access private
 */
function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0) {
	if (!is_array($tagAttrib)) return '';

	$accu = array();
	foreach ($tagAttrib as $k => $v) {
		$hasValue = strcmp($v,'') || isset($meta[$k]['dashType']);

		if ($xhtmlClean) {
			$attr = strtolower($k);
			if ($hasValue) {
				$attr .= '="'.htmlspecialchars($v).'"';
			}
		} else {
			$attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
			if ($hasValue) {
				if (!empty($meta[$k]['dashType'])) {
					$dash = $meta[$k]['dashType'];
				} else {
					$dash = t3lib_div::testInt($v) ? '' : '"';
				}
				$attr .= '='.$dash.$v.$dash;
			}
		}
		$accu[] = $attr;
	}
	return implode(' ',$accu);
}
1047
/**
 * Returns only the attribute array (element 0) of the result of ->get_tag_attributes()
 *
 * @param string The tag
 * @param boolean De-htmlspecialchar flag.
 * @return array The attributes; an empty array if element 0 of the result is not an array
 * @access private
 */
function get_tag_attributes_classic($tag,$deHSC=0) {
	$parsed = $this->get_tag_attributes($tag,$deHSC);
	if (is_array($parsed[0])) {
		return $parsed[0];
	}
	return array();
}
1060
/**
 * Indents input content with $number instances of $indentChar
 * Carriage returns (chr(13)) are removed from the content; lines are separated by chr(10).
 *
 * @param string Content string, multiple lines.
 * @param integer Number of indents
 * @param string Indent character/string
 * @return string Indented code (typ. HTML)
 */
function indentLines($content, $number=1, $indentChar="\t") {
		// A pad length of zero or less (no indents or empty indent string) means no prefix; str_pad() must not be called with an empty pad string
	$padLength = $number*strlen($indentChar);
	$preTab = $padLength>0 ? str_pad('',$padLength,$indentChar) : '';

	$content = str_replace(chr(13),'',$content);

		// Prefixing the content and every line break is the same as prefixing each single line
	return $preTab.str_replace(chr(10),chr(10).$preTab,$content);
}
1077
/**
 * Converts TSconfig into an array for the HTMLcleaner function.
 *
 * TSconfig keys read: allowTags, tags., localNesting, globalNesting, rmTagIfNoAttrib, noAttrib,
 * removeTags, xhtml_cleaning, keepNonMatchedTags, htmlSpecialChars.
 *
 * @param array TSconfig for HTMLcleaner
 * @param array Array of tags to keep (?)
 * @return array Numeric array with four elements: 0 = tag configuration array, 1 = "keepNonMatchedTags" value as string, 2 = "htmlSpecialChars" value as integer, 3 = additional configuration array ('xhtml' is set if "xhtml_cleaning" is enabled)
 * @access private
 */
function HTMLparserConfig($TSconfig,$keepTags=array()) {
		// Allow tags (base list, merged with incoming array)
	$allowTags = isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : '';
	$alTags = array_flip(t3lib_div::trimExplode(',',strtolower($allowTags),1));
	$keepTags = array_merge($alTags,$keepTags);

		// Set config properties.
	if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {

			// First pass: the plain "tag = 0" / "tag = 1" switches (lowercase keys only)
		foreach ($TSconfig['tags.'] as $key => $tagC) {
			if (!is_array($tagC) && $key==strtolower($key)) {
				if (!strcmp($tagC,'0')) unset($keepTags[$key]);
				if (!strcmp($tagC,'1') && !isset($keepTags[$key])) $keepTags[$key]=1;
			}
		}

			// Second pass: the "tag." sub-arrays with per-tag properties (lowercase keys only)
		foreach ($TSconfig['tags.'] as $key => $tagC) {
			if (is_array($tagC) && $key==strtolower($key)) {
				$key = substr($key,0,-1);	// Strip the trailing dot
				if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) $keepTags[$key]=array();

				if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
					foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
						if (is_array($atConfig)) {
							$atName = substr($atName,0,-1);	// Strip the trailing dot
							if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName])) {
								$keepTags[$key]['fixAttrib'][$atName] = array();
							}
							$merged = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);	// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...

								// "range" and "list" are given as comma lists and stored as arrays; values that already are arrays are left alone
							foreach (array('range','list') as $listOption) {
								if (isset($merged[$listOption]) && !is_array($merged[$listOption]) && strcmp($merged[$listOption],'')) {
									$merged[$listOption] = t3lib_div::trimExplode(',',$merged[$listOption]);
								}
							}
							$keepTags[$key]['fixAttrib'][$atName] = $merged;
						}
					}
				}
					// fixAttrib has been handled above, so it is kept out of the plain merge
				unset($tagC['fixAttrib.']);
				unset($tagC['fixAttrib']);
				$keepTags[$key] = array_merge($keepTags[$key],$tagC);	// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
			}
		}
	}

		// Tag lists that set one property on tags which are already in $keepTags.
		// TSconfig option => array(property name, property value). The order matters: globalNesting overrides localNesting.
	$listOptions = array(
		'localNesting' => array('nesting',1),
		'globalNesting' => array('nesting','global'),
		'rmTagIfNoAttrib' => array('rmTagIfNoAttrib',1),
		'noAttrib' => array('allowedAttribs',0)
	);
	foreach ($listOptions as $option => $property) {
		if (!empty($TSconfig[$option])) {
			$lN = t3lib_div::trimExplode(',',strtolower($TSconfig[$option]),1);
			foreach ($lN as $tn) {
				if (isset($keepTags[$tn])) {
						// Entries coming from "allowTags" or "tag = 1" are scalars and must become arrays before a property can be set
					if (!is_array($keepTags[$tn])) $keepTags[$tn]=array();
					$keepTags[$tn][$property[0]] = $property[1];
				}
			}
		}
	}

		// removeTags: configured even if the tag was not in $keepTags before; any earlier configuration of the tag is replaced
	if (!empty($TSconfig['removeTags'])) {
		$lN = t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1);
		foreach ($lN as $tn) {
			$keepTags[$tn] = array(
				'allowedAttribs' => 0,
				'rmTagIfNoAttrib' => 1
			);
		}
	}

		// Create additional configuration:
	$addConfig = array();
	if (!empty($TSconfig['xhtml_cleaning'])) {
		$addConfig['xhtml'] = 1;
	}

	return array(
		$keepTags,
		isset($TSconfig['keepNonMatchedTags']) ? ''.$TSconfig['keepNonMatchedTags'] : '',
		isset($TSconfig['htmlSpecialChars']) ? intval($TSconfig['htmlSpecialChars']) : 0,
		$addConfig
	);
}
1184
/**
 * Tries to convert the content to be XHTML compliant.
 * STILL EXPERIMENTAL.
 *
 * The work is done by ->HTMLcleaner(), called with no specially configured tags, all tags kept,
 * no htmlspecialchars() conversion of the content and the 'xhtml' flag set in the additional configuration.
 *
 * Known limitations, as listed by the original author:
 * - Wellformedness: Nesting is NOT checked
 * - The name/id attribute issue is not observed.
 * - Nesting rules for elements are not enforced (eg. <PRE> must not contain img, big, small, sub, sup ...)
 * - Script and style element contents are not wrapped in CDATA and do not have entities converted.
 * - Charset requirements on the XML declaration / meta-http-equiv are not handled (C.9); XML in fact expects UTF-8.
 * - Stylesheet element and attribute names are NOT converted to lowercase
 * - Ampersands in non-tag content are not converted to entity references (&amp;).
 * - Minimized attribute values are not expanded (XHTML requires eg. selected="selected")
 *
 * @param string Content to clean up
 * @return string Cleaned up content returned.
 * @access private
 */
function XHTML_clean($content) {
	$tags = array();	// No tags treated specially
	$keepAll = 1;	// Keep ALL tags.
	$hSC = 0;	// Content is not htmlspecialchar()'ed - if it was, <script> content would break...
	$addConfig = array('xhtml' => 1);

	return $this->HTMLcleaner($content,$tags,$keepAll,$hSC,$addConfig);
}
1221
/**
 * Processing all tags themselves
 * (Some additions by Sacha Vorbeck)
 *
 * Only the 'xhtml' key of $conf is evaluated (STILL VERY EXPERIMENTAL). If it is set:
 * - end tags are converted to lowercase
 * - tags starting with "<!" are returned unchanged
 * - other tags are rebuilt with a lowercase tag name and all attributes as name="value", the value htmlspecialchars()'ed
 * - img-tags get an empty "alt" attribute and script-tags a type="text/javascript" attribute if they have none
 * - img,br,hr,meta,link,base,area,input and tags that already ended with "/>" are ended with " />"
 *
 * @param string Tag to process
 * @param array Configuration array passing instructions for processing. If count()==0, function will return value unprocessed. See source code for details
 * @param boolean Is endtag, then set this.
 * @param boolean If set, just return value straight away
 * @return string Processed value.
 * @access private
 */
function processTag($value,$conf,$endTag,$protected=0) {
		// Return immediately if protected or no parameters
	if ($protected || !count($conf)) return $value;

		// XHTML output is the only processing there is
	if (empty($conf['xhtml'])) return $value;

		// Endtags are just set lowercase right away
	if ($endTag) return strtolower($value);

		// ... and comments are ignored.
	if (substr($value,0,2)=='<!') return $value;

	$selfClosed = substr($value,-2)=='/>';
	$inValue = substr($value,1,$selfClosed?-2:-1);	// Finding inner value with out < >
	$tagParts = preg_split('/[[:space:]]/',$inValue,2);	// Separate attributes and tagname
	$tagName = strtolower($tagParts[0]);
	$tagP = isset($tagParts[1]) ? $tagParts[1] : '';	// A tag without attributes has no second part

		// Process attributes
	$tagAttrib = $this->get_tag_attributes($tagP);
	if (!strcmp($tagName,'img') && !isset($tagAttrib[0]['alt'])) $tagAttrib[0]['alt']='';	// Set alt attribute for all images (not XHTML though...)
	if (!strcmp($tagName,'script') && !isset($tagAttrib[0]['type'])) $tagAttrib[0]['type']='text/javascript';	// Set type attribute for all script-tags

	$outA = array();
	if (isset($tagAttrib[0]) && is_array($tagAttrib[0])) {
		foreach ($tagAttrib[0] as $attrib_name => $attrib_value) {
				// Always in quotes; the value is decoded first so that entities already present are not encoded twice
			$outA[] = $attrib_name.'="'.htmlspecialchars($this->bidir_htmlspecialchars($attrib_value,-1)).'"';
		}
	}

	$newTag = '<'.trim($tagName.' '.implode(' ',$outA));
		// All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
	$newTag .= (t3lib_div::inList('img,br,hr,meta,link,base,area,input',$tagName) || $selfClosed) ? ' />' : '>';

	return $newTag;
}
1270
/**
 * Processing content between tags for HTML_cleaner
 * The value is passed through ->bidir_htmlspecialchars() unless $dir is zero.
 *
 * @param string The value
 * @param integer Direction as understood by ->bidir_htmlspecialchars(). 0 (zero) means no change to input value.
 * @param mixed Not used, ignore.
 * @return string The processed value.
 * @access private
 */
function processContent($value,$dir,$conf) {
	return $dir!=0 ? $this->bidir_htmlspecialchars($value,$dir) : $value;
}
1284 }
1285
1286
1287
1288 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']) {
1289 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
1290 }
1291 ?>