class.t3lib_parsehtml.php 51.8 KB
Newer Older
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1
2
3
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 1999-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
 * Contains class with functions for parsing HTML code.
 *
 * $Id$
 * Revised for TYPO3 3.6 July/2003 by Kasper Skaarhoj
 *
 * @author	Kasper Skaarhoj <kasperYYYY@typo3.com>
 */
/**
 * [CLASS/FUNCTION INDEX of SCRIPT]
 *
 *
 *
 *  106: class t3lib_parsehtml
 *  123:     function getSubpart($content, $marker)
 *  156:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
 *
 *              SECTION: Parsing HTML code
 *  247:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
 *  308:     function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
 *  344:     function splitTags($tag,$content)
 *  378:     function getAllParts($parts,$tag_parts=1,$include_tag=1)
 *  396:     function removeFirstAndLastTag($str)
 *  412:     function getFirstTag($str)
 *  426:     function getFirstTagName($str,$preserveCase=FALSE)
 *  445:     function get_tag_attributes($tag,$deHSC=0)
 *  486:     function split_tag_attributes($tag)
 *  524:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
 *
 *              SECTION: Clean HTML code
 *  617:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
 *  814:     function bidir_htmlspecialchars($value,$dir)
 *  837:     function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')
 *  919:     function prefixRelPath($prefix,$srcVal,$suffix='')
 *  937:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
 *  967:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
 *  982:     function unprotectTags($content,$tagList='')
 * 1015:     function stripTagsExcept($value,$tagList)
 * 1038:     function caseShift($str,$flag,$cacheKey='')
 * 1065:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
 * 1093:     function get_tag_attributes_classic($tag,$deHSC=0)
 * 1106:     function indentLines($content, $number=1, $indentChar="\t")
 * 1123:     function HTMLparserConfig($TSconfig,$keepTags=array())
 * 1247:     function XHTML_clean($content)
 * 1269:     function processTag($value,$conf,$endTag,$protected=0)
 * 1315:     function processContent($value,$dir,$conf)
 *
 * TOTAL FUNCTIONS: 28
 * (This index is automatically created/updated by the extension "extdeveval")
 *
 */




















/**
 * Functions for parsing HTML.
 * You are encouraged to use this class in your own applications
 *
 * @author	Kasper Skaarhoj <kasperYYYY@typo3.com>
 * @package TYPO3
 * @subpackage t3lib
 */
106
class t3lib_parsehtml	{
Kasper Skårhøj's avatar
Kasper Skårhøj committed
107
108
109
110
111
	var $caseShift_cache=array();


	// *******************************************'
	// COPY FROM class.tslib_content.php: / BEGIN
	// substituteSubpart
	// Cleaned locally 2/2003 !!!! (so different from tslib_content version)
	// *******************************************'

	/**
	 * Extracts the first subpart that is enclosed by two occurrences of $marker in $content.
	 * If the markers are written inside HTML comments (eg. "<!-- ###MARKER### begin -->"), the
	 * remainders of those comments on either side of the subpart are stripped from the result.
	 *
	 * @param	string		Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
	 * @param	string		Marker string, eg. "###CONTENT_PART###"
	 * @return	string		The subpart; an empty string if the marker is not found twice.
	 */
	function getSubpart($content, $marker)	{
		$openPos = strpos($content, $marker);
		if ($openPos === false)	{
			return '';
		}

		$bodyPos = $openPos + strlen($marker);
		$closePos = strpos($content, $marker, $bodyPos);
			// A subpart without a closing marker yields nothing (rather than "everything till the end").
		if ($closePos === false)	{
			return '';
		}

		$body = substr($content, $bodyPos, $closePos - $bodyPos);

			// Each entry: pattern to try + index of the capture group holding the actual subpart.
			// Order matters: comment remainders on both sides, then only trailing, then only leading.
		$candidates = array(
			array('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', 2),
			array('/(.*)(\<\!\-\-[^\>]*)$/s', 1),
			array('/^([^\<]*\-\-\>)(.*)$/s', 2)
		);
		foreach ($candidates as $candidate)	{
			$found = array();
			if (preg_match($candidate[0], $body, $found) === 1)	{
				return $found[$candidate[1]];
			}
		}

			// Markers were not wrapped in comments at all.
		return $body;
	}

	/**
	 * Replaces the subpart enclosed by two occurrences of $marker in $content with $subpartContent.
	 * HTML comment remainders around the markers (eg. "<!-- ###MARKER### -->") are taken into account:
	 * they are removed together with the markers, or preserved together with them if $keepMarker is set.
	 *
	 * @param	string		Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
	 * @param	string		Marker string, eg. "###CONTENT_PART###"
	 * @param	mixed		String: replaces the subpart. Array: its [0] and [1] elements are wrapped around the existing content of the subpart.
	 * @param	boolean		If set, the function calls itself on the content following the second marker, so that subsequent subparts with the same marker are ALSO substituted.
	 * @param	boolean		If set, the markers around the subpart are not removed, but kept in the output
	 * @return	string		Processed input content (returned unchanged if the marker is not found twice)
	 */
	function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)	{
		$firstPos = strpos($content, $marker);
		if ($firstPos === false)	{
			return $content;
		}
		$markerLen = strlen($marker);
		$innerStart = $firstPos + $markerLen;
		$secondPos = strpos($content, $marker, $innerStart);
		if ($secondPos === false)	{
			return $content;
		}

			// Split into: everything before first marker / between the markers / after second marker
		$head = substr($content, 0, $firstPos);
		$inner = substr($content, $innerStart, $secondPos - $innerStart);
		$tail = substr($content, $secondPos + $markerLen);

		if ($recursive)	{
			$tail = t3lib_parsehtml::substituteSubpart($tail, $marker, $subpartContent, $recursive, $keepMarker);
		}

		$wrapExisting = is_array($subpartContent);

			// Patterns for comment remainders inside the subpart: on both sides / trailing only / leading only
		$reBothSides = '/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s';
		$reTrailing = '/^(.*)(\<\!\-\-[^\>]*)$/s';
		$reLeading = '/^([^\<]*\-\-\>)(.*)$/s';
		$found = array();

		if ($keepMarker)	{
				// Move the markers (and any comment remainders) out of the subpart into head/tail
			if (preg_match($reBothSides, $inner, $found) === 1)	{
				$head .= $marker.$found[1];
				$inner = $found[2];
				$tail = $found[3].$marker.$tail;
			} elseif (preg_match($reTrailing, $inner, $found) === 1)	{
				$head .= $marker;
				$inner = $found[1];
				$tail = $found[2].$marker.$tail;
			} elseif (preg_match($reLeading, $inner, $found) === 1)	{
				$head .= $marker.$found[1];
				$inner = $found[2];
				$tail = $marker.$tail;
			} else	{
				$head .= $marker;
				$tail = $marker.$tail;
			}
		} else	{
				// Drop an unclosed comment start at the end of the head
			if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $head, $found) === 1)	{
				$head = $found[1];
			}
				// The existing subpart content is only needed (and cleaned) when it is going to be wrapped
			if ($wrapExisting)	{
				if (preg_match($reBothSides, $inner, $found) === 1)	{
					$inner = $found[2];
				} elseif (preg_match($reTrailing, $inner, $found) === 1)	{
					$inner = $found[1];
				} elseif (preg_match($reLeading, $inner, $found) === 1)	{
					$inner = $found[2];
				}
			}
				// Drop a comment ending at the start of the tail
			if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $tail, $found) === 1)	{
				$tail = $found[1];
			}
		}

		if ($wrapExisting)	{
			$replacement = $subpartContent[0].$inner.$subpartContent[1];
		} else	{
			$replacement = $subpartContent;
		}

		return $head.$replacement.$tail;
	}
218
219


Kasper Skårhøj's avatar
Kasper Skårhøj committed
220
221
222
223
224
225
226
227
228
	// *******************************************'
	// COPY FROM class.tslib_content.php: / END
	// *******************************************'






	/************************************
	 *
	 * Parsing HTML code
	 *
	 ************************************/

Kasper Skårhøj's avatar
Kasper Skårhøj committed
236
237
	/**
	 * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
	 * Even numbers in the array are outside the blocks, Odd numbers are block-content.
	 * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
	 *
	 * The content is split on every start/end tag from the list; the tags themselves are then
	 * re-read from $content by position, while a nesting counter decides when a block opens and closes.
	 *
	 * @param	string		List of tags, comma separated.
	 * @param	string		HTML-content
	 * @param	boolean		If set, excessive end tags are ignored - you should probably set this in most cases.
	 * @return	array		Even numbers in the array are outside the blocks, Odd numbers are block-content.
	 * @see splitTags(), getAllParts(), removeFirstAndLastTag()
	 */
	function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)	{
		$tags = array_unique(t3lib_div::trimExplode(',',$tag,1));
		$regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';

		$parts = preg_split($regexStr, $content);

		$newParts = array();
		$pointer = strlen($parts[0]);	// Position in $content of the first matched tag
		$buffer = $parts[0];
		$nested = 0;

			// $parts[0] (text before the first tag) is already in $buffer. Every following part is
			// preceded in $content by exactly one matched tag, which starts at $pointer.
			// (Index loop instead of each(), which is removed as of PHP 8.0.)
		$partCount = count($parts);
		for ($i = 1; $i < $partCount; $i++)	{
			$v = $parts[$i];
			$isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
			$tagLen = strcspn($content,'>',$pointer)+1;	// Length of the tag at $pointer, including the ">"

			if (!$isEndTag)	{	// We meet a start-tag:
				if (!$nested)	{	// Ground level:
					$newParts[] = $buffer;	// previous buffer stored
					$buffer = '';
				}
				$nested++;	// We are inside now!
				$mbuffer = substr($content,$pointer,strlen($v)+$tagLen);	// New buffer set and pointer increased
				$pointer += strlen($mbuffer);
				$buffer .= $mbuffer;
			} else {	// If we meet an endtag:
				$nested--;	// decrease nested-level
				$eliminated = 0;
				if ($eliminateExtraEndTags && $nested<0)	{
						// Surplus end tag: drop it from the output and stay on ground level
					$nested = 0;
					$eliminated = 1;
				} else {
					$buffer .= substr($content,$pointer,$tagLen);	// In any case, add the endtag to current buffer and increase pointer
				}
				$pointer += $tagLen;
				if (!$nested && !$eliminated)	{	// if we're back on ground level, (and not by eliminating tags...
					$newParts[] = $buffer;
					$buffer = '';
				}
				$mbuffer = substr($content,$pointer,strlen($v));	// New buffer set and pointer increased
				$pointer += strlen($mbuffer);
				$buffer .= $mbuffer;
			}
		}
		$newParts[] = $buffer;
		return $newParts;
	}

296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
	/**
	 * Splits content into blocks *recursively* (using splitIntoBlock() with elimination of extra end tags)
	 * and lets call back methods on $procObj process the tags and the content between them.
	 *
	 * @param	string		Tag list, see splitIntoBlock()
	 * @param	string		Content, see splitIntoBlock()
	 * @param	object		Object where call back methods are.
	 * @param	string		Name of call back method for content; "function callBackContent($str,$level)". Skipped if empty.
	 * @param	string		Name of call back method for tags; "function callBackTags($tags,$level)". Skipped if empty.
	 * @param	integer		Indent level
	 * @return	string		Processed content
	 * @see splitIntoBlock()
	 */
	function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)	{
		$pieces = $this->splitIntoBlock($tag,$content,TRUE);
		$output = '';

		foreach ($pieces as $index => $piece)	{
				// Even index: content outside the blocks
			if (!($index%2))	{
				if ($callBackContent)	{
					$piece = $procObj->$callBackContent($piece,$level);
				}
				$output .= $piece;
				continue;
			}

				// Odd index: a block. Take it apart into start tag, inner content and a rebuilt end tag.
			$rawTagName = $this->getFirstTagName($piece, TRUE);
			$tagInfo = array(
				'tag_start' => $this->getFirstTag($piece),
				'tag_end' => '</'.$rawTagName.'>',
				'tag_name' => strtolower($rawTagName),
				'add_level' => 1
			);
				// The inner content is processed first, one level deeper
			$tagInfo['content'] = $this->splitIntoBlockRecursiveProc(
				$tag,
				$this->removeFirstAndLastTag($piece),
				$procObj,
				$callBackContent,
				$callBackTags,
				$level+$tagInfo['add_level']
			);

				// The tag call back receives (and returns) the whole array, including the processed content
			if ($callBackTags)	{
				$tagInfo = $procObj->$callBackTags($tagInfo,$level);
			}

			$output .= $tagInfo['tag_start'].$tagInfo['content'].$tagInfo['tag_end'];
		}

		return $output;
	}

Kasper Skårhøj's avatar
Kasper Skårhøj committed
334
335
	/**
	 * Returns an array with the $content divided by the single tags specified with the list of tags, $tag
	 * Even numbers in the array are the content outside the tags, Odd numbers are the matched tags themselves.
	 * Only start tags from the list (optionally with attributes and/or an XHTML "/>" ending) are matched.
	 * Use ->getAllParts() to pick either the tags or the content in between.
	 *
	 * @param	string		List of tags, comma separated.
	 * @param	string		HTML-content
	 * @return	array		Even numbers in the array are outside the tags, Odd numbers are the tags.
	 * @see splitIntoBlock(), getAllParts(), removeFirstAndLastTag()
	 */
	function splitTags($tag,$content)	{
		$tags = t3lib_div::trimExplode(',',$tag,1);
		$regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
		$parts = preg_split($regexStr, $content);

		$pointer = strlen($parts[0]);	// Position in $content of the first matched tag
		$newParts = array();
		$newParts[] = $parts[0];

			// Every part after the first is preceded in $content by exactly one matched tag at $pointer.
			// (Index loop instead of each(), which is removed as of PHP 8.0.)
		$partCount = count($parts);
		for ($i = 1; $i < $partCount; $i++)	{
			$v = $parts[$i];
			$tagLen = strcspn($content,'>',$pointer)+1;	// Length of the tag at $pointer, including the ">"

				// Set tag:
			$tagStr = substr($content,$pointer,$tagLen);
			$newParts[] = $tagStr;
			$pointer += strlen($tagStr);

				// Set content:
			$newParts[] = $v;
			$pointer += strlen($v);
		}
		return $newParts;
	}

	/**
	 * Filters the result from ->splitIntoBlock()/->splitTags() down to either the tag parts
	 * (odd keys) or the non-tag parts (even keys). The result is re-indexed from zero.
	 *
	 * @param	array		Parts generated by ->splitIntoBlock() or ->splitTags()
	 * @param	boolean		Whether to return the tag-parts (default,true) or what was outside the tags.
	 * @param	boolean		Whether to include the tags in the returned parts (most useful for input made by ->splitIntoBlock()). If false, each returned part is passed through ->removeFirstAndLastTag()
	 * @return	array		Tag-parts/Non-tag-parts depending on input argument settings
	 * @see splitIntoBlock(), splitTags()
	 */
	function getAllParts($parts,$tag_parts=1,$include_tag=1)	{
		$wantOddKeys = $tag_parts ? true : false;
		$selected = array();

		foreach ($parts as $key => $part)	{
			$keyIsOdd = ($key%2) != 0;
			if ($keyIsOdd != $wantOddKeys)	{
				continue;
			}
			$selected[] = $include_tag ? $part : $this->removeFirstAndLastTag($part);
		}

		return $selected;
	}

	/**
	 * Strips the first and the last tag from the string.
	 * Anything before the first and after the last tags respectively is also removed:
	 * what is returned is the text between the first ">" and the last "<" in $str.
	 *
	 * @param	string		String to process
	 * @return	string		Content between the first and the last tag
	 */
	function removeFirstAndLastTag($str)	{
			// Position of the ">" closing the first tag
		$firstTagEnd = strpos($str,'>');
			// Position of the "<" opening the last tag
		$lastTagStart = strrpos($str,'<');

		$innerOffset = $firstTagEnd+1;
		$innerLength = $lastTagStart-$innerOffset;

		return substr($str, $innerOffset, $innerLength);
	}

	/**
	 * Returns the first tag in $str
	 * Actually everything from the beginning of $str up to and including the first ">" is returned,
	 * so you better make sure the tag is the first thing in the string...
	 *
	 * @param	string		HTML string with tags
	 * @return	string		The start of $str through the first ">"
	 */
	function getFirstTag($str)	{
		$firstClosePos = strpos($str,'>');
		$tagLength = $firstClosePos+1;
		return substr($str,0,$tagLength);
	}

	/**
	 * Returns the NAME of the first tag in $str
	 * Only leading whitespace may precede the tag. The name is everything after "<" up to the
	 * first whitespace character or ">" (so for "<br/>" the slash becomes part of the name).
	 *
	 * @param	string		HTML tag, or a string beginning with one
	 * @param	boolean		If set, then the tag name is NOT converted to uppercase but its case is preserved.
	 * @return	string		Tag name in upper case (or in original case if $preserveCase is set); empty string if $str does not begin with a tag
	 * @see getFirstTag()
	 */
	function getFirstTagName($str,$preserveCase=FALSE)	{
		$found = array();
		if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $found) !== 1)	{
			return '';
		}

		$tagName = $found[1];
		return $preserveCase ? $tagName : strtoupper($tagName);
	}
436
437
438
439
440
441
442
443
444
445
446
447

	/**
	 * Returns the attributes of a tag as an array with the attribute names as keys.
	 * Attribute names are stripped of all characters except alphanumerics, "_", ":" and "-" and then lowercased.
	 * If an attribute has no value (shorthand), then the value for the key is empty. You can check if it existed with isset()
	 *
	 * @param	string		Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
	 * @param	boolean		If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
	 * @return	array		array(Tag attributes,Attribute meta-data). The meta-data holds per attribute 'origTag' (the name before lowercasing) and, for attributes with a value, 'dashType' (quote type reported by split_tag_attributes())
	 */
	function get_tag_attributes($tag,$deHSC=0)	{
		$split = $this->split_tag_attributes($tag);
		$components = $split[0];
		$metaC = $split[1];

		$attributes = array();
		$attributesMeta = array();

		if (!is_array($components))	{
			return;
		}

		$pendingName = '';		// Name of the attribute most recently seen, still waiting for a possible value
		$expectValue = false;	// True only directly after a "=" component

		foreach ($components as $idx => $component)	{
			if ($component == '=')	{
				$expectValue = true;
				continue;
			}

			if ($expectValue)	{
					// The value is only assigned if there is an attribute name waiting for it
				if ($pendingName)	{
					$attributes[$pendingName] = $deHSC ? t3lib_div::htmlspecialchars_decode($component) : $component;
					$attributesMeta[$pendingName]['dashType'] = $metaC[$idx];
					$pendingName = '';
				}
			} else {
					// A new attribute name; registered with an empty value until a value follows
				$cleanName = preg_replace('/[^[:alnum:]_\:\-]/','',$component);
				if ($cleanName)	{
					$pendingName = strtolower($cleanName);
					$attributesMeta[$pendingName] = array();
					$attributesMeta[$pendingName]['origTag'] = $cleanName;
					$attributes[$pendingName] = '';
				}
			}
			$expectValue = false;
		}

		return array($attributes,$attributesMeta);
	}
476

477
478
479
480
481
482
483
484
485
486
	/**
	 * Returns an array with the 'components' from an attribute list. The result is normally analyzed by get_tag_attributes
	 * Removes tag-name if found: a leading "<tagname" is dropped whether or not attributes follow it
	 * (so "<br>" yields no components), and so is a trailing ">".
	 *
	 * Components are attribute names, "=" signs and attribute values, in the order they appear.
	 * Quoted values are returned without their quotes; the quote character is reported in the meta array.
	 *
	 * @param	string		The tag or attributes
	 * @return	array		array(components, meta): two lists with identical keys. meta holds '"' or "'" for a quoted value and an empty string for everything else.
	 * @access private
	 * @see t3lib_div::split_tag_attributes()
	 */
	function split_tag_attributes($tag)	{
		$matches = array();
			// Group 1 = the attribute list: leading whitespace, an optional "<tagname" and a trailing ">" are left out.
			// The tag name must be allowed to end at ">" as well as at whitespace; otherwise a tag without
			// attributes would be mistaken for an attribute list.
		if (preg_match('/^\s*(?:\<[^\s\>]*)?\s*(.*?)\s*\>?$/s', $tag, $matches)!==1)	{
			return array(array(), array());
		}
		$tag_tmp = $matches[1];

		$metaValue = array();
		$value = array();
		$matches = array();
			// Tokens: double quoted value | single quoted value | unquoted name or value | equal sign
		if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $tag_tmp, $matches)>0)	{
			foreach ($matches[1] as $part)	{
				$firstChar = substr($part, 0, 1);
				if ($firstChar=='"' || $firstChar=="'")	{
						// Quoted value: remember the quote type, strip the quotes
					$metaValue[] = $firstChar;
					$value[] = substr($part, 1, -1);
				} else	{
					$metaValue[] = '';
					$value[] = $part;
				}
			}
		}
		return array($value,$metaValue);
	}

Kasper Skårhøj's avatar
Kasper Skårhøj committed
511
512
513
514
515
516
517
	/**
	 * Checks whether block/solo tags are found in the correct amounts in HTML content
	 * Block tags are tags which are required to have an equal amount of start and end tags, eg. "<table>...</table>"
	 * Solo tags are tags which are required to have ONLY start tags (possibly with an XHTML ending like ".../>")
	 * A start tag is counted when the tag name is followed by whitespace, ">" or "/>"; the content is lowercased before counting.
	 * Tag names in the lists are trimmed; empty names are skipped.
	 * NOTICE: Correct XHTML might actually fail since "<br></br>" is allowed as well as "<br/>". However only the LATTER is accepted by this function (with "br" in the "solo-tag" list), the first example will result in a warning.
	 * NOTICE: Correct XHTML might actually fail since "<p/>" is allowed as well as "<p></p>". However only the LATTER is accepted by this function (with "p" in the "block-tag" list), the first example will result in an ERROR!
	 * NOTICE: Correct HTML version "something" allows eg. <p> and <li> to be NON-ended (implicitly ended by other tags). However this is NOT accepted by this function (with "p" and "li" in the block-tag list) and it will result in an ERROR!
	 *
	 * @param	string		HTML content to analyze
	 * @param	string		Tag names for block tags (eg. table or div or p) in lowercase, commalist (eg. "table,div,p")
	 * @param	string		Tag names for solo tags (eg. img, br or input) in lowercase, commalist ("img,br,input")
	 * @return	array		Analyse data with the keys 'counts' (start-tag count per tag found), 'errors', 'warnings', 'blocks' (array(start count, end count, difference) per block tag) and 'solo' (array(start count, end count) per solo tag)
	 */
	function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')	{
		$content = strtolower($content);
		$analyzedOutput=array();
		$analyzedOutput['counts']=array();	// Counts appearances of start-tags
		$analyzedOutput['errors']=array();	// Lists ERRORS
		$analyzedOutput['warnings']=array();	// Lists warnings.
		$analyzedOutput['blocks']=array();	// Lists stats for block-tags
		$analyzedOutput['solo']=array();	// Lists stats for solo-tags

		$found = array();	// Receives the matches from preg_match_all(); only the returned count is used

			// Block tags, must have endings...
		$blockTags = explode(',',$blockTags);
		foreach($blockTags as $tagName)	{
			$tagName = trim($tagName);
			if ($tagName==='')	continue;	// An empty name would match "<>" and "< ..." and register under the key ''

			$tagRegex = preg_quote($tagName,'/');
				// "\/?\>" makes sure that "<p/>" counts as a start tag as well
			$countBegin = (int)preg_match_all('/\<'.$tagRegex.'(\s|\/?\>)/s',$content,$found);
			$countEnd = (int)preg_match_all('/\<\/'.$tagRegex.'(\s|\>)/s',$content,$found);
			$analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd);
			if ($countBegin)	$analyzedOutput['counts'][$tagName]=$countBegin;
			if ($countBegin-$countEnd)	{
				if ($countBegin-$countEnd > 0)	{
					$analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
				} else {
					$analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
				}
			}
		}

			// Solo tags, must NOT have endings...
		$soloTags = explode(',',$soloTags);
		foreach($soloTags as $tagName)	{
			$tagName = trim($tagName);
			if ($tagName==='')	continue;

			$tagRegex = preg_quote($tagName,'/');
				// "\/?\>" makes sure that "<br/>" (no space before the slash) is counted, too
			$countBegin = (int)preg_match_all('/\<'.$tagRegex.'(\s|\/?\>)/s',$content,$found);
			$countEnd = (int)preg_match_all('/\<\/'.$tagRegex.'(\s|\>)/s',$content,$found);
			$analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd);
			if ($countBegin)	$analyzedOutput['counts'][$tagName]=$countBegin;
			if ($countEnd)	{
				$analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
			}
		}

		return $analyzedOutput;
	}
Kasper Skårhøj's avatar
Kasper Skårhøj committed
563

564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580











	/*********************************
	 *
	 * Clean HTML code
	 *
	 *********************************/

Kasper Skårhøj's avatar
Kasper Skårhøj committed
581
582
	/**
	 * Function that can clean up HTML content according to configuration given in the $tags array.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
583
584
	 *
	 * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:		 $tags = array_flip(explode(',','b,a,i,u'))
Kasper Skårhøj's avatar
Kasper Skårhøj committed
585
	 * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
Kasper Skårhøj's avatar
Kasper Skårhøj committed
586
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
587
588
589
590
591
	 * 	$tags[$tagname] = Array(
	 * 		'overrideAttribs' => ''		If set, this string is preset as the attributes of the tag
	 * 		'allowedAttribs' =>   '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
	 * 		'fixAttrib' => Array(
	 * 			'[attribute name]' => Array (
Kasper Skårhøj's avatar
   
Kasper Skårhøj committed
592
593
	 * 				'set' => Force the attribute value to this value.
	 * 				'unset' => Boolean: If set, the attribute is unset.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
594
595
596
	 * 				'default' => 	If no attribute exists by this name, this value is set as default value (if this value is not blank)
	 * 				'always' => 	Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
	 * 				'trim,intval,lower,upper' => 	All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
597
	 * 				'range' => Array ('[low limit]','[high limit, optional]')		Setting integer range.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
598
599
600
601
602
603
604
605
606
607
608
	 * 				'list' => Array ('[value1/default]','[value2]','[value3]')		Attribute must be in this list. If not, the value is set to the first element.
	 * 				'removeIfFalse' => 	Boolean/'blank'.	If set, then the attribute is removed if it is 'false'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
	 * 				'removeIfEquals' => 	[value]	If the attribute value matches the value set here, then it is removed.
	 * 				'casesensitiveComp' => 1	If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
	 * 			)
	 * 		),
	 * 		'protect' => '',	Boolean. If set, the tag <> is converted to &lt; and &gt;
	 * 		'remap' => '',		String. If set, the tagname is remapped to this tagname
	 * 		'rmTagIfNoAttrib' => '',	Boolean. If set, then the tag is removed if no attributes happend to be there.
	 * 		'nesting' => '',	Boolean/'global'. If set true, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
	 * 	)
Kasper Skårhøj's avatar
Kasper Skårhøj committed
609
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
610
611
612
	 * @param	string		$content; is the HTML-content being processed. This is also the result being returned.
	 * @param	array		$tags; is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
	 * @param	string		$keepAll; boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
Kasper Skårhøj's avatar
Kasper Skårhøj committed
613
	 * @param	integer		$hSC; Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
Kasper Skårhøj's avatar
Kasper Skårhøj committed
614
615
616
617
618
619
	 * @param	array		Configuration array send along as $conf to the internal functions ->processContent() and ->processTag()
	 * @return	string		Processed HTML content
	 */
	function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())	{
		$newContent = array();
			// Splitting on "<": the first token is the content before any tag, each following token begins right after a "<".
		$tokArr = explode('<',$content);
		$newContent[] = $this->processContent(array_shift($tokArr),$hSC,$addConfig);

		$c = 1;	// Next index in $newContent. Positions of start tags are recorded so unbalanced tags can be unset again.
		$tagRegister = array();	// tagname => list of $newContent positions of start tags which are still open
		$tagStack = array();	// Names of the currently open tags that are under 'global' nesting control
		foreach ($tokArr as $tok)	{
			$firstChar = substr($tok,0,1);
			if (preg_match('/[[:alnum:]\/]/',$firstChar)==1)	{		// It is a tag... (first char is a-z0-9 or /). This also avoids triggering on <?xml..> and <!DOCTYPE..>
				$tagEnd = strpos($tok,'>');
				if ($tagEnd)	{	// If there is and end-bracket...	tagEnd can't be 0 as the first character can't be a >
					$endTag = $firstChar=='/' ? 1 : 0;
					$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
					$tagParts = preg_split('/\s+/s',$tagContent,2);
					$tagName = strtolower($tagParts[0]);
					if (isset($tags[$tagName]))	{
						if (is_array($tags[$tagName]))	{	// If there is processing to do for the tag:

							if (!$endTag)	{	// If NOT an endtag, do attribute processing
									// Override attributes
								if (strcmp($tags[$tagName]['overrideAttribs'],''))	{
									$tagParts[1]=$tags[$tagName]['overrideAttribs'];
								}

									// Allowed attributes
								if (strcmp($tags[$tagName]['allowedAttribs'],''))	{
									if (!strcmp($tags[$tagName]['allowedAttribs'],'0'))	{	// No attribs allowed
										$tagParts[1]='';
									} elseif (trim($tagParts[1])) {
										$tagAttrib = $this->get_tag_attributes($tagParts[1]);
										$tagParts[1]='';
										$newTagAttrib = array();
										if (!($tList = $tags[$tagName]['_allowedAttribs']))	{
												// Just explode attributes for tag once
											$tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
										}
										foreach ($tList as $allowTag)	{
											if (isset($tagAttrib[0][$allowTag]))	$newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
										}
										$tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
									}
								}

									// Fixed attrib values
								if (is_array($tags[$tagName]['fixAttrib']))	{
									$tagAttrib = $this->get_tag_attributes($tagParts[1]);
									$tagParts[1]='';
									foreach ($tags[$tagName]['fixAttrib'] as $attr => $params)	{
										if (strlen($params['set']))	$tagAttrib[0][$attr] = $params['set'];
										if (strlen($params['unset']))	unset($tagAttrib[0][$attr]);
										if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))	$tagAttrib[0][$attr]=$params['default'];
										if ($params['always'] || isset($tagAttrib[0][$attr]))	{
											if ($params['trim'])	{$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
											if ($params['intval'])	{$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
											if ($params['lower'])	{$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
											if ($params['upper'])	{$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
											if ($params['range'])	{
												if (isset($params['range'][1]))	{
													$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
												} else {
													$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
												}
											}
											if (is_array($params['list']))	{
													// The cache key includes the attribute name; keyed by tag name alone, two list-constrained attributes of the same tag would share one cached list.
												if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName.'|'.$attr)))	$tagAttrib[0][$attr]=$params['list'][0];
											}
											if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],'')))	{
												unset($tagAttrib[0][$attr]);
											}
											if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp'])))	{
												unset($tagAttrib[0][$attr]);
											}
											if ($params['prefixLocalAnchors'])	{
												if (substr($tagAttrib[0][$attr],0,1)=='#')	{
													$prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
													$tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
													if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL')))		{
														$tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
													}
												}
											}
											if ($params['prefixRelPathWith'])	{
												$urlParts = parse_url($tagAttrib[0][$attr]);
												if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/')	{	// If it is NOT an absolute URL (by http: or starting "/")
													$tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
												}
											}
											if ($params['userFunc'])	{
												$tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
											}
										}
									}
									$tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
								}
							} else {	// If endTag, remove any possible attributes:
								$tagParts[1]='';
							}

								// Protecting the tag by converting < and > to &lt; and &gt; ??
							if ($tags[$tagName]['protect'])	{
								$lt = '&lt;';	$gt = '&gt;';
							} else {
								$lt = '<';	$gt = '>';
							}
								// Remapping tag name?
							if ($tags[$tagName]['remap'])	$tagParts[0] = $tags[$tagName]['remap'];

								// rmTagIfNoAttrib: a start tag left without attributes is dropped when the option is set
							if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib'])	{
								$setTag = 1;
									// A tag configured with BOTH "no attributes allowed" and "remove if no attributes" (the 'removeTags' combination) can never keep a start tag, so its end tags are dropped as well.
									// For 'rmTagIfNoAttrib' alone the end tag is kept; combine it with 'nesting' to get rid of end tags whose start tag was removed.
								if ($endTag && $tags[$tagName]['rmTagIfNoAttrib'] && isset($tags[$tagName]['allowedAttribs']) && !strcmp($tags[$tagName]['allowedAttribs'],'0'))	{
									$setTag = 0;
								}

								if ($tags[$tagName]['nesting'])	{
									if (!is_array($tagRegister[$tagName]))	$tagRegister[$tagName]=array();

									if ($endTag)	{
										$correctTag=1;
										if ($tags[$tagName]['nesting']=='global')	{
											$lastEl = end($tagStack);
											if (strcmp($tagName,$lastEl))	{
												if (in_array($tagName,$tagStack))	{
														// The tag is open further down the stack: remove all start tags opened after it, since they are closed in the wrong order.
													while(count($tagStack) && strcmp($tagName,$lastEl))	{
														$elPos = end($tagRegister[$lastEl]);
														unset($newContent[$elPos]);

														array_pop($tagRegister[$lastEl]);
														array_pop($tagStack);
														$lastEl = end($tagStack);
													}
												} else {
													$correctTag=0;	// In this case the end tag has no open start tag on the stack and is discarded.
												}
											}
										}
										if (!count($tagRegister[$tagName]) || !$correctTag)	{
											$setTag=0;
										} else {
											array_pop($tagRegister[$tagName]);
											if ($tags[$tagName]['nesting']=='global')	{array_pop($tagStack);}
										}
									} else {
										array_push($tagRegister[$tagName],$c);
										if ($tags[$tagName]['nesting']=='global')	{array_push($tagStack,$tagName);}
									}
								}

								if ($setTag)	{
										// Setting the tag
									$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
								}
							}
						} else {
							$newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
						}
					} elseif ($keepAll) {	// This is if the tag was not defined in the array for processing:
						if (!strcmp($keepAll,'protect'))	{
							$lt = '&lt;';	$gt = '&gt;';
						} else {
							$lt = '<';	$gt = '>';
						}
						$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
					}
					$newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
				} else {
					$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// There were not end-bracket, so no tag...
				}
			} else {
				$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// It was not a tag anyways
			}
		}

			// Unsetting tags: start tags still registered here were never closed, so they are removed from the output.
		foreach ($tagRegister as $tag => $positions)	{
			foreach ($positions as $pKey)	{
				unset($newContent[$pKey]);
			}
		}

		return implode('',$newContent);
	}

	/**
	 * Applies htmlspecialchars() to a value, or reverts it, depending on $dir.
	 * Any other $dir than 1, 2 or -1 returns the value untouched.
	 *
	 * @param	string		Input value
	 * @param	integer		Direction: 1 = encode, 2 = encode but pass the result through t3lib_div::deHSCentities(), -1 = decode &gt; &lt; &quot; and &amp;
	 * @return	string		Output value
	 */
	function bidir_htmlspecialchars($value,$dir)	{
		switch ($dir)	{
			case 1:
				return htmlspecialchars($value);
			case 2:
				return t3lib_div::deHSCentities(htmlspecialchars($value));
			case -1:
					// "&amp;" is decoded last so that e.g. "&amp;lt;" ends up as "&lt;" and not as "<"
				return str_replace(
					array('&gt;','&lt;','&quot;','&amp;'),
					array('>','<','"','&'),
					$value
				);
		}
		return $value;
	}

	/**
	 * Prefixes the relative paths of hrefs/src/action/background in the tags [embed,td,table,body,img,input,form,link,script,a] in the $content with the $main_prefix or an alternative given by $alternatives.
	 * Afterwards all url(...) references inside <style> sections are wrapped in prefix and suffix as well.
	 *
	 * @param	string		Prefix string
	 * @param	string		HTML content
	 * @param	array		Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase. The <style> section is the exception: its alternative is looked up with the lowercase key "style".
	 * @param	string		Suffix string (put after the resource).
	 * @return	string		Processed HTML content
	 */
	function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')	{
			// Which attribute holds the resource reference for each of the processed tags:
		$resourceAttribs = array(
			'td' => 'background',
			'body' => 'background',
			'table' => 'background',
			'img' => 'src',
			'input' => 'src',
			'script' => 'src',
			'embed' => 'src',
			'link' => 'href',
			'a' => 'href',
			'form' => 'action'
		);

		$parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
		foreach ($parts as $k => $v)	{
			if ($k%2)	{	// Odd keys are the tags, even keys the content between them
				$firstTagName = $this->getFirstTagName($v);	// The 'name' of the first tag
				$lcTagName = strtolower($firstTagName);
				if (!isset($resourceAttribs[$lcTagName]))	continue;
				$attrName = $resourceAttribs[$lcTagName];

				$params = $this->get_tag_attributes($v,1);
				if (!empty($params[0][$attrName]))	{
					$prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;
					$params[0][$attrName] = $this->prefixRelPath($prefix,$params[0][$attrName],$suffix);

						// The tag is rebuilt only when something was changed. Detect tag-ending so that it is re-applied correctly.
					$tagEnd = substr($v,-2)=='/>' ? ' />' : '>';
					$parts[$k] = '<'.trim($lcTagName.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
				}
			}
		}
		$content = implode('',$parts);

			// Fix <style> section:
		$prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
		if (strlen($prefix))	{
				// Prefix and suffix are literal text: "\" and "$" are escaped and the groups are referenced as ${n}, so that e.g. a suffix beginning with a digit cannot be read as part of a back-reference.
			$replacement = '${1}'
				.str_replace(array('\\','$'),array('\\\\','\\$'),$prefix)
				.'${2}'
				.str_replace(array('\\','$'),array('\\\\','\\$'),$suffix)
				.'${3}';

			$parts = $this->splitIntoBlock('style',$content);
			foreach($parts as $k => $v)	{
				if ($k%2)	{
						// Case-insensitive match of url( ... ) with optional quotes (PCRE replacement for the removed eregi_replace())
					$replaced = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$v);
					if ($replaced !== NULL)	{	// NULL signals a PCRE error; keep the block unchanged in that case
						$parts[$k] = $replaced;
					}
				}
			}
			$content = implode('',$parts);
		}

		return $content;
	}

Kasper Skårhøj's avatar
Kasper Skårhøj committed
910
911
	/**
	 * Internal sub-function for ->prefixResourcePath()
	 * A value counts as absolute (and is returned unchanged) if parse_url() finds a scheme in it or if it starts with "/".
	 *
	 * @param	string		Prefix string
	 * @param	string		Relative path/URL
	 * @param	string		Suffix string
	 * @return	string		Output path, wrapped in prefix and suffix if it was not absolute
	 * @access private
	 */
	function prefixRelPath($prefix,$srcVal,$suffix='')	{
		$pU = parse_url($srcVal);
			// empty() covers both a missing "scheme" key and parse_url() returning FALSE for a malformed value
		if (empty($pU['scheme']) && substr($srcVal, 0, 1)!='/')	{ // If not an absolute URL.
			$srcVal = $prefix.$srcVal.$suffix;
		}
		return $srcVal;
	}

	/**
	 * Cleans up the <font> tags in the input $value.
	 * A font tag is kept only if at least one of its face/size/color attributes is both present and enabled by the matching keep-flag; the kept tag is rebuilt with those attributes only.
	 * Otherwise the tag is removed and just its (recursively cleaned) inner content remains.
	 *
	 * @param	string		HTML content with font-tags inside to clean up.
	 * @param	boolean		If set, keep "face" attribute
	 * @param	boolean		If set, keep "size" attribute
	 * @param	boolean		If set, keep "color" attribute
	 * @return	string		Processed HTML content
	 */
	function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)	{
		$keepFlags = array(
			'face' => $keepFace,
			'size' => $keepSize,
			'color' => $keepColor
		);

			// Depends on the font end-tags being properly set, since the content is split into blocks.
		$blocks = $this->splitIntoBlock('font',$value);
		foreach ($blocks as $idx => $block)	{
			if (!($idx%2))	continue;	// Even keys hold the content outside font-blocks

			$fontAttribs = $this->get_tag_attributes_classic($this->getFirstTag($block));
			$kept = array();
			foreach ($keepFlags as $name => $keep)	{
				if ($keep && $fontAttribs[$name])	{
					$kept[] = $name.'="'.$fontAttribs[$name].'"';
				}
			}

				// Nested font tags inside this block are cleaned with the same settings
			$inner = $this->cleanFontTags($this->removeFirstAndLastTag($block),$keepFace,$keepSize,$keepColor);
			$blocks[$idx] = count($kept) ? '<font '.implode(' ',$kept).'>'.$inner.'</font>' : $inner;
		}
		return implode('',$blocks);
	}
957

Kasper Skårhøj's avatar
Kasper Skårhøj committed
958
959
	/**
	 * This is used to map certain tag-names into other names.
	 * Both start and end tags are renamed. Attributes and a trailing "/" of the tag are carried over unchanged.
	 * A from-tag only matches a complete tag name (mapping "b" does not touch "<br>").
	 *
	 * @param	string		HTML content
	 * @param	array		Array with tag key=>value pairs where key is from-tag and value is to-tag. The from-tag is inserted into the regular expression as it is.
	 * @param	string		Alternative less-than char to search for (literal string, it is quoted for the regex internally)
	 * @param	string		Alternative less-than char to replace with (inserted as it is into the replace regex string)
	 * @return	string		Processed HTML content
	 */
	function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')	{
		$ltPattern = preg_quote($ltChar,'/');
		foreach($tags as $from => $to)	{
				// Group 1: "/" of an end tag. Group 2: whitespace + attributes (optional, so "<b>" matches too). Group 3: "/" of a self-closing tag.
				// The groups are referenced as ${n} since the to-tag may begin with a digit (e.g. md5 hashes), which would otherwise turn "$1" into a two-digit reference.
			$value = preg_replace(
				'/'.$ltPattern.'(\/)?(?:'.$from.')(\s[^>]*?)?(\/)?>/',
				$ltChar2.'${1}'.$to.'${2}${3}>',
				$value
			);
		}
		return $value;
	}

	/**
	 * This converts htmlspecialchar()'ed tags (from $tagList) back to real tags. Eg. '&lt;strong&gt;' would be converted back to '<strong>' if found in $tagList
	 * With an empty $tagList every protected tag is converted back.
	 *
	 * @param	string		HTML content
	 * @param	string		Tag list, separated by comma. Lowercase!
	 * @return	string		Processed HTML content
	 */
	function unprotectTags($content,$tagList='')	{
		$allowedTags = t3lib_div::trimExplode(',',$tagList,1);
		$pieces = explode('&lt;',$content);
		foreach ($pieces as $idx => $piece)	{
			if ($idx === 0)	continue;	// bypass the first, it is the content before any "&lt;"

			$restored = FALSE;
			$lead = substr($piece,0,1);
			if (strcmp(trim($lead),''))	{	// A tag name must follow the "&lt;" directly
				$halves = explode('&gt;',$piece,2);
				$nameEnd = strlen($halves[0]);
				if (strlen($piece)!=$nameEnd)	{	// Only if a closing "&gt;" was found
					$isClosing = $lead=='/' ? 1 : 0;
					$inner = substr($piece,$isClosing,$nameEnd-$isClosing);
					$split = preg_split('/\s+/s',$inner,2);
					if (!strcmp($tagList,'') || in_array(strtolower($split[0]),$allowedTags))	{
						$pieces[$idx] = '<'.$halves[0].'>'.$halves[1];
						$restored = TRUE;
					}
				}
			}
			if (!$restored)	{
					// Not a tag to unprotect: put back the "&lt;" which explode() removed
				$pieces[$idx] = '&lt;'.$piece;
			}
		}

		return implode('',$pieces);
	}
1005

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1006
1007
1008
	/**
	 * Strips tags except the tags in the list, $tagList
	 * OBSOLETE - use PHP function strip_tags()
	 * Works by mapping the tags to keep into md5-hash placeholders with "_" instead of "<", running strip_tags() and mapping the placeholders back.
	 *
	 * @param	string		Value to process
	 * @param	string		List of tags
	 * @return	string		Output value
	 * @ignore
	 */
	function stripTagsExcept($value,$tagList)	{
		$toPlaceholder = array();
		$fromPlaceholder = array();
		foreach (t3lib_div::trimExplode(',',$tagList,1) as $keepTag)	{
			$hash = md5($keepTag);
			$toPlaceholder[$keepTag] = $hash;
			$fromPlaceholder[$hash] = $keepTag;
		}

		$hidden = $this->mapTags($value,$toPlaceholder,'<','_');
		$stripped = strip_tags($hidden);
		return $this->mapTags($stripped,$fromPlaceholder,'_','<');
	}
1028

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1029
	/**
	 * Internal function for case shifting of a string or whole array
	 * If $flag (case sensitive) is set the input is returned as it is, otherwise the string / all array values are converted to uppercase.
	 *
	 * @param	mixed		Input string/array
	 * @param	boolean		If this boolean (caseSensitive) is false, the string / the array values are returned in uppercase
	 * @param	string		Key string used for internal caching of the results for arrays. Could be an MD5 hash of the serialized version of the input $str if that is an array. If empty, nothing is cached.
	 * @return	mixed		Output string/array, processed
	 * @access private
	 */
	function caseShift($str,$flag,$cacheKey='')	{
		if (!is_array($str))	{
			return $flag ? $str : strtoupper($str);
		}

			// Without a cache key from the caller nothing may be cached: the flag-suffix alone ("1") would make unrelated arrays share one cache entry.
		if (!strcmp($cacheKey,''))	{
			return $flag ? $str : array_map('strtoupper',$str);
		}

			// The flag is part of the key since the result differs for the case sensitive and insensitive variant
		$cacheKey .= $flag?1:0;
		if (!isset($this->caseShift_cache[$cacheKey]))	{
			$this->caseShift_cache[$cacheKey] = $flag ? $str : array_map('strtoupper',$str);
		}
		return $this->caseShift_cache[$cacheKey];
	}
1055

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1056
1057
	/**
	 * Compiling an array with tag attributes into a string
	 * An attribute is written with a value if the value is non-empty or if the meta information holds a 'dashType' for it; otherwise only the attribute name is written.
	 *
	 * @param	array		Tag attributes
	 * @param	array		Meta information about these attributes: 'origTag' (attribute name as originally written) and 'dashType' (the quote character used)
	 * @param	boolean		If set, then the attribute names will be set in lower case, value quotes in double-quotes and the value will be htmlspecialchar()'ed
	 * @return	string		Imploded attributes, eg: 'attribute="value" attrib2="value2"'
	 * @access private
	 */
	function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)	{
		$accu=array();
		if (!is_array($tagAttrib))	{
			return '';
		}
		foreach ($tagAttrib as $k =>$v)	{
			$hasValue = strcmp($v,'') || isset($meta[$k]['dashType']);
			if ($xhtmlClean)	{
				$attr=strtolower($k);
				if ($hasValue)	{
					$attr.='="'.htmlspecialchars($v).'"';
				}
			} else {
					// Attributes which were added during processing have no meta information, so it must be checked before it is read.
				$attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
				if ($hasValue)	{
						// Original quote type if known, otherwise no quotes for integers and double-quotes for everything else
					$dash = !empty($meta[$k]['dashType']) ? $meta[$k]['dashType'] : (t3lib_div::testInt($v)?'':'"');
					$attr.='='.$dash.$v.$dash;
				}
			}
			$accu[]=$attr;
		}
		return implode(' ',$accu);
	}
1084

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1085
1086
	/**
	 * Get tag attributes, the classic version (which had some limitations?)
	 * Returns only the first element of the result from ->get_tag_attributes(), i.e. without the second element which other callers pass on as meta information.
	 *
	 * @param	string		The tag
	 * @param	boolean		De-htmlspecialchar flag.
	 * @return	array		The first element of the ->get_tag_attributes() result, or an empty array if that is not an array
	 * @access private
	 */
	function get_tag_attributes_classic($tag,$deHSC=0)	{
		$parsed = $this->get_tag_attributes($tag,$deHSC);
		if (is_array($parsed[0]))	{
			return $parsed[0];
		}
		return array();
	}

1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
	/**
	 * Indents every line of the input content with $number instances of $indentChar
	 * Carriage returns (chr(13)) are removed, so the result has linefeed-only line breaks.
	 *
	 * @param	string		Content string, multiple lines.
	 * @param	integer		Number of indents
	 * @param	string		Indent character/string
	 * @return	string		Indented code (typ. HTML)
	 */
	function indentLines($content, $number=1, $indentChar="\t")	{
		$indent = str_pad('', $number*strlen($indentChar), $indentChar);
		$unixContent = str_replace(chr(13),'',$content);
			// One indent in front of the first line, and one after every linefeed
		return $indent.str_replace(chr(10), chr(10).$indent, $unixContent);
	}

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1115
1116
	/**
	 * Converts TSconfig into an array for the HTMLcleaner function.
	 * The returned array holds, in this order: the tags array, the "keepNonMatchedTags" value as string, the "htmlSpecialChars" value as integer and an additional configuration array ('xhtml' is set if "xhtml_cleaning" is enabled).
	 *
	 * @param	array		TSconfig for HTMLcleaner
	 * @param	array		Array of tags to keep; it is merged onto the tags from "allowTags" and then modified by the other TSconfig properties
	 * @return	array		Array with four elements, see above
	 * @access private
	 */
	function HTMLparserConfig($TSconfig,$keepTags=array())	{
			// Allow tags (base list, merged with incoming array)
		$alTags = array_flip(t3lib_div::trimExplode(',',strtolower(isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : ''),1));
		$keepTags = array_merge($alTags,$keepTags);

			// Set config properties.
		if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.']))	{
				// First the plain values: "0" removes the tag from the list, "1" adds it if it is not there already
			foreach ($TSconfig['tags.'] as $key => $tagC)	{
				if (!is_array($tagC) && $key==strtolower($key))	{
					if (!strcmp($tagC,'0'))	unset($keepTags[$key]);
					if (!strcmp($tagC,'1') && !isset($keepTags[$key]))	$keepTags[$key]=1;
				}
			}

				// Then the sub-arrays ("tagname." keys) with the processing options for the tag
			foreach ($TSconfig['tags.'] as $key => $tagC)	{
				if (is_array($tagC) && $key==strtolower($key))	{
					$key=substr($key,0,-1);	// Removing the trailing dot
					if (!isset($keepTags[$key]) || !is_array($keepTags[$key]))	$keepTags[$key]=array();
					if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.']))	{
						foreach ($tagC['fixAttrib.'] as $atName => $atConfig)	{
							if (is_array($atConfig))	{
								$atName=substr($atName,0,-1);
								if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName]))	{
									$keepTags[$key]['fixAttrib'][$atName]=array();
								}
								$fixConf = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);		// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
									// "range" and "list" are comma lists in TSconfig, but HTMLcleaner() expects arrays
								foreach (array('range','list') as $listProperty)	{
									if (isset($fixConf[$listProperty]) && !is_array($fixConf[$listProperty]) && strcmp($fixConf[$listProperty],''))	{
										$fixConf[$listProperty] = t3lib_div::trimExplode(',',$fixConf[$listProperty]);
									}
								}
								$keepTags[$key]['fixAttrib'][$atName] = $fixConf;
							}
						}
					}
					unset($tagC['fixAttrib.']);
					unset($tagC['fixAttrib']);
					$keepTags[$key] = array_merge($keepTags[$key],$tagC);			// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
				}
			}
		}

			// Properties holding a comma list of tag names => the option (name, value) to set for each of those tags if the tag is in the list of tags.
			// The order matters: "globalNesting" is applied after "localNesting" and overrides it.
		$tagListProperties = array(
			'localNesting' => array('nesting',1),
			'globalNesting' => array('nesting','global'),
			'rmTagIfNoAttrib' => array('rmTagIfNoAttrib',1),
			'noAttrib' => array('allowedAttribs',0)
		);
		foreach ($tagListProperties as $property => $option)	{
			if (empty($TSconfig[$property]))	continue;
			foreach (t3lib_div::trimExplode(',',strtolower($TSconfig[$property]),1) as $tn)	{
				if (isset($keepTags[$tn]))	{
						// Tags coming from "allowTags" only have a scalar value which must be turned into an array before an option can be set (this was missing for "localNesting").
					if (!is_array($keepTags[$tn]))	$keepTags[$tn]=array();
					$keepTags[$tn][$option[0]]=$option[1];
				}
			}
		}

			// removeTags: No attributes allowed + remove if no attributes, so these tags always disappear. Applied regardless of the tag being in the list already.
		if (!empty($TSconfig['removeTags']))	{
			foreach (t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1) as $tn)	{
				$keepTags[$tn]=array(
					'allowedAttribs' => 0,
					'rmTagIfNoAttrib' => 1
				);
			}
		}

			// Create additional configuration:
		$addConfig=array();
		if (!empty($TSconfig['xhtml_cleaning']))	{
			$addConfig['xhtml']=1;
		}

		return array(
			$keepTags,
			''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
			intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
			$addConfig
		);
	}
1221

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1222
1223
1224
	/**
	 * Tries to convert the content to be XHTML compliant and other stuff like that.
	 * STILL EXPERIMENTAL. See comments below.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1225
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
	 * 			What it does NOT do (yet) according to XHTML specs.:
	 * 			- Wellformedness: Nesting is NOT checked
	 * 			- name/id attribute issue is not observed at this point.
	 * 			- Certain nesting of elements not allowed. Most interesting, <PRE> cannot contain img, big,small,sub,sup ...
	 * 			- Wrapping scripts and style element contents in CDATA - or alternatively they should have entitites converted.
	 * 			- Setting charsets may put some special requirements on both XML declaration/ meta-http-equiv. (C.9)
	 * 			- UTF-8 encoding is in fact expected by XML!!
	 * 			- stylesheet element and attribute names are NOT converted to lowercase
	 * 			- ampersands (and entities in general I think) MUST be converted to an entity reference! (&amps;). This may mean further conversion of non-tag content before output to page. May be related to the charset issue as a whole.
	 * 			- Minimized values not allowed: Must do this: selected="selected"
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1236
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1237
	 * 			What it does at this point:
1238
	 * 			- All tags (frame,base,meta,link + img,br,hr,area,input) are ended with "/>" - others?
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1239
1240
1241
	 * 			- Lowercase for elements and attributes
	 * 			- All attributes in quotes
	 * 			- Add "alt" attribute to img-tags if it's not there already.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1242
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
	 * @param	string		Content to clean up
	 * @return	string		Cleaned up content returned.
	 * @access private
	 */
	function XHTML_clean($content)	{
			// Delegate to the generic HTML cleaner with XHTML mode switched on and
			// hand its result straight back to the caller.
		return $this->HTMLcleaner(
			$content,
			array(),				// No per-tag configuration: no tag gets special treatment
			1,						// Keep ALL tags
			0,						// htmlspecialchars setting. NOTE(review): 0 presumably leaves content unconverted (converting it would break <script> content) - confirm against HTMLcleaner()
			array('xhtml' => 1)		// Additional configuration: request XHTML processing of tags
		);
	}

	/**
	 * Processing all tags themselves
1260
	 * (Some additions by Sacha Vorbeck)
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1261
	 *
1262
	 * @param	string		Tag to process
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1263
1264
1265
	 * @param	array		Configuration array passing instructions for processing. If count()==0, function will return value unprocessed. See source code for details
	 * @param	boolean		Set this if the tag is an end tag.
	 * @param	boolean		If set, just return value straight away
1266
	 * @return	string		Processed value.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
	 * @access private
	 */
	function processTag($value,$conf,$endTag,$protected=0)	{
			// Return immediately if protected or no parameters
		if ($protected || !count($conf))	return $value;
			// OK then, begin processing for XHTML output:
			// STILL VERY EXPERIMENTAL!!
		if ($conf['xhtml'])	{
			if ($endTag)	{	// Endtags are just set lowercase right away
				$value = strtolower($value);
1277
			} elseif (substr($value,0,4)!='<!--') {	// ... and comments are ignored.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1278
				$inValue = substr($value,1,(substr($value,-2)=='/>'?-2:-1));	// Finding inner value with out < >
1279
				list($tagName,$tagP)=preg_split('/\s+/s',$inValue,2);	// Separate attributes and tagname
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1280
				$tagName = strtolower($tagName);
1281

Kasper Skårhøj's avatar
Kasper Skårhøj committed
1282
1283
1284
					// Process attributes
				$tagAttrib = $this->get_tag_attributes($tagP);
				if (!strcmp($tagName,'img') && !isset($tagAttrib[0]['alt']))		$tagAttrib[0]['alt']='';	// Set alt attribute for all images (not XHTML though...)
1285
				if (!strcmp($tagName,'script') && !isset($tagAttrib[0]['type']))	$tagAttrib[0]['type']='text/javascript';	// Set type attribute for all script-tags
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1286
1287
1288
1289
				$outA=array();
				reset($tagAttrib[0]);
				while(list($attrib_name,$attrib_value)=each($tagAttrib[0]))	{
						// Set attributes: lowercase, always in quotes, with htmlspecialchars converted.
1290
					$outA[]=$attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1291
1292
1293
				}
				$newTag='<'.trim($tagName.' '.implode(' ',$outA));
					// All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
1294
				if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || substr($value,-2)=='/>')	{
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1295
1296
1297
1298
1299
1300
1301
					$newTag.=' />';
				} else {
					$newTag.='>';
				}
				$value = $newTag;
			}
		}