class.t3lib_parsehtml.php 51.3 KB
Newer Older
Kasper Skårhøj's avatar
Kasper Skårhøj committed
1
2
3
<?php
/***************************************************************
*  Copyright notice
4
*
5
*  (c) 1999-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
Kasper Skårhøj's avatar
Kasper Skårhøj committed
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
 * Contains class with functions for parsing HTML code.
 *
30
31
32
 * $Id$
 * Revised for TYPO3 3.6 July/2003 by Kasper Skaarhoj
 *
Kasper Skårhøj's avatar
   
Kasper Skårhøj committed
33
 * @author	Kasper Skaarhoj <kasperYYYY@typo3.com>
Kasper Skårhøj's avatar
Kasper Skårhøj committed
34
35
36
37
38
39
 */
/**
 * [CLASS/FUNCTION INDEX of SCRIPT]
 *
 *
 *
40
41
42
 *  106: class t3lib_parsehtml
 *  123:     function getSubpart($content, $marker)
 *  151:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
Kasper Skårhøj's avatar
Kasper Skårhøj committed
43
 *
44
 *              SECTION: Parsing HTML code
45
 *  223:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
Kasper Skårhøj's avatar
Kasper Skårhøj committed
46
 *  284:     function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
47
48
49
50
51
52
53
54
 *  320:     function splitTags($tag,$content)
 *  354:     function getAllParts($parts,$tag_parts=1,$include_tag=1)
 *  373:     function removeFirstAndLastTag($str)
 *  392:     function getFirstTag($str)
 *  407:     function getFirstTagName($str,$preserveCase=FALSE)
 *  422:     function get_tag_attributes($tag,$deHSC=0)
 *  464:     function split_tag_attributes($tag)
 *  507:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
55
56
 *
 *              SECTION: Clean HTML code
Kasper Skårhøj's avatar
   
Kasper Skårhøj committed
57
58
 *  600:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
 *  796:     function bidir_htmlspecialchars($value,$dir)
59
60
61
62
63
64
65
66
67
68
69
70
71
72
 *  819:     function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')
 *  902:     function prefixRelPath($prefix,$srcVal,$suffix='')
 *  920:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
 *  951:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
 *  968:     function unprotectTags($content,$tagList='')
 * 1001:     function stripTagsExcept($value,$tagList)
 * 1024:     function caseShift($str,$flag,$cacheKey='')
 * 1048:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
 * 1077:     function get_tag_attributes_classic($tag,$deHSC=0)
 * 1090:     function indentLines($content, $number=1, $indentChar="\t")
 * 1107:     function HTMLparserConfig($TSconfig,$keepTags=array())
 * 1231:     function XHTML_clean($content)
 * 1253:     function processTag($value,$conf,$endTag,$protected=0)
 * 1299:     function processContent($value,$dir,$conf)
73
74
 *
 * TOTAL FUNCTIONS: 28
Kasper Skårhøj's avatar
Kasper Skårhøj committed
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
 * (This index is automatically created/updated by the extension "extdeveval")
 *
 */




















/**
Kasper Skårhøj's avatar
Kasper Skårhøj committed
99
 * Functions for parsing HTML.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
100
 * You are encouraged to use this class in your own applications
Kasper Skårhøj's avatar
Kasper Skårhøj committed
101
 *
Kasper Skårhøj's avatar
   
Kasper Skårhøj committed
102
 * @author	Kasper Skaarhoj <kasperYYYY@typo3.com>
103
104
 * @package TYPO3
 * @subpackage t3lib
Kasper Skårhøj's avatar
Kasper Skårhøj committed
105
106
107
108
109
110
111
 */
class t3lib_parsehtml {
	var $caseShift_cache=array();


	// *******************************************'
	// COPY FROM class.tslib_content.php: / BEGIN
112
	// substituteSubpart
Kasper Skårhøj's avatar
Kasper Skårhøj committed
113
114
115
116
117
	// Cleaned locally 2/2003 !!!! (so different from tslib_content version)
	// *******************************************'

	/**
	 * Returns the first subpart encapsulated in the marker, $marker (possibly present in $content as a HTML comment)
	 *
	 * The subpart is the text found between the first two occurrences of $marker.
	 * If the markers are wrapped in HTML comments (eg. "<!-- ###CONTENT_PART### begin -->")
	 * the tail of the opening comment and the head of the closing comment are stripped
	 * from the returned string.
	 *
	 * @param	string		Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
	 * @param	string		Marker string, eg. "###CONTENT_PART###"
	 * @return	string		The subpart content. Empty string if $marker is blank or is not found twice in $content.
	 */
	function getSubpart($content, $marker)	{
		if (!$marker)	return '';

			// Position just after the first marker:
		$start = strpos($content, $marker);
		if ($start === FALSE)	return '';
		$start+= strlen($marker);

			// Position of the second marker. Searching from $start (not $start+1) so that an end marker directly following the start marker is found as well.
		$stop = strpos($content, $marker, $start);
		if ($stop === FALSE)	return '';	// No end marker; there is no well defined subpart.

		$sub = (string)substr($content, $start, $stop-$start);

			// If the start marker was inside a HTML comment, skip the rest of that comment:
		$reg = array();
		if (preg_match('/^[^<]*-->/', $sub, $reg))	{
			$start+= strlen($reg[0]);
		}

			// If the end marker is inside a HTML comment, cut off the beginning of that comment:
		$reg = array();
		if (preg_match('/<!--[^>]*$/D', $sub, $reg))	{
			$stop-= strlen($reg[0]);
		}

		return (string)substr($content, $start, $stop-$start);
	}

	/**
	 * Substitutes a subpart in $content with the content of $subpartContent.
	 *
	 * A subpart is the region delimited by two occurrences of $marker. If a marker is wrapped
	 * in a HTML comment ("<!-- ###MARKER### -->") the whole comment is treated as the delimiter.
	 * If $marker does not occur at least twice, $content is returned unchanged.
	 *
	 * @param	string		Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
	 * @param	string		Marker string, eg. "###CONTENT_PART###"
	 * @param	mixed		String to insert instead of the subpart. If $subpartContent happens to be an array, its [0] and [1] elements are wrapped around the content of the subpart (fetched by getSubpart())
	 * @param	boolean		If $recursive is set, the function calls itself with the content set to the remaining part of the content after the second marker. This means that succeeding subparts are ALSO substituted!
	 * @param	boolean		If set, the marker around the subpart is not removed, but kept in the output
	 * @return	string		Processed input content
	 * @see getSubpart()
	 */
	function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)	{
		$markerLen = strlen($marker);
		if (!$markerLen)	return $content;

			// Locate both markers. Explicit FALSE checks are needed since a marker at offset 0 is perfectly valid.
		$start = strpos($content, $marker);
		if ($start === FALSE)	return $content;
		$stop = strpos($content, $marker, $start+$markerLen);
		if ($stop === FALSE)	return $content;
		$stop+= $markerLen;	// $stop now points just after the second marker

		$before_marker = '';
		$after_marker = '';

			// Code before: If the start marker sits inside a HTML comment, move $start back to the beginning of that comment
		$reg = array();
		$startInComment = preg_match('/<!--[^>]*$/D', (string)substr($content, 0, $start), $reg);
		if ($startInComment)	{
			$start-= strlen($reg[0]);
		}
		if ($keepMarker)	{
			$reg_k = array();
			if ($startInComment && preg_match('/^[^>]*-->/', (string)substr($content, $start), $reg_k))	{
				$before_marker = $reg_k[0];	// The complete comment holding the marker
			} else {
				$before_marker = (string)substr($content, $start, $markerLen);
			}
		}
		$before = (string)substr($content, 0, $start);

			// Code after: If the end marker sits inside a HTML comment, move $stop forward to the end of that comment
		$reg = array();
		$stopInComment = preg_match('/^[^<]*-->/', (string)substr($content, $stop), $reg);
		if ($stopInComment)	{
			$stop+= strlen($reg[0]);
		}
		if ($keepMarker)	{
			$reg_k = array();
			if ($stopInComment && preg_match('/<!--[^<]*$/D', (string)substr($content, 0, $stop), $reg_k))	{
				$sLen = strlen($reg_k[0]);	// Length of the complete comment holding the marker
			} else {
				$sLen = $markerLen;
			}
			$after_marker = (string)substr($content, $stop-$sLen, $sLen);
		}
		$after = (string)substr($content, $stop);

			// Replacement content:
		if (is_array($subpartContent))	{
			$substContent = $subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
		} else {
			$substContent = $subpartContent;
		}

			// Process following subparts with the same settings (including $keepMarker):
		if ($recursive && strpos($after, $marker) !== FALSE)	{
			$after = $this->substituteSubpart($after,$marker,$subpartContent,$recursive,$keepMarker);
		}

		return $before.$before_marker.$substContent.$after_marker.$after;
	}
	// *******************************************'
	// COPY FROM class.tslib_content.php: / END
	// *******************************************'






205
206
207
208
209
210
211

	/************************************
	 *
	 * Parsing HTML code
	 *
	 ************************************/

Kasper Skårhøj's avatar
Kasper Skårhøj committed
212
213
	/**
	 * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
	 * Even numbers in the array are outside the blocks, Odd numbers are block-content.
	 * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
	 *
	 * Nested blocks of the listed tags stay inside the outermost block; only blocks on
	 * "ground level" produce a new array element.
	 *
	 * @param	string		List of tags, comma separated. The names are inserted verbatim into a case-insensitive regular expression.
	 * @param	string		HTML-content
	 * @param	boolean		If set, excessive end tags are ignored - you should probably set this in most cases.
	 * @return	array		Even numbers in the array are outside the blocks, Odd numbers are block-content.
	 * @see splitTags(), getAllParts(), removeFirstAndLastTag()
	 */
	function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)	{
		$tags = array_unique(t3lib_div::trimExplode(',',$tag,1));
		$regexStr = '/<\/?('.implode('|',$tags).')(>|[[:space:]][^>]*>)/i';

			// $parts holds the text pieces BETWEEN the matched tags; the tags themselves are re-read from $content via $pointer below.
		$parts = preg_split($regexStr,$content);
		if (!is_array($parts))	return array($content);	// Regex could not be applied; treat all content as being outside any block.

		$newParts = array();
		$pointer = strlen($parts[0]);	// Offset in $content of the next tag to process
		$buffer = $parts[0];
		$nested = 0;
		$partCount = count($parts);
		for ($a=1; $a<$partCount; $a++)	{
			$v = $parts[$a];
			$isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
			$tagLen = strcspn($content,'>',$pointer)+1;	// Length of the tag at $pointer including the closing ">"

			if (!$isEndTag)	{	// We meet a start-tag:
				if (!$nested)	{	// Ground level:
					$newParts[] = $buffer;	// previous buffer stored
					$buffer = '';
				}
				$nested++;	// We are inside now!
				$mbuffer = substr($content,$pointer,strlen($v)+$tagLen);	// New buffer set and pointer increased
				$pointer+= strlen($mbuffer);
				$buffer.= $mbuffer;
			} else {	// If we meet an endtag:
				$nested--;	// decrease nested-level
				$eliminated = 0;
				if ($eliminateExtraEndTags && $nested<0)	{
					$nested = 0;
					$eliminated = 1;
				} else {
					$buffer.= substr($content,$pointer,$tagLen);	// In any case, add the endtag to current buffer and increase pointer
				}
				$pointer+= $tagLen;
				if (!$nested && !$eliminated)	{	// if we're back on ground level, (and not by eliminating tags...)
					$newParts[] = $buffer;
					$buffer = '';
				}
				$mbuffer = substr($content,$pointer,strlen($v));	// New buffer set and pointer increased
				$pointer+= strlen($mbuffer);
				$buffer.= $mbuffer;
			}
		}
		$newParts[] = $buffer;
		return $newParts;
	}

272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
	/**
	 * Splits content into blocks *recursively* and hands tags and content to call back methods.
	 *
	 * The content is divided with ->splitIntoBlock() (extra end tags eliminated). Content outside
	 * blocks is passed to the content call back. For each block an array with the keys
	 * "tag_start", "tag_end", "tag_name", "add_level" and "content" (the recursively processed
	 * inner content) is passed to the tag call back; the block is rebuilt from the returned
	 * "tag_start", "content" and "tag_end" values.
	 *
	 * @param	string		Tag list, see splitIntoBlock()
	 * @param	string		Content, see splitIntoBlock()
	 * @param	object		Object where call back methods are.
	 * @param	string		Name of call back method for content; "function callBackContent($str,$level)"
	 * @param	string		Name of call back method for tags; "function callBackTags($tags,$level)"
	 * @param	integer		Indent level
	 * @return	string		Processed content
	 * @see splitIntoBlock()
	 */
	function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)	{
		$blocks = $this->splitIntoBlock($tag,$content,TRUE);

		foreach($blocks as $idx => $piece)	{
			if (!($idx%2))	{
					// Even index: content outside the blocks
				if ($callBackContent)	{
					$blocks[$idx] = $procObj->$callBackContent($blocks[$idx],$level);
				}
				continue;
			}

				// Odd index: a block. Collect tag information and process the inner content first:
			$nameOfTag = $this->getFirstTagName($piece, TRUE);
			$tagInfo = array(
				'tag_start' => $this->getFirstTag($piece),
				'tag_end' => '</'.$nameOfTag.'>',
				'tag_name' => strtolower($nameOfTag),
				'add_level' => 1
			);
			$tagInfo['content'] = $this->splitIntoBlockRecursiveProc(
				$tag,
				$this->removeFirstAndLastTag($piece),
				$procObj,
				$callBackContent,
				$callBackTags,
				$level+$tagInfo['add_level']
			);

			if ($callBackTags)	{
				$tagInfo = $procObj->$callBackTags($tagInfo,$level);
			}

			$blocks[$idx] = $tagInfo['tag_start'].$tagInfo['content'].$tagInfo['tag_end'];
		}

		return implode('',$blocks);
	}

Kasper Skårhøj's avatar
Kasper Skårhøj committed
310
311
	/**
	 * Returns an array with the $content divided by the single tags specified with the list of tags, $tag
	 * Even numbers in the array are outside the tags, Odd numbers are the tags themselves.
	 * Only start tags (including the XHTML form ending with "/>") are matched; end tags are not.
	 * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
	 *
	 * @param	string		List of tags, comma separated. The names are inserted verbatim into a case-insensitive regular expression.
	 * @param	string		HTML-content
	 * @return	array		Even numbers in the array are outside the tags, Odd numbers are the tags.
	 * @see splitIntoBlock(), getAllParts(), removeFirstAndLastTag()
	 */
	function splitTags($tag,$content)	{
		$tags = t3lib_div::trimExplode(',',$tag,1);
		$regexStr = '/<('.implode('|',$tags).')(>|\/>|[[:space:]][^>]*>)/i';

			// $parts holds the text pieces BETWEEN the matched tags; the tags themselves are re-read from $content via $pointer below.
		$parts = preg_split($regexStr,$content);
		if (!is_array($parts))	return array($content);	// Regex could not be applied; treat all content as being outside any tag.

		$pointer = strlen($parts[0]);	// Offset in $content of the next tag
		$newParts = array();
		$newParts[] = $parts[0];
		$partCount = count($parts);
		for ($a=1; $a<$partCount; $a++)	{
			$v = $parts[$a];
			$tagLen = strcspn($content,'>',$pointer)+1;	// Length of the tag at $pointer including the closing ">"

				// Set tag:
			$tagStr = substr($content,$pointer,$tagLen);
			$newParts[] = $tagStr;
			$pointer+= strlen($tagStr);

				// Set content:
			$newParts[] = $v;
			$pointer+= strlen($v);
		}
		return $newParts;
	}

	/**
	 * Returns an array with either tag or non-tag content of the result from ->splitIntoBlock()/->splitTags()
	 *
	 * Elements with odd keys in $parts are regarded as tag-parts, elements with even keys as
	 * the content outside. The returned array is re-indexed from zero.
	 *
	 * @param	array		Parts generated by ->splitIntoBlock() or ->splitTags()
	 * @param	boolean		Whether to return the tag-parts (default,true) or what was outside the tags.
	 * @param	boolean		Whether to include the tags in the tag-parts (most useful for input made by ->splitIntoBlock()). If false, each returned part is passed through ->removeFirstAndLastTag()
	 * @return	array		Tag-parts/Non-tag-parts depending on input argument settings
	 * @see splitIntoBlock(), splitTags()
	 */
	function getAllParts($parts,$tag_parts=1,$include_tag=1)	{
		$newParts = array();
		foreach ($parts as $k => $v)	{
				// With $tag_parts set odd keys are selected, otherwise even keys:
			if (($k+($tag_parts?0:1))%2)	{
				if (!$include_tag)	$v = $this->removeFirstAndLastTag($v);
				$newParts[] = $v;
			}
		}
		return $newParts;
	}

	/**
	 * Strips the first and the last tag from the string.
	 * Everything up to and including the first ">" is removed, and everything from the last "<"
	 * (found in what remains) to the end is removed as well.
	 *
	 * @param	string		String to process
	 * @return	string		What is left between the first ">" and the last "<". Empty string if either character is missing.
	 */
	function removeFirstAndLastTag($str)	{
			// Cut off the first tag (and anything before it):
		$firstTagEnd = strpos($str,'>');
		if ($firstTagEnd === FALSE)	return '';
		$inner = (string)substr($str,$firstTagEnd+1);

			// Cut off the last tag (and anything after it):
		$lastTagStart = strrpos($inner,'<');
		if ($lastTagStart === FALSE)	return '';

		return (string)substr($inner,0,$lastTagStart);
	}

	/**
	 * Returns the first tag in $str
	 * Actually everything from the beginning of $str up to and including the first ">" is returned, so you better make sure the tag is the first thing...
	 * If $str holds no ">" at all, the whole string is returned.
	 *
	 * @param	string		HTML string with tags
	 * @return	string		Leading part of $str ending with the first ">"
	 */
	function getFirstTag($str)	{
		return substr($str, 0, strcspn($str,'>')+1);
	}

	/**
	 * Returns the NAME of the first tag in $str
	 *
	 * The first tag is fetched with ->getFirstTag(); its first and last characters (normally "<" and ">")
	 * are cut off and the name is what precedes the first whitespace character.
	 *
	 * @param	string		HTML tag (The element name must be separated from the attributes by a whitespace character)
	 * @param	boolean		If set, then the tag is NOT converted to uppercase but case is preserved.
	 * @return	string		Tag name in upper case (or in original case if $preserveCase is set)
	 * @see getFirstTag()
	 */
	function getFirstTagName($str,$preserveCase=FALSE)	{
		$firstTag = trim($this->getFirstTag($str));
		$tagParts = preg_split('/[[:space:]]/', (string)substr($firstTag,1,-1), 2);
		$tag = $tagParts[0];
		if (!$preserveCase)	$tag = strtoupper($tag);

		return trim($tag);
	}
413
414
415
416
417
418
419
420
421
422
423
424
425

	/**
	 * Returns an array with all attributes as keys. Attribute names are converted to lowercase and reduced to the characters a-z, A-Z, 0-9, "_", ":" and "-"
	 * If an attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
	 *
	 * The second element of the returned array holds meta-data per attribute: "origTag" is the
	 * attribute name before lowercasing, "dashType" is the quote character that was around the value (blank if none).
	 *
	 * @param	string		Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
	 * @param	boolean		If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
	 * @return	array		array(Tag attributes,Attribute meta-data)
	 * @see split_tag_attributes()
	 */
	function get_tag_attributes($tag,$deHSC=0)	{
		list($components,$metaC) = $this->split_tag_attributes($tag);
		$name = '';	 // attribute name is stored here
		$valuemode = FALSE;	// Set when the previous component was "=", meaning that the next component is a value
		$attributes = array();
		$attributesMeta = array();

		if (is_array($components))	{
			foreach ($components as $key => $val)	{
				if (strcmp($val,'='))	{
					if ($valuemode)	{
							// Only if $name is set (if there is an attribute, that waits for a value) the value is assigned.
						if ($name)	{
							$attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
							$attributesMeta[$name]['dashType'] = $metaC[$key];
							$name = '';
						}
					} else {
							// A new attribute name:
						if ($namekey = preg_replace('/[^a-zA-Z0-9_:-]/','',$val))	{
							$name = strtolower($namekey);
							$attributesMeta[$name] = array();
							$attributesMeta[$name]['origTag'] = $namekey;
							$attributes[$name] = '';
						}
					}
					$valuemode = FALSE;
				} else {
					$valuemode = TRUE;
				}
			}
		}

		return array($attributes,$attributesMeta);
	}
454

455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
	/**
	 * Returns an array with the 'components' from an attribute list. The result is normally analyzed by get_tag_attributes
	 * Removes tag-name if found
	 *
	 * Components are attribute names, the "=" character and attribute values, in the order they occur.
	 * The second returned array runs parallel to the first and holds the quote character (" or ')
	 * for values that were quoted, otherwise a blank string.
	 *
	 * @param	string		The tag or attributes
	 * @return	array		array(Components, Quote characters of the components)
	 * @access private
	 * @see t3lib_div::split_tag_attributes()
	 */
	function split_tag_attributes($tag)	{
			// Same as the default trim() characters plus form feed (\x0C). Form feed is matched by [[:space:]] below,
			// so it has to be stripped too - otherwise the loop would never consume it and run forever.
		$whiteSpace = " \t\n\r\0\x0B\x0C";

			// Removes the tag name ("<name") from the beginning of the string
		$tag_tmp = trim(preg_replace('/^<[^[:space:]]*/','',trim($tag,$whiteSpace)),$whiteSpace);
			// Removes any > in the end of the string
		$tag_tmp = trim(preg_replace('/>$/D','',$tag_tmp),$whiteSpace);

		$metaValue = array();
		$value = array();
		while (strcmp($tag_tmp,''))	{	// Compared with empty string so that a remaining "0" is processed as well
			$firstChar = substr($tag_tmp,0,1);
			if (!strcmp($firstChar,'"') || !strcmp($firstChar,"'"))	{
					// Quoted value: Take everything up to the matching quote (or the rest of the string if the quote is never closed)
				$reg = explode($firstChar,$tag_tmp,3);
				$value[] = $reg[1];
				$metaValue[] = $firstChar;
				$tag_tmp = isset($reg[2]) ? trim($reg[2],$whiteSpace) : '';
			} elseif (!strcmp($firstChar,'=')) {
				$value[] = '=';
				$metaValue[] = '';
				$tag_tmp = trim((string)substr($tag_tmp,1),$whiteSpace);		// Removes = chars.
			} else {
					// There are no quotes around the value. We look for the next whitespace or '='
				$reg = preg_split('/[[:space:]=]/',$tag_tmp,2);
				$value[] = trim($reg[0]);
				$metaValue[] = '';
					// The separator character itself is kept so that a "=" is seen in the next iteration (whitespace is trimmed off)
				$tag_tmp = trim(substr($tag_tmp,strlen($reg[0]),1).(isset($reg[1]) ? $reg[1] : ''),$whiteSpace);
			}
		}

		return array($value,$metaValue);
	}

Kasper Skårhøj's avatar
Kasper Skårhøj committed
494
495
496
497
498
499
500
	/**
	 * Checks whether block/solo tags are found in the correct amounts in HTML content
	 * Block tags are tags which are required to have an equal amount of start and end tags, eg. "<table>...</table>"
	 * Solo tags are tags which are required to have ONLY start tags (possibly with an XHTML ending like ".../>")
	 * NOTICE: Correct XHTML might actually fail since "<br></br>" is allowed as well as "<br/>". However only the LATTER is accepted by this function (with "br" in the "solo-tag" list), the first example will result in a warning.
	 * NOTICE: Correct XHTML might actually fail since "<p/>" is allowed as well as "<p></p>". However only the LATTER is accepted by this function (with "p" in the "block-tag" list), the first example will result in an ERROR!
	 * NOTICE: Correct HTML version "something" allows eg. <p> and <li> to be NON-ended (implicitly ended by other tags). However this is NOT accepted by this function (with "p" and "li" in the block-tag list) and it will result in an ERROR!
	 *
	 * The content is lowercased before counting, so the check is case-insensitive. Only the NUMBER of
	 * start and end tags is compared; the order in which they appear is not checked.
	 *
	 * @param	string		HTML content to analyze
	 * @param	string		Tag names for block tags (eg. table or div or p) in lowercase, commalist (eg. "table,div,p")
	 * @param	string		Tag names for solo tags (eg. img, br or input) in lowercase, commalist ("img,br,input")
	 * @return	array		Analyse data. Keys: "counts" (start-tag count per tag found), "errors", "warnings" (messages per tag), "blocks" (array(start,end,difference) per block tag), "solo" (array(start,end) per solo tag)
	 */
	function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')	{
		$content = strtolower($content);
		$analyzedOutput = array();
		$analyzedOutput['counts'] = array();	// Counts appearances of start-tags
		$analyzedOutput['errors'] = array();	// Lists ERRORS
		$analyzedOutput['warnings'] = array();	// Lists warnings.
		$analyzedOutput['blocks'] = array();	// Lists stats for block-tags
		$analyzedOutput['solo'] = array();	// Lists stats for solo-tags

			// Block tags, must have endings...
		$blockTags = explode(',',$blockTags);
		foreach($blockTags as $tagName)	{
			$tagName = trim($tagName);
			if (!strcmp($tagName,''))	continue;	// A blank name would match any tag at all
			$quotedName = preg_quote($tagName,'/');
			$matches = array();
				// The character class after the name makes sure that eg. "<pre" is not counted as "<p"
			$countBegin = (int)preg_match_all('/<'.$quotedName.'[^[:alnum:]]/',$content,$matches);
			$countEnd = (int)preg_match_all('/<\/'.$quotedName.'[^[:alnum:]]/',$content,$matches);
			$analyzedOutput['blocks'][$tagName] = array($countBegin,$countEnd,$countBegin-$countEnd);
			if ($countBegin)	$analyzedOutput['counts'][$tagName] = $countBegin;
			if ($countBegin-$countEnd)	{
				if ($countBegin-$countEnd > 0)	{
					$analyzedOutput['errors'][$tagName] = 'There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
				} else {
					$analyzedOutput['warnings'][$tagName] = 'There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
				}
			}
		}

			// Solo tags, must NOT have endings...
		$soloTags = explode(',',$soloTags);
		foreach($soloTags as $tagName)	{
			$tagName = trim($tagName);
			if (!strcmp($tagName,''))	continue;
			$quotedName = preg_quote($tagName,'/');
			$matches = array();
			$countBegin = (int)preg_match_all('/<'.$quotedName.'[^[:alnum:]]/',$content,$matches);
			$countEnd = (int)preg_match_all('/<\/'.$quotedName.'[^[:alnum:]]/',$content,$matches);
			$analyzedOutput['solo'][$tagName] = array($countBegin,$countEnd);
			if ($countBegin)	$analyzedOutput['counts'][$tagName] = $countBegin;
			if ($countEnd)	{
				$analyzedOutput['warnings'][$tagName] = 'There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
			}
		}

		return $analyzedOutput;
	}
Kasper Skårhøj's avatar
Kasper Skårhøj committed
546

547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563











	/*********************************
	 *
	 * Clean HTML code
	 *
	 *********************************/

Kasper Skårhøj's avatar
Kasper Skårhøj committed
564
565
	/**
	 * Function that can clean up HTML content according to configuration given in the $tags array.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
566
567
	 *
	 * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:		 $tags = array_flip(explode(',','b,a,i,u'))
Kasper Skårhøj's avatar
Kasper Skårhøj committed
568
	 * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
Kasper Skårhøj's avatar
Kasper Skårhøj committed
569
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
570
571
572
573
574
	 * 	$tags[$tagname] = Array(
	 * 		'overrideAttribs' => ''		If set, this string is preset as the attributes of the tag
	 * 		'allowedAttribs' =>   '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
	 * 		'fixAttrib' => Array(
	 * 			'[attribute name]' => Array (
Kasper Skårhøj's avatar
   
Kasper Skårhøj committed
575
576
	 * 				'set' => Force the attribute value to this value.
	 * 				'unset' => Boolean: If set, the attribute is unset.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
577
578
579
	 * 				'default' => 	If no attribute exists by this name, this value is set as default value (if this value is not blank)
	 * 				'always' => 	Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
	 * 				'trim,intval,lower,upper' => 	All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
580
	 * 				'range' => Array ('[low limit]','[high limit, optional]')		Setting integer range.
Kasper Skårhøj's avatar
Kasper Skårhøj committed
581
582
583
584
585
586
587
588
589
590
591
	 * 				'list' => Array ('[value1/default]','[value2]','[value3]')		Attribute must be in this list. If not, the value is set to the first element.
	 * 				'removeIfFalse' => 	Boolean/'blank'.	If set, then the attribute is removed if it is 'false'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
	 * 				'removeIfEquals' => 	[value]	If the attribute value matches the value set here, then it is removed.
	 * 				'casesensitiveComp' => 1	If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
	 * 			)
	 * 		),
	 * 		'protect' => '',	Boolean. If set, the tag <> is converted to &lt; and &gt;
	 * 		'remap' => '',		String. If set, the tagname is remapped to this tagname
	 * 		'rmTagIfNoAttrib' => '',	Boolean. If set, then the tag is removed if no attributes happened to be there.
	 * 		'nesting' => '',	Boolean/'global'. If set true, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
	 * 	)
Kasper Skårhøj's avatar
Kasper Skårhøj committed
592
	 *
Kasper Skårhøj's avatar
Kasper Skårhøj committed
593
594
595
	 * @param	string		$content; is the HTML-content being processed. This is also the result being returned.
	 * @param	array		$tags; is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
	 * @param	string		$keepAll; boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
Kasper Skårhøj's avatar
Kasper Skårhøj committed
596
	 * @param	integer		$hSC; Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
Kasper Skårhøj's avatar
Kasper Skårhøj committed
597
598
599
600
601
602
	 * @param	array		Configuration array send along as $conf to the internal functions ->processContent() and ->processTag()
	 * @return	string		Processed HTML content
	 */
	function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())	{
			// Option defaults. They are merged UNDER each tag/attribute configuration so that options which are not configured simply read as "not set".
		$tagDefaults = array('overrideAttribs'=>'', 'allowedAttribs'=>'', 'fixAttrib'=>'', 'protect'=>0, 'remap'=>'', 'rmTagIfNoAttrib'=>0, 'nesting'=>0);
		$attrDefaults = array('set'=>'', 'unset'=>'', 'default'=>'', 'always'=>0, 'trim'=>0, 'intval'=>0, 'lower'=>0, 'upper'=>0, 'range'=>'', 'list'=>'', 'casesensitiveComp'=>0, 'removeIfFalse'=>'', 'removeIfEquals'=>'', 'prefixLocalAnchors'=>0, 'prefixRelPathWith'=>'', 'userFunc'=>'');

			// Split on "<"; everything before the first "<" is plain content:
		$tokArr = explode('<',$content);
		$newContent = array();
		$newContent[] = $this->processContent(array_shift($tokArr),$hSC,$addConfig);

		$c = 1;					// Next index in $newContent
		$tagRegister = array();	// tagname => list of $newContent indexes of start tags that are still open
		$tagStack = array();	// Names of open tags with nesting=global, in the order they were opened
		foreach($tokArr as $tok)	{
			$firstChar = substr($tok,0,1);

				// Only a-z0-9 or "/" as first char starts a tag. This also avoids triggering on <?xml..> and <!DOCTYPE..>
			if (!preg_match('/[[:alnum:]\/]/',$firstChar))	{
				$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// It was not a tag anyways
				continue;
			}

			$tagEnd = strcspn($tok,'>');
			if (strlen($tok)==$tagEnd)	{
				$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// There was no end-bracket, so no tag...
				continue;
			}

			$endTag = $firstChar=='/' ? 1 : 0;
			$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
			$tagParts = preg_split('/[[:space:]]/',$tagContent,2);	// [0] = tag name, [1] = attribute string
			if (!isset($tagParts[1]))	$tagParts[1]='';
			$tagName = strtolower($tagParts[0]);

			if (isset($tags[$tagName]))	{
				if (is_array($tags[$tagName]))	{	// If there is processing to do for the tag:
					$tagConf = array_merge($tagDefaults,$tags[$tagName]);

					if (!$endTag)	{	// If NOT an endtag, do attribute processing
							// Override attributes
						if (strcmp($tagConf['overrideAttribs'],''))	{
							$tagParts[1]=$tagConf['overrideAttribs'];
						}

							// Allowed attributes
						if (strcmp($tagConf['allowedAttribs'],''))	{
							if (!strcmp($tagConf['allowedAttribs'],'0'))	{	// No attribs allowed
								$tagParts[1]='';
							} elseif (trim($tagParts[1])) {
								$tagAttrib = $this->get_tag_attributes($tagParts[1]);
								$newTagAttrib = array();
								$tList = t3lib_div::trimExplode(',',strtolower($tagConf['allowedAttribs']),1);
								foreach($tList as $allowTag)	{
									if (isset($tagAttrib[0][$allowTag]))	$newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
								}
								$tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
							}
						}

							// Fixed attrib values
						if (is_array($tagConf['fixAttrib']))	{
							$tagAttrib = $this->get_tag_attributes($tagParts[1]);
							$tagParts[1]='';
							foreach($tagConf['fixAttrib'] as $attr => $params)	{
								$params = array_merge($attrDefaults, is_array($params) ? $params : array());

								if (strlen($params['set']))	$tagAttrib[0][$attr] = $params['set'];
								if (strlen($params['unset']))	unset($tagAttrib[0][$attr]);
								if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))	$tagAttrib[0][$attr]=$params['default'];
								if ($params['always'] || isset($tagAttrib[0][$attr]))	{
									if ($params['trim'])	{$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
									if ($params['intval'])	{$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
									if ($params['lower'])	{$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
									if ($params['upper'])	{$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
									if ($params['range'])	{
										if (isset($params['range'][1]))	{
											$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
										} else {
											$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
										}
									}
									if (is_array($params['list']))	{
											// The cache key includes the attribute name; with the tag name alone, two "list" attributes of one tag would share the first one's cached list.
										if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName.'.'.$attr)))	$tagAttrib[0][$attr]=$params['list'][0];
									}
									if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],'')))	{
										unset($tagAttrib[0][$attr]);
									}
									if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp'])))	{
										unset($tagAttrib[0][$attr]);
									}
									if ($params['prefixLocalAnchors'])	{
										if (substr($tagAttrib[0][$attr],0,1)=='#')	{
											$prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
											$tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
											if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL')))		{
												$tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
											}
										}
									}
									if ($params['prefixRelPathWith'])	{
										$urlParts = parse_url($tagAttrib[0][$attr]);
										$urlPath = isset($urlParts['path']) ? $urlParts['path'] : '';
										if (empty($urlParts['scheme']) && substr($urlPath,0,1)!='/')	{	// If it is NOT an absolute URL (by http: or starting "/")
											$tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
										}
									}
									if ($params['userFunc'])	{
										$tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
									}
								}
							}
							$tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
						}
					} else {	// If endTag, remove any possible attributes:
						$tagParts[1]='';
					}

						// Protecting the tag by converting < and > to &lt; and &gt; ??
					if ($tagConf['protect'])	{
						$lt = '&lt;';	$gt = '&gt;';
					} else {
						$lt = '<';	$gt = '>';
					}
						// Remapping tag name?
					if ($tagConf['remap'])	$tagParts[0] = $tagConf['remap'];

						// rmTagIfNoAttrib: a start tag without attributes is skipped entirely when the option is set
					if ($endTag || trim($tagParts[1]) || !$tagConf['rmTagIfNoAttrib'])	{
						$setTag = 1;
							// If no attributes are ever allowed AND attribute-less tags are removed (as "removeTags" configures it), no start tag can survive - so the end tag is dropped as well.
						if ($endTag && $tagConf['rmTagIfNoAttrib'] && !strcmp($tagConf['allowedAttribs'],'0') && !is_array($tagConf['fixAttrib']))	{
							$setTag = 0;
						}

						if ($tagConf['nesting'])	{
							if (!isset($tagRegister[$tagName]) || !is_array($tagRegister[$tagName]))	$tagRegister[$tagName]=array();

							if ($endTag)	{
								$correctTag=1;
								if ($tagConf['nesting']=='global')	{
									$lastEl = end($tagStack);
									if (strcmp($tagName,$lastEl))	{
										if (in_array($tagName,$tagStack))	{
												// The tag was opened further up the stack: remove the start tags of everything opened after it.
											while(count($tagStack) && strcmp($tagName,$lastEl))	{
												$elPos = end($tagRegister[$lastEl]);
												unset($newContent[$elPos]);

												array_pop($tagRegister[$lastEl]);
												array_pop($tagStack);
												$lastEl = end($tagStack);
											}
										} else {
											$correctTag=0;	// End tag of something that is not open at all
										}
									}
								}
								if (!count($tagRegister[$tagName]) || !$correctTag)	{
									$setTag=0;
								} else {
									array_pop($tagRegister[$tagName]);
									if ($tagConf['nesting']=='global')	{array_pop($tagStack);}
								}
							} else {
								array_push($tagRegister[$tagName],$c);
								if ($tagConf['nesting']=='global')	{array_push($tagStack,$tagName);}
							}
						}

						if ($setTag)	{
								// Setting the tag
							$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
						}
					}
				} else {
					$newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
				}
			} elseif ($keepAll) {	// This is if the tag was not defined in the array for processing:
				if (!strcmp($keepAll,'protect'))	{
					$lt = '&lt;';	$gt = '&gt;';
				} else {
					$lt = '<';	$gt = '>';
				}
				$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
			}
				// The content following the tag:
			$newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
		}

			// Unsetting start tags which were registered for nesting but never closed:
		foreach($tagRegister as $positions)	{
			foreach($positions as $pKey)	{
				unset($newContent[$pKey]);
			}
		}

		return implode('',$newContent);
	}

	/**
	 * Converts htmlspecialchars forth ($dir=1) AND back ($dir=-1)
	 * Only the characters & < > " are converted in either direction; single quotes are left untouched.
	 *
	 * @param	string		Input value
	 * @param	integer		Direction: forth ($dir=1, dir=2 for preserving entities) AND back ($dir=-1). Any other value returns the input unchanged.
	 * @return	string		Output value
	 */
	function bidir_htmlspecialchars($value,$dir)	{
		if ($dir==1)	{
				// ENT_COMPAT is given explicitly so the result matches what $dir=-1 can decode, regardless of the PHP version's default flags.
			$value = htmlspecialchars($value,ENT_COMPAT);
		} elseif ($dir==2)	{
			$value = t3lib_div::deHSCentities(htmlspecialchars($value,ENT_COMPAT));
		} elseif ($dir==-1) {
				// Replacements are applied in array order; "&amp;" must be last so "&amp;lt;" becomes "&lt;" and not "<".
			$value = str_replace(
				array('&gt;','&lt;','&quot;','&amp;'),
				array('>','<','"','&'),
				$value
			);
		}
		return $value;
	}

	/**
	 * Prefixes the relative paths of background/src/href/action attributes in the tags [embed,td,table,body,img,input,form,link,script,a] in the $content with the $main_prefix or an alternative given by $alternatives.
	 * Afterwards every url(...) found inside <style> sections gets the prefix and suffix as well.
	 *
	 * @param	string		Prefix string
	 * @param	string		HTML content
	 * @param	array		Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase (the <style> section is looked up with the lowercase key "style")
	 * @param	string		Suffix string (put after the resource).
	 * @return	string		Processed HTML content
	 */
	function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')	{
			// Which attribute carries the resource path for each of the handled tags:
		$pathAttribute = array(
			'td' => 'background',
			'body' => 'background',
			'table' => 'background',
			'img' => 'src',
			'input' => 'src',
			'script' => 'src',
			'embed' => 'src',
			'link' => 'href',
			'a' => 'href',
			'form' => 'action'
		);

		$parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
		foreach($parts as $k => $v)	{
			if ($k%2)	{	// Odd keys hold the tags
				$firstTagName = $this->getFirstTagName($v);	// The 'name' of the first tag
				$lowerTagName = strtolower($firstTagName);
				if (!isset($pathAttribute[$lowerTagName]))	continue;

				$attribute = $pathAttribute[$lowerTagName];
				$params = $this->get_tag_attributes($v,1);
				if (!empty($params[0][$attribute]))	{
					$prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;
					$params[0][$attribute] = $this->prefixRelPath($prefix,$params[0][$attribute],$suffix);

					$tagEnd = substr($v,-2)=='/>' ? ' />' : '>';	// Detect tag-ending so that it is re-applied correctly.
					$parts[$k] = '<'.trim($lowerTagName.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
				}
			}
		}
		$content = implode('',$parts);

			// Fix <style> section:
		$prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
		if (strlen($prefix))	{
				// "\" and "$" are escaped so prefix/suffix are inserted literally by preg_replace(); ${n} keeps a prefix starting with a digit from merging into the group number.
			$replacement = '${1}'.addcslashes($prefix,'\\$').'${2}'.addcslashes($suffix,'\\$').'${3}';
			$parts = $this->splitIntoBlock('style',$content);
			foreach($parts as $k => $v)	{
				if ($k%2)	{
					$parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$v);
				}
			}
			$content = implode('',$parts);
		}

		return $content;
	}

	/**
	 * Internal sub-function for ->prefixResourcePath()
	 * The value is left untouched if it has a URL scheme (like "http:") or begins with "/".
	 *
	 * @param	string		Prefix string
	 * @param	string		Relative path/URL
	 * @param	string		Suffix string
	 * @return	string		Output path, prefixed (and suffixed) if no scheme in input string
	 * @access private
	 */
	function prefixRelPath($prefix,$srcVal,$suffix='')	{
		$pU = parse_url($srcVal);
			// parse_url() returns false for seriously malformed URLs and leaves "scheme" out when there is none; empty() covers both.
		if (empty($pU['scheme']) && substr($srcVal, 0, 1)!='/')	{ // If not an absolute URL.
			$srcVal = $prefix.$srcVal.$suffix;
		}
		return $srcVal;
	}

	/**
	 * Cleans up the input $value for fonttags.
	 * If keepFace,-Size and -Color is set then font-tags with an allowed property is kept. Else deleted.
	 * Font tags nested inside font tags are processed recursively.
	 *
	 * @param	string		HTML content with font-tags inside to clean up.
	 * @param	boolean		If set, keep "face" attribute
	 * @param	boolean		If set, keep "size" attribute
	 * @param	boolean		If set, keep "color" attribute
	 * @return	string		Processed HTML content
	 */
	function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)	{
			// Attribute name => whether it may be kept (in output order)
		$keep = array('face' => $keepFace, 'size' => $keepSize, 'color' => $keepColor);

		$fontSplit = $this->splitIntoBlock('font',$value);	// ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set!
		foreach($fontSplit as $k => $v)	{
			if ($k%2)	{	// font:
				$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));
				$newAttribs = array();
				foreach($keep as $attribName => $enabled)	{
					if ($enabled && !empty($attribArray[$attribName]))	{
						$newAttribs[] = $attribName.'="'.$attribArray[$attribName].'"';
					}
				}

				$innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor);
				if (count($newAttribs))	{
					$fontSplit[$k]='<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>';
				} else {
						// No attribute survived: drop the font tag, keep what it wrapped
					$fontSplit[$k]=$innerContent;
				}
			}
		}
		return implode('',$fontSplit);
	}
	/**
	 * This is used to map certain tag-names into other names.
	 * Matching of tag names is case-insensitive. Attributes of start tags are kept, attributes of end tags are dropped.
	 *
	 * @param	string		HTML content
	 * @param	array		Array with tag key=>value pairs where key is from-tag and value is to-tag
	 * @param	string		Alternative less-than char to search for (search regex string)
	 * @param	string		Alternative less-than char to replace with (replace regex string)
	 * @return	string		Processed HTML content
	 */
	function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')	{
			// "~" is the pattern delimiter, so it must be escaped inside the pattern fragments. "$" is escaped in the replacement parts so it is inserted literally.
		$ltSearch = str_replace('~','\~',$ltChar);
		$ltReplace = str_replace('$','\$',$ltChar2);

		foreach($tags as $from => $to)	{
			$from = str_replace('~','\~',$from);
			$to = str_replace('$','\$',$to);

				// Start tag without attributes:
			$value = preg_replace('~'.$ltSearch.$from.'>~i',$ltReplace.$to.'>',$value);
				// Start tag with attributes:
			$value = preg_replace('~'.$ltSearch.$from.'[[:space:]]([^>]*)>~i',$ltReplace.$to.' \\1>',$value);
				// End tag. The name must be followed by whitespace or ">", otherwise mapping "b" would also hit "</body>".
			$value = preg_replace('~'.$ltSearch.'\/'.$from.'(?:[[:space:]][^>]*)?>~i',$ltReplace.'/'.$to.'>',$value);
		}
		return $value;
	}

	/**
	 * This converts htmlspecialchar()'ed tags (from $tagList) back to real tags. Eg. '&lt;strong&gt;' would be converted back to '<strong>' if found in $tagList
	 *
	 * @param	string		HTML content
	 * @param	string		Tag list, separated by comma. Lowercase! If empty, all tags are converted back.
	 * @return	string		Processed HTML content
	 */
	function unprotectTags($content,$tagList='')	{
		$tagsArray = t3lib_div::trimExplode(',',$tagList,1);
		$contentParts = explode('&lt;',$content);
		foreach($contentParts as $k => $tok)	{
			if ($k===0)	continue;	// The first part is what precedes the first "&lt;" - nothing to do

				// Unless a protected tag is recognized below, the "&lt;" removed by explode() is just put back:
			$contentParts[$k] = '&lt;'.$tok;

			$firstChar = substr($tok,0,1);
			if (strcmp(trim($firstChar),''))	{	// Something non-blank follows "&lt;"
				$subparts = explode('&gt;',$tok,2);
				$tagEnd = strlen($subparts[0]);
				if (strlen($tok)!=$tagEnd)	{	// There is a closing "&gt;"
					$endTag = $firstChar=='/' ? 1 : 0;
					$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
					$tagParts = preg_split('/[[:space:]]/',$tagContent,2);
					$tagName = strtolower($tagParts[0]);
					if (!strcmp($tagList,'') || in_array($tagName,$tagsArray))	{
						$contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
					}
				}
			}
		}

		return implode('',$contentParts);
	}
	/**
	 * Strips tags except the tags in the list, $tagList
	 * OBSOLETE - use PHP function strip_tags()
	 *
	 * @param	string		Value to process
	 * @param	string		List of tags
	 * @return	string		Output value
	 * @ignore
	 */
	function stripTagsExcept($value,$tagList)	{
		$forthArr = array();
		$backArr = array();
		foreach(t3lib_div::trimExplode(',',$tagList,1) as $theTag)	{
			$hash = md5($theTag);
			$forthArr[$theTag] = $hash;
			$backArr[$hash] = $theTag;
		}
			// The allowed tags are disguised as "_[md5-hash]..>" so strip_tags() does not see them, then they are restored.
		$value = $this->mapTags($value,$forthArr,'<','_');
		$value = strip_tags($value);
		$value = $this->mapTags($value,$backArr,'_','<');
		return $value;
	}
	/**
	 * Internal function for case shifting of a string or whole array
	 * Unless $flag (case sensitive comparison) is set, the string - or every value of the array - is returned in uppercase.
	 *
	 * @param	mixed		Input string/array
	 * @param	boolean		If set, the input is returned unchanged (case is significant). If not set, the input is returned in uppercase.
	 * @param	string		Key string used for internal caching of the results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
	 * @return	mixed		Output string/array, processed
	 * @access private
	 */
	function caseShift($str,$flag,$cacheKey='')	{
			// Case sensitive: nothing to shift. This goes for arrays too, otherwise a value compared against the list could never match.
		if ($flag)	return $str;

		if (!is_array($str))	return strtoupper($str);

		if ($cacheKey && isset($this->caseShift_cache[$cacheKey]))	{
			return $this->caseShift_cache[$cacheKey];
		}
		$str = array_map('strtoupper',$str);	// Keys are preserved
		if ($cacheKey)	$this->caseShift_cache[$cacheKey] = $str;
		return $str;
	}
	/**
	 * Compiling an array with tag attributes into a string
	 * An attribute with an empty value is written as the name only, unless the meta information holds a "dashType" for it.
	 *
	 * @param	array		Tag attributes
	 * @param	array		Meta information about these attributes (like if they were quoted). Per attribute the keys "origTag" (name as originally written) and "dashType" (quote character) are used.
	 * @param	boolean		If set, then the attribute names will be set in lower case, value quotes in double-quotes and the value will be htmlspecialchar()'ed
	 * @return	string		Imploded attributes, eg: 'attribute="value" attrib2="value2"'
	 * @access private
	 */
	function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)	{
		if (!is_array($tagAttrib))	return '';	// Nothing to compile

		$accu = array();
		foreach($tagAttrib as $k => $v)	{
			$hasValue = strcmp($v,'') || isset($meta[$k]['dashType']);
			if ($xhtmlClean)	{
				$attr = strtolower($k);
				if ($hasValue)	{
					$attr.='="'.htmlspecialchars($v).'"';
				}
			} else {
				$attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
				if ($hasValue)	{
						// Use the original quote character if known; otherwise integers go unquoted and everything else in double quotes.
					$dash = !empty($meta[$k]['dashType']) ? $meta[$k]['dashType'] : (t3lib_div::testInt($v)?'':'"');
					$attr.='='.$dash.$v.$dash;
				}
			}
			$accu[] = $attr;
		}
		return implode(' ',$accu);
	}
	/**
	 * Get tag attributes, the classic version (which had some limitations?)
	 * Returns only the attribute array (first element) of what ->get_tag_attributes() returns.
	 *
	 * @param	string		The tag
	 * @param	boolean		De-htmlspecialchar flag.
	 * @return	array		Attributes; empty array if none were found
	 * @access private
	 */
	function get_tag_attributes_classic($tag,$deHSC=0)	{
		$parsed = $this->get_tag_attributes($tag,$deHSC);
		if (is_array($parsed[0]))	{
			return $parsed[0];
		}
		return array();
	}

	/**
	 * Indents input content with $number instances of $indentChar
	 * Carriage returns are removed, so the result always has plain line feeds as line breaks.
	 *
	 * @param	string		Content string, multiple lines.
	 * @param	integer		Number of indents
	 * @param	string		Indent character/string
	 * @return	string		Indented code (typ. HTML)
	 */
	function indentLines($content, $number=1, $indentChar="\t")	{
			// Zero/negative counts mean no indentation (str_repeat() does not accept negative counts).
		$number = intval($number);
		$preTab = $number>0 ? str_repeat($indentChar,$number) : '';

		$lines = explode(chr(10),str_replace(chr(13),'',$content));
		foreach($lines as $k => $v)	{
			$lines[$k] = $preTab.$v;
		}
		return implode(chr(10), $lines);
	}

	/**
	 * Converts TSconfig into an array for the HTMLcleaner function.
	 * The four values returned correspond to the parameters $tags, $keepAll, $hSC and $addConfig of ->HTMLcleaner()
	 *
	 * @param	array		TSconfig for HTMLcleaner
	 * @param	array		Array of tags to keep (?)
	 * @return	array		Numeric array: [0] tag configuration, [1] "keepNonMatchedTags" as string, [2] "htmlSpecialChars" as integer, [3] additional configuration
	 * @access private
	 */
	function HTMLparserConfig($TSconfig,$keepTags=array())	{
			// Allow tags (base list, merged with incoming array)
		$allowTags = isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : '';
		$alTags = array_flip(t3lib_div::trimExplode(',',strtolower($allowTags),1));
		$keepTags = array_merge($alTags,$keepTags);

			// Set config properties.
		if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.']))	{
				// First the plain switches, "tags.[tagname] = 0/1":
			foreach($TSconfig['tags.'] as $key => $tagC)	{
				if (!is_array($tagC) && $key==strtolower($key))	{
					if (!strcmp($tagC,'0'))	unset($keepTags[$key]);
					if (!strcmp($tagC,'1') && !isset($keepTags[$key]))	$keepTags[$key]=1;
				}
			}

				// Then the properties, "tags.[tagname]. {...}":
			foreach($TSconfig['tags.'] as $key => $tagC)	{
				if (is_array($tagC) && $key==strtolower($key))	{
					$key = substr($key,0,-1);	// Strip the trailing dot
					if (!isset($keepTags[$key]) || !is_array($keepTags[$key]))	$keepTags[$key]=array();
					if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.']))	{
						foreach($tagC['fixAttrib.'] as $atName => $atConfig)	{
							if (is_array($atConfig))	{
								$atName = substr($atName,0,-1);	// Strip the trailing dot
								if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName]))	{
									$keepTags[$key]['fixAttrib'][$atName]=array();
								}
								$atMerged = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);		// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
									// "range" and "list" are comma lists in TSconfig but arrays for HTMLcleaner():
								if (isset($atMerged['range']) && !is_array($atMerged['range']) && strcmp($atMerged['range'],''))	$atMerged['range'] = t3lib_div::trimExplode(',',$atMerged['range']);
								if (isset($atMerged['list']) && !is_array($atMerged['list']) && strcmp($atMerged['list'],''))	$atMerged['list'] = t3lib_div::trimExplode(',',$atMerged['list']);
								$keepTags[$key]['fixAttrib'][$atName] = $atMerged;
							}
						}
					}
					unset($tagC['fixAttrib.']);
					unset($tagC['fixAttrib']);
					$keepTags[$key] = array_merge($keepTags[$key],$tagC);			// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
				}
			}
		}

			// Options given as comma lists of tag names. Each sets one property on those tags which are kept already.
			// The order matters: globalNesting comes after localNesting and therefore wins for a tag listed in both.
		$listOptions = array(
			'localNesting' => array('nesting',1),
			'globalNesting' => array('nesting','global'),
			'rmTagIfNoAttrib' => array('rmTagIfNoAttrib',1),
			'noAttrib' => array('allowedAttribs',0)
		);
		foreach($listOptions as $option => $setting)	{
			if (!empty($TSconfig[$option]))	{
				$lN = t3lib_div::trimExplode(',',strtolower($TSconfig[$option]),1);
				foreach($lN as $tn)	{
					if (isset($keepTags[$tn]))	{
							// Tags coming from "allowTags" hold a plain number until now
						if (!is_array($keepTags[$tn]))	$keepTags[$tn]=array();
						$keepTags[$tn][$setting[0]] = $setting[1];
					}
				}
			}
		}

			// removeTags: the tag is configured so that its start tag never survives (no attributes allowed + removed when without attributes)
		if (!empty($TSconfig['removeTags']))	{
			$lN = t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1);
			foreach($lN as $tn)	{
				$keepTags[$tn]=array();
				$keepTags[$tn]['allowedAttribs']=0;
				$keepTags[$tn]['rmTagIfNoAttrib']=1;
			}
		}

			// Create additional configuration:
		$addConfig=array();
		if (!empty($TSconfig['xhtml_cleaning']))	{
			$addConfig['xhtml']=1;
		}

		return array(
			$keepTags,
			''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
			intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
			$addConfig
		);
	}
	/**
	 * Tries to convert the content to be XHTML compliant and other stuff like that.
	 * STILL EXPERIMENTAL. See comments below.
	 *
	 * 			What it does NOT do (yet) according to XHTML specs.:
	 * 			- Wellformedness: Nesting is NOT checked
	 * 			- name/id attribute issue is not observed at this point.
	 * 			- Certain nesting of elements not allowed. Most interesting, <PRE> cannot contain img, big,small,sub,sup ...
	 * 			- Wrapping scripts and style element contents in CDATA - or alternatively they should have entities converted.
	 * 			- Setting charsets may put some special requirements on both XML declaration/ meta-http-equiv. (C.9)
	 * 			- UTF-8 encoding is in fact expected by XML!!
	 * 			- stylesheet element and attribute names are NOT converted to lowercase
	 * 			- ampersands (and entities in general I think) MUST be converted to an entity reference! (&amp;). This may mean further conversion of non-tag content before output to page. May be related to the charset issue as a whole.
	 * 			- Minimized values not allowed: Must do this: selected="selected"
	 *
	 * 			What it does at this point:
	 * 			- All tags (frame,base,meta,link + img,br,hr,area,input) is ended with "/>" - others?
	 * 			- Lowercase for elements and attributes
	 * 			- All attributes in quotes
	 * 			- Add "alt" attribute to img-tags if it's not there already.
	 *
	 * @param	string		Content to clean up
	 * @return	string		Cleaned up content returned.
	 * @access private
	 */
	function XHTML_clean($content)	{
		$noSpecialTags = array();				// No tags treated specially
		$keepAllTags = 1;						// Keep ALL tags.
		$hscMode = 0;							// Content between tags is not htmlspecialchar()'ed - if we did, <script> content would break...
		$xhtmlConfig = array('xhtml' => 1);	// Passed on as additional configuration

		return $this->HTMLcleaner($content,$noSpecialTags,$keepAllTags,$hscMode,$xhtmlConfig);
	}

	/**
	 * Processing all tags themselves
	 * (Some additions by Sacha Vorbeck)
	 *
	 * @param	string		Tag to process
	 * @param	array		Configuration array passing instructions for processing. If count()==0, function will return value unprocessed. See source code for details
	 * @param	boolean		Is endtag, then set this.
	 * @param	boolean		If set, just return value straight away
	 * @return	string		Processed value.
	 * @access private
	 */
	function processTag($value,$conf,$endTag,$protected=0)	{
			// Return immediately if protected or no parameters.
			// empty() instead of count(): an empty array still returns early, and a NULL/unset configuration does so too without raising an error.
		if ($protected || empty($conf))	return $value;

			// OK then, begin processing for XHTML output:
			// STILL VERY EXPERIMENTAL!!
			// (!empty() avoids an "undefined index" notice when the 'xhtml' key is not part of the configuration.)
		if (!empty($conf['xhtml']))	{
			if ($endTag)	{	// Endtags are just set lowercase right away
				$value = strtolower($value);
			} elseif (substr($value,0,2)!='<!') {	// ... and tags starting with "<!" (comments etc.) are ignored.
					// Remember whether the incoming tag was already written as self-closing ("/>")
				$selfClosed = (substr($value,-2)=='/>');

					// Finding inner value without the leading "<" and the trailing ">" or "/>"
				$inValue = substr($value,1,($selfClosed?-2:-1));

					// Separate tagname and attributes at the first whitespace character.
					// preg_split() replaces split(), which is not available in PHP 7+.
				$tagParts = preg_split('/[[:space:]]/',$inValue,2);
				$tagName = strtolower($tagParts[0]);
					// A tag without any attributes yields only one part, so default the attribute string to empty
				$tagP = isset($tagParts[1]) ? $tagParts[1] : '';

					// Process attributes (index 0 of the result holds the name => value pairs)
				$tagAttrib = $this->get_tag_attributes($tagP);
				$attributes = (isset($tagAttrib[0]) && is_array($tagAttrib[0])) ? $tagAttrib[0] : array();
				if (!strcmp($tagName,'img') && !isset($attributes['alt']))		$attributes['alt']='';	// Set alt attribute for all images (not XHTML though...)
				if (!strcmp($tagName,'script') && !isset($attributes['type']))	$attributes['type']='text/javascript';	// Set type attribute for all script-tags

					// Set attributes: always in quotes, with htmlspecialchars converted.
					// Attribute names are used exactly as returned by get_tag_attributes() (presumably already lowercase - TODO confirm).
					// foreach replaces the reset()/each() loop, as each() is not available in PHP 8+.
				$outA=array();
				foreach ($attributes as $attrib_name => $attrib_value)	{
					$outA[]=$attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
				}
				$newTag='<'.trim($tagName.' '.implode(' ',$outA));

					// All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
				if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosed)	{
					$newTag.=' />';
				} else {
					$newTag.='>';
				}
				$value = $newTag;
			}
		}

		return $value;
	}

	/**
	 * Processing content between tags for HTML_cleaner
	 *
	 * @param	string		The value
	 * @param	integer		Direction, either -1 or +1. 0 (zero) means no change to input value.
	 * @param	mixed		Not used, ignore.
	 * @return	string		The processed value.
	 * @access private
	 */
	function processContent($value,$dir,$conf)	{
			// A direction of zero means: hand the value back untouched
		if ($dir==0)	{
			return $value;
		}

			// Any other direction is passed on to bidir_htmlspecialchars(); $conf is not used here
		$converted = $this->bidir_htmlspecialchars($value,$dir);
		return $converted;
	}
}



if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])	{
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
}
1310
?>