Fixed bug #11937: Do not show E_DEPRECATED messages on productive systems
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2009 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 103: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 138: function init($elRef='',$recPid=0)
44 * 150: function setRelPath($path)
45 * 174: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 232: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 398: function TS_images_db($value)
52 * 550: function TS_images_rte($value)
53 * 589: function TS_reglinks($value,$direction)
54 * 626: function TS_links_db($value)
55 * 675: function TS_links_rte($value)
56 * 760: function TS_preserve_db($value)
57 * 784: function TS_preserve_rte($value)
58 * 805: function TS_transform_db($value,$css=FALSE)
59 * 922: function transformStyledATags($value)
60 * 948: function TS_transform_rte($value,$css=0)
61 * 1019: function TS_strip_db($value)
62 *
63 * SECTION: Generic RTE transformation, analysis and helper functions
64 * 1050: function getURL($url)
65 * 1064: function HTMLcleaner_db($content,$tagList='')
66 * 1091: function getKeepTags($direction='rte',$tagList='')
67 * 1200: function divideIntoLines($value,$count=5,$returnArray=FALSE)
68 * 1304: function setDivTags($value,$dT='p')
69 * 1349: function internalizeFontTags($value)
70 * 1385: function siteUrl()
71 * 1395: function rteImageStorageDir()
72 * 1407: function removeTables($value,$breakChar='<br />')
73 * 1439: function defaultTStagMapping($code,$direction='rte')
74 * 1462: function getWHFromAttribs($attribArray)
75 * 1489: function urlInfoForLinkTags($url)
76 * 1548: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
77 *
78 * TOTAL FUNCTIONS: 28
79 * (This index is automatically created/updated by the extension "extdeveval")
80 *
81 */
82
83
84
85
86
87
88
89
90
91
92
93
94
95 /**
96 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
97 *
98 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
99 * @package TYPO3
100 * @subpackage t3lib
101 */
102 class t3lib_parsehtml_proc extends t3lib_parsehtml {
103
104 // Static:
105 var $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,HR,ADDRESS,DL,DD'; // List of tags for these elements
106
107 // Internal, static:
108 var $recPid = 0; // Set this to the pid of the record manipulated by the class.
109 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
110 var $relPath=''; // Relative path
111 var $relBackPath=''; // Relative back-path
112 public $tsConfig = array(); // Current Page TSConfig
113 var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
114
115 // Internal, dynamic
116 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls.
117 var $rte_p=''; // Parameters from TCA types configuration related to the RTE
118 var $getKeepTags_cache=array(); // Data caching for processing function
119 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE
120 var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
121
122
123
124
125
126
127
128
129
130
131 /**
132 * Initialize, setting element reference and record PID
133 *
134 * @param string Element reference, eg "tt_content:bodytext"
135 * @param integer PID of the record (page id)
136 * @return void
137 */
function init($elRef='',$recPid=0)	{
	// Remember which field ("table:field") and which page (pid) the following transformations belong to.
	$this->elRef = $elRef;
	$this->recPid = $recPid;
}
142
143 /**
144 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
145 * This is used when editing files with the RTE
146 *
147 * @param string The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
148 * @return void There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
149 */
function setRelPath($path)	{
	// Normalize: trim whitespace, then strip at most one leading and one trailing slash.
	$normalizedPath = preg_replace('/\/$/', '', preg_replace('/^\//', '', trim($path)));

	// An empty (or otherwise falsy) path leaves the current settings untouched.
	if (!$normalizedPath)	{
		return;
	}

	// One "../" is needed for every directory level of the path to get back to the site root.
	$levels = count(explode('/', $normalizedPath));
	$this->relBackPath = str_repeat('../', $levels);
	$this->relPath = $normalizedPath.'/';
}
164
165 /**
166 * Evaluate the environment for editing a staticFileEdit file.
167 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
168 *
169 * @param array Parameters for the current field as found in types-config
170 * @param array Current record we are editing.
171 * @return mixed On success an array with various information is returned, otherwise a string with an error message
172 * @see t3lib_TCEmain, t3lib_transferData
173 */
function evalWriteFile($pArr,$currentRecord)	{
	// Without a parameter array there is nothing to evaluate; nothing (NULL) is returned in that case.
	if (!is_array($pArr))	{
		return;
	}

	// The configured directory must be set, end with a slash and exist below PATH_site.
	$staticFileEditPath = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'];
	if (!$staticFileEditPath || substr($staticFileEditPath,-1)!='/' || !@is_dir(PATH_site.$staticFileEditPath))	{
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

	// The first parameter names the record field that holds the file name to edit.
	$parameters = $pArr['parameters'];
	$fileFieldName = trim($parameters[0]);
	$fileName = $currentRecord[$fileFieldName];
	if (!$fileFieldName || !$fileName || !t3lib_div::validPathStr($fileName))	{
		return "ERROR: Edit file name could not be found or was bad.";
	}

	// Resolve the file relative to the site root and make sure it is really there.
	$relativeFile = $staticFileEditPath.$fileName;
	$absoluteFile = PATH_site.$relativeFile;
	if (!@is_file($absoluteFile))	{
		return "ERROR: Editfile '".$relativeFile."' did not exist";
	}

	// Success: hand back the file location plus the remaining (trimmed) field names from the parameters.
	return array(
		'editFile' => $absoluteFile,
		'relEditFile' => $relativeFile,
		'contentField' => trim($parameters[1]),
		'markerField' => trim($parameters[2]),
		'loadFromFileField' => trim($parameters[3]),
		'statusField' => trim($parameters[4])
	);
}
202
203
204
205
206
207
208
209
210
211
212
213
214
215 /**********************************************
216 *
217 * Main function
218 *
219 **********************************************/
220
221 /**
222 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
223 * This is the main function called from tcemain and transfer data classes
224 *
225 * @param string Input value
226 * @param array Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can set up features for the field rendering and in particular the RTE takes all its major configuration options from there!
227 * @param string Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
228 * @param array Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
229 * @return string Output value
230 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
231 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())	{

	// Init: keep the RTE configuration and its "proc." part available for the transformation handlers.
	$this->tsConfig = $thisConfig;
	$this->procOptions = $thisConfig['proc.'];
	$this->preserveTags = strtoupper(implode(',',t3lib_div::trimExplode(',',$this->procOptions['preserveTags'])));

	// Dynamic configuration of blockElementList
	if ($this->procOptions['blockElementList'])	{
		$this->blockElementList = $this->procOptions['blockElementList'];
	}

	// Get parameters for rte_transformation:
	$p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);

	// Setting modes: "proc.overruleMode" (comma separated) wins over the "mode" parameter (dash separated).
	if (strcmp($this->procOptions['overruleMode'],''))	{
		$modes = array_unique(t3lib_div::trimExplode(',',$this->procOptions['overruleMode']));
	} else {
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	}
	$revmodes = array_flip($modes);

	// Expand the shortcut modes "ts" and "ts_css" into the lists of transformations they stand for:
	if (isset($revmodes['ts']))	{
		$modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
	}
	if (isset($revmodes['ts_css']))	{
		$modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
	}

	// Flatten the expanded entries again and make the list unique
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

	// Reverse order if direction is "rte"
	if ($direction=='rte')	{
		$modes = array_reverse($modes);
	}

	// Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and are thus independent processing options:
	$entry_HTMLparser = $this->procOptions['entryHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']) : '';
	$exit_HTMLparser = $this->procOptions['exitHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']) : '';

	// Line breaks of content are unified into char-10 only (turning char 13 + char 10 into char 10)
	if (!$this->procOptions['disableUnifyLineBreaks'])	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

	// If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($entry_HTMLparser))	{
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

	// Traverse modes:
	foreach($modes as $cmd)	{
		// ->DB
		if ($direction=='db')	{
			// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd])	{
				$_procObj = t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = $this;
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_db($value,$this);
			} else {	// ... else use defaults:
				switch($cmd)	{
					case 'ts_images':
						$value = $this->TS_images_db($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'db');
					break;
					case 'ts_links':
						$value = $this->TS_links_db($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_db($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$this->allowedClasses = t3lib_div::trimExplode(',', $this->procOptions['allowedClasses'], 1);
						$value = $this->TS_transform_db($value,$cmd=='css_transform');
					break;
					case 'ts_strip':
						$value = $this->TS_strip_db($value);
					break;
					default:
					break;
				}
			}
		}
		// ->RTE
		if ($direction=='rte')	{
			// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd])	{
				$_procObj = t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = $this;
				// Tell the user object which key it is serving, exactly as the "db" direction does.
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_rte($value,$this);
			} else {	// ... else use defaults:
				switch($cmd)	{
					case 'ts_images':
						$value = $this->TS_images_rte($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'rte');
					break;
					case 'ts_links':
						$value = $this->TS_links_rte($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_rte($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$value = $this->TS_transform_rte($value,$cmd=='css_transform');
					break;
					default:
					break;
				}
			}
		}
	}

	// If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($exit_HTMLparser))	{
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

	// Final clean up of linebreaks:
	if (!$this->procOptions['disableUnifyLineBreaks'])	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);	// Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value);	// ... and then change all \n into \r\n
	}

	// Return value:
	return $value;
}
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387 /************************************
388 *
389 * Specific RTE TRANSFORMATION functions
390 *
391 *************************************/
392
393 /**
394 * Transformation handler: 'ts_images' / direction: "db"
395 * Processing images inserted in the RTE.
396 * This is used when content goes from the RTE to the database.
397 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
398 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
399 * Also "magic" images are processed here.
400 *
401 * @param string The content from RTE going to Database
402 * @return string Processed content
403 */
function TS_images_db($value)	{

	// Site URL and site path are the same for every image, so they are looked up once.
	$siteUrl = $this->siteUrl();
	$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);

	// Split content by <img> tags and traverse the resulting array for processing:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if (!($k%2))	{	// Even entries are the content between the images; nothing to do.
			continue;
		}

		// Init
		$attribArray = $this->get_tag_attributes_classic($v,1);
		$absRef = trim($attribArray['src']);	// It's always an absolute URL coming from the RTE into the Database.

		// Make the reference absolute if it has no scheme and starts with the site path.
		// parse_url() is used for the scheme test: pathinfo() never returns a 'scheme' key.
		$urlParts = parse_url($absRef);
		$hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
		if ($sitePath && !$hasScheme && t3lib_div::isFirstPartOfStr($absRef,$sitePath))	{
			// If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
			$absRef = substr($absRef,strlen($sitePath));
			$absRef = $siteUrl.$absRef;
		}

		// External image from another URL? In that case, fetch image (unless disabled feature).
		if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures'])	{
			$externalFile = $this->getUrl($absRef);	// Get it
			if ($externalFile)	{
				$pU = parse_url($absRef);
				$pI = pathinfo($pU['path']);

				if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($pI['extension'])))	{
					// The "P" file is the stored original, the "C" file is the copy referenced from the content.
					$filename = t3lib_div::shortMD5($absRef).'.'.$pI['extension'];
					$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
					$C_origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];
					if (!@is_file($origFilePath))	{
						t3lib_div::writeFile($origFilePath,$externalFile);
						t3lib_div::writeFile($C_origFilePath,$externalFile);
					}
					$absRef = $siteUrl.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];

					$attribArray['src'] = $absRef;
					$params = t3lib_div::implodeAttributes($attribArray,1);
					$imgSplit[$k] = '<img '.$params.' />';
				}
			}
		}

		// Check image as local file (siteURL equals the one of the image)
		if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
			$path = rawurldecode(substr($absRef,strlen($siteUrl)));	// Rel-path, rawurldecoded for special characters.
			$filepath = t3lib_div::getFileAbsFileName($path);	// Abs filepath, locked to relative path of this project.

			// Check file existence (in relative dir to this installation!).
			// Images that are not found are deliberately left untouched.
			if ($filepath && @is_file($filepath))	{

				// If "magic image":
				$pathPre = $this->rteImageStorageDir().'RTEmagicC_';
				if (t3lib_div::isFirstPartOfStr($path,$pathPre))	{
					// Find original file: strip the last extension of the "C" file name to get the "P" file name.
					$pI = pathinfo(substr($path,strlen($pathPre)));
					$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
					$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
					if (@is_file($origFilePath))	{
						$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
						$imgObj->init();
						$imgObj->mayScaleUp = 0;
						$imgObj->tempPath = PATH_site.$imgObj->tempPath;

						$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
						$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag
						// Compare dimensions:
						if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1])	{
							$origImgInfo = $imgObj->getImageDimensions($origFilePath);	// Image dimensions of the original image
							$cW = $curWH[0];
							$cH = 1000;	// Make the image based on the width solely...
							$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$cW.'m',$cH.'m');
							if ($imgI[3])	{
								@copy($imgI[3],$filepath);	// Override the child file
								// Removing width and height from style attribute
								$attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
								$attribArray['width'] = $imgI[0];
								$attribArray['height'] = $imgI[1];
								$params = t3lib_div::implodeAttributes($attribArray,1);
								$imgSplit[$k] = '<img '.$params.' />';
							}
						}
					}

				} elseif ($this->procOptions['plainImageMode'])	{	// If "plain image" has been configured:

					// Image dimensions as set in the image tag, if any
					$curWH = $this->getWHFromAttribs($attribArray);
					if ($curWH[0])	$attribArray['width'] = $curWH[0];
					if ($curWH[1])	$attribArray['height'] = $curWH[1];

					// Removing width and height from style attribute
					$attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);

					// Finding dimensions of image file:
					$fI = @getimagesize($filepath);

					// Perform corrections to aspect ratio based on configuration:
					switch((string)$this->procOptions['plainImageMode'])	{
						case 'lockDimensions':
							$attribArray['width'] = $fI[0];
							$attribArray['height'] = $fI[1];
						break;
						case 'lockRatioWhenSmaller':	// First limit the width to the file's width...
							if ($attribArray['width']>$fI[0])	$attribArray['width'] = $fI[0];
							// ... then intentionally fall through to recalculate the height.
						case 'lockRatio':
							if ($fI[0]>0)	{
								$attribArray['height'] = round($attribArray['width']*($fI[1]/$fI[0]));
							}
						break;
					}

					// Compile the image tag again:
					$params = t3lib_div::implodeAttributes($attribArray,1);
					$imgSplit[$k] = '<img '.$params.' />';
				}
			}
		}

		// Convert abs to rel url
		if ($imgSplit[$k])	{
			$attribArray = $this->get_tag_attributes_classic($imgSplit[$k],1);
			$absRef = trim($attribArray['src']);
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
				$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
				if (!isset($attribArray['alt']))	$attribArray['alt'] = '';	// Must have alt-attribute for XHTML compliance.
				$imgSplit[$k] = '<img '.t3lib_div::implodeAttributes($attribArray,1,1).' />';
			}
		}
	}
	return implode('',$imgSplit);
}
546
547 /**
548 * Transformation handler: 'ts_images' / direction: "rte"
549 * Processing images from database content going into the RTE.
550 * Processing includes converting the src attribute to an absolute URL.
551 *
552 * @param string Content input
553 * @return string Content output
554 */
function TS_images_rte($value)	{

	$siteUrl = $this->siteUrl();
	$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
	// Pattern that strips the site path from the start of a src value.
	$sitePathPattern = '#^'.preg_quote($sitePath,'#').'#';

	// Odd entries of the split result are the <img> tags, even entries the content in between.
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if (!($k%2))	{
			continue;
		}

		$attribArray = $this->get_tag_attributes_classic($v,1);
		$absRef = trim($attribArray['src']);

		// Sources already starting with "http" are left as they are.
		if (strtolower(substr($absRef,0,4))=='http')	{
			continue;
		}

		// Cut off as many characters as ->relBackPath is long, ...
		$src = substr($attribArray['src'],strlen($this->relBackPath));
		// ... remove the site path if the site is in a subpath (eg. /~user_jim/), since $siteUrl adds it again ...
		$src = preg_replace($sitePathPattern,'',$src);
		// ... and prefix the site URL.
		$attribArray['src'] = $siteUrl.$src;

		if (!isset($attribArray['alt']))	{
			$attribArray['alt'] = '';
		}
		$imgSplit[$k] = '<img '.t3lib_div::implodeAttributes($attribArray).' />';
	}

	// Return processed content:
	return implode('',$imgSplit);
}
585
586 /**
587 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
588 * Converting <A>-tags to/from abs/rel
589 *
590 * @param string Content input
591 * @param string Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
592 * @return string Content output
593 */
function TS_reglinks($value,$direction)	{
	// Any direction other than "rte" or "db" yields an empty string.
	$retVal = '';

	switch($direction)	{
		case 'rte':
			$retVal = $this->TS_AtagToAbs($value,1);
		break;
		case 'db':
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
			// foreach replaces the former each() loop; each() is deprecated since PHP 7.2 and removed in PHP 8.0.
			foreach($blockSplit as $k => $v)	{
				if ($k%2)	{	// Odd entries are the <a>...</a> blocks:
					$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
					// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
						$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag = '<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
					$eTag = '</a>';
					// The link content is processed recursively.
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($v),$direction).$eTag;
				}
			}
			$retVal = implode('',$blockSplit);
		break;
	}
	return $retVal;
}
622
623 /**
624 * Transformation handler: 'ts_links' / direction: "db"
625 * Converting <A>-tags to <link tags>
626 *
627 * @param string Content input
628 * @return string Content output
629 * @see TS_links_rte()
630 */
function TS_links_db($value)	{

	// Odd entries of the split result are the <a>...</a> blocks, even entries the content in between.
	$blockSplit = $this->splitIntoBlock('A',$value);
	foreach($blockSplit as $k => $v)	{
		if (!($k%2))	{
			continue;
		}

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
		$info = $this->urlInfoForLinkTags($attribArray['href']);

		// Find out whether the tag carries attributes the <link> pseudo-tag cannot express.
		$otherAttributes = $attribArray;
		unset($otherAttributes['href'], $otherAttributes['target'], $otherAttributes['class'], $otherAttributes['title']);
		if ($otherAttributes['rteerror'])	{	// An "rteerror" marker and its "style" do not count.
			unset($otherAttributes['style'], $otherAttributes['rteerror']);
		}

		if (count($otherAttributes))	{
			// There are more attributes, so the link is stored as an a-tag (without 'rtekeep').
			unset($attribArray['rtekeep']);
			// If the url is local, remove url-prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
				$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
			}
			$bTag = '<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag = '</a>';
		} else {
			// Only href, target, class and title are present: build the TYPO3 pseudo-tag "<link>".
			// Parameter order is: url[,0,query] target class "title"; a "-" fills an empty slot
			// whenever a later parameter follows.
			$target = $attribArray['target'];
			$class = $attribArray['class'];
			$title = $attribArray['title'];

			$linkParams = $info['url'];
			if ($info['query'])	{
				$linkParams .= ',0,'.$info['query'];
			}
			if ($target)	{
				$linkParams .= ' '.$target;
			} elseif ($class || $title)	{
				$linkParams .= ' -';
			}
			if ($class)	{
				$linkParams .= ' '.$class;
			} elseif ($title)	{
				$linkParams .= ' -';
			}
			if ($title)	{
				$linkParams .= ' "'.$title.'"';
			}
			$bTag = '<link '.$linkParams.'>';
			$eTag = '</link>';
		}

		// The link content is processed recursively.
		$blockSplit[$k] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
	}
	return implode('',$blockSplit);
}
671
672 /**
673 * Transformation handler: 'ts_links' / direction: "rte"
674 * Converting <link tags> to <A>-tags
675 *
676 * @param string Content input
677 * @return string Content output
678 * @see TS_links_db()
679 */
function TS_links_rte($value)	{
	$value = $this->TS_AtagToAbs($value);

	// Split content by the TYPO3 pseudo tag "<link>":
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v)	{
		$error = '';
		if ($k%2)	{	// Odd entries are the <link>...</link> blocks:
			// $tagCode holds the parts of the opening tag; [1] is the link parameter, [2] target, [3] class, [4] title.
			$tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v),0,-1)),true);
			$link_param = $tagCode[1];
			$href = '';
			$siteUrl = $this->siteUrl();
			// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
			if(strstr($link_param,'@'))	{	// mailadr
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#')	{	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar = intval(strpos($link_param, '/'));
				$urlChar = intval(strpos($link_param, '.'));

				// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension']))))	{
					$href = $siteUrl.$link_param;
				} elseif($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar))	{	// url (external): If doubleSlash or if a '.' comes before a '/'.
					if (!preg_match('/^[a-z]*:\/\//',trim(strtolower($link_param))))	{$scheme='http://';} else {$scheme='';}
					$href = $scheme.$link_param;
				} elseif($fileChar)	{	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);	// Link-data del
					if (!strcmp($idPart,''))	{ $idPart=$this->recPid; }	// If no id or alias is given, set it to class record pid

					// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1)	{
						$idPart = $pairParts[0];
					}
					// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart))	{
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}

					// Keyword in front of the first ":" (e.g. "record" in "record:..."), used to look up a registered linkHandler.
					// Read from a variable: array_shift() takes its argument by reference and must not be fed the
					// result of explode() directly ("Only variables should be passed by reference").
					$linkHandlerParts = explode(':', $link_param);
					$linkHandlerKeyword = $linkHandlerParts[0];

					$page = t3lib_BEfunc::getRecord('pages', $idPart);
					if (is_array($page))	{	// Page must exist...
						$pairParts = t3lib_div::trimExplode(',',$link_param);
						$href = $siteUrl.'?id='.$pairParts[0].($pairParts[2]?$pairParts[2]:'');
					// linkHandler - allowing links to start with a registered linkHandler e.g. "record:"
					} elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$linkHandlerKeyword]))	{
						$href = $link_param;
					} else {
						// No page found: keep a link and flag the error so it becomes visible in the RTE.
						$href = $siteUrl.'?id='.$link_param;
						$error = 'No page found: '.$idPart;
					}
				}
			}

			// Setting the A-tag:
			$bTag = '<a href="'.htmlspecialchars($href).'"'.
						($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
						($tagCode[3]&&$tagCode[3]!='-' ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
						($tagCode[4] ? ' title="'.htmlspecialchars($tagCode[4]).'"' : '').
						($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : '').	// Should be OK to add the style; the transformation back to database will remove it...
						'>';
			$eTag = '</a>';
			// The link content is processed recursively.
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

	// Return content:
	return implode('',$blockSplit);
}
762
763 /**
764 * Preserve special tags
765 *
766 * @param string Content input
767 * @return string Content output
768 */
function TS_preserve_db($value)	{
	// Only active when tags to preserve have been configured.
	if ($this->preserveTags)	{
		// In the RTE the special tags are wrapped as <span specialtag="...">; odd entries are those span blocks.
		$blockSplit = $this->splitIntoBlock('span',$value);
		foreach(array_keys($blockSplit) as $k)	{
			if (!($k%2))	{
				continue;
			}
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($blockSplit[$k]));
			if (!$attribArray['specialtag'])	{
				continue;
			}
			// The attribute holds the rawurlencoded original opening tag; restore it around the content.
			$originalTag = rawurldecode($attribArray['specialtag']);
			$originalTagName = $this->getFirstTagName($originalTag);
			$blockSplit[$k] = $originalTag.$this->removeFirstAndLastTag($blockSplit[$k]).'</'.$originalTagName.'>';
		}
		$value = implode('',$blockSplit);
	}
	return $value;
}
786
787 /**
788 * Preserve special tags
789 *
790 * @param string Content input
791 * @return string Content output
792 */
function TS_preserve_rte($value)	{
	// Nothing to do unless tags to preserve have been configured.
	if (!$this->preserveTags)	{
		return $value;
	}

	// Odd entries of the split result are blocks of the tags to preserve.
	$blockSplit = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($blockSplit as $k => $v)	{
		if ($k%2)	{
			// Wrap the content in a span that carries the original opening tag rawurlencoded in "specialtag".
			$encodedTag = rawurlencode($this->getFirstTag($v));
			$innerContent = $this->removeFirstAndLastTag($v);
			$blockSplit[$k] = '<span specialtag="'.$encodedTag.'">'.$innerContent.'</span>';
		}
	}
	return implode('',$blockSplit);
}
804
805 /**
806 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
807 * Cleaning (->db) for standard content elements (ts)
808 *
809 * @param string Content input
810 * @param boolean If true, the transformation was "css_transform", otherwise "ts_transform"
811 * @return string Content output
812 * @see TS_transform_rte()
813 */
function TS_transform_db($value,$css=FALSE) {
    // Transformation "ts_transform" / "css_transform", direction "db" (content coming from the RTE).
    //
    // $value: content to transform
    // $css:   TRUE for "css_transform" (lists/headers/tables are kept as HTML), FALSE for "ts_transform"
    // Returns the transformed content.

    // Recursion guard: the counter is decremented on entry and restored on every exit path.
    // It is also restored on the early return, so that hitting the limit once does not
    // permanently reduce the budget available to later calls.
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        $this->TS_transform_db_safecounter++;
        return $value;
    }

    // Matches any run of line feed / carriage return characters.
    $lineBreakPattern = '/[' . preg_quote(chr(10) . chr(13)) . ']+/';

    // Split the content from the RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,' . ($this->procOptions['preserveDIVSections'] ? 'DIV,' : '') . $this->blockElementList, $value);

    $cc = 0;
    $aC = count($blockSplit);

    // Drop trailing blank parts so no superfluous line break is appended after the last block.
    while ($aC && !strcmp(trim($blockSplit[$aC - 1]), '')) {
        unset($blockSplit[$aC - 1]);
        $aC = count($blockSplit);
    }

    // Traverse the blocks (odd index = inside a block tag, even index = content between blocks).
    foreach ($blockSplit as $k => $v) {
        $cc++;
        // Every part except the last one is terminated by a line feed.
        $lastBR = $cc == $aC ? '' : chr(10);

        if ($k % 2) {
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));

            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                    // Keep the wrapping tag, but clean the inside recursively in the same manner.
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]), $css) . '</' . $tagName . '>' . $lastBR;
                break;
                case 'ol':
                case 'ul':
                    if (!$css) {
                        // Transform lists into <typolist> tags (unless disabled by procOptions[typolist] = 0).
                        if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
                            $parts = $this->getAllParts($this->splitIntoBlock('LI', $this->removeFirstAndLastTag($blockSplit[$k])), 1, 0);
                            $allowedListTags = strtolower($this->procOptions['allowTagsInTypolists'] ? $this->procOptions['allowTagsInTypolists'] : 'br,font,b,i,u,a,img,span,strong,em');
                            // foreach replaces the former each() loop (each() is removed in PHP 8).
                            foreach (array_keys($parts) as $k2) {
                                // Remove all line breaks, then clean the list item.
                                $parts[$k2] = preg_replace($lineBreakPattern, '', $parts[$k2]);
                                $parts[$k2] = $this->defaultTStagMapping($parts[$k2], 'db');
                                $parts[$k2] = $this->cleanFontTags($parts[$k2], 0, 0, 0);
                                $parts[$k2] = $this->HTMLcleaner_db($parts[$k2], $allowedListTags);
                            }
                            $params = $tagName == 'ol' ? ' type="1"' : '';
                            $blockSplit[$k] = '<typolist' . $params . '>' . chr(10) . implode(chr(10), $parts) . chr(10) . '</typolist>' . $lastBR;
                        }
                    } else {
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                break;
                case 'table':
                    // Tables are flattened unless preserveTables is set or CSS mode is active.
                    if (!$this->procOptions['preserveTables'] && !$css) {
                        $blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
                    } else {
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                    if (!$css) {
                        $attribArray = $this->get_tag_attributes_classic($tag);
                        // Processing inner content here:
                        $innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

                        if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
                            // <hN> becomes <typohead type="N">; type 6 is written without a type attribute.
                            $type = intval(substr($tagName, 1));
                            $blockSplit[$k] = '<typohead' .
                                ($type != 6 ? ' type="' . $type . '"' : '') .
                                ($attribArray['align'] ? ' align="' . $attribArray['align'] . '"' : '') .
                                ($attribArray['class'] ? ' class="' . $attribArray['class'] . '"' : '') .
                                '>' .
                                $innerContent .
                                '</typohead>' .
                                $lastBR;
                        } else {
                            $blockSplit[$k] = '<' . $tagName .
                                ($attribArray['align'] ? ' align="' . htmlspecialchars($attribArray['align']) . '"' : '') .
                                ($attribArray['class'] ? ' class="' . htmlspecialchars($attribArray['class']) . '"' : '') .
                                '>' .
                                $innerContent .
                                '</' . $tagName . '>' .
                                $lastBR;
                        }
                    } else {
                        // Eliminate true line breaks inside Hx tags.
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                break;
                default:
                    // Eliminate true line breaks inside other block tags.
                    $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                break;
            }
        } else {
            // Content between blocks: blank parts are dropped, everything else is divided into lines.
            if (strcmp(trim($blockSplit[$k]), '')) {
                $blockSplit[$k] = $this->divideIntoLines(preg_replace($lineBreakPattern, ' ', $blockSplit[$k])) . $lastBR;
                $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
            } else {
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;

    return implode('', $blockSplit);
}
926
927 /**
928 * Wraps a-tags that contain a style attribute with a span-tag
929 *
930 * @param string Content input
931 * @return string Content output
932 */
function transformStyledATags($value) {
    // Moves the "style" attribute of every <a> tag onto a <span> wrapped around the link.
    // Links without a style attribute are returned unchanged.
    $segments = $this->splitIntoBlock('A', $value);

    foreach ($segments as $index => $segment) {
        // Only odd indexes are <a> blocks.
        if (!($index % 2)) {
            continue;
        }

        $linkAttributes = $this->get_tag_attributes_classic($this->getFirstTag($segment), 1);
        if (!$linkAttributes['style']) {
            continue;
        }

        // The span gets the style, the link keeps every other attribute.
        $spanAttributes = array('style' => $linkAttributes['style']);
        unset($linkAttributes['style']);

        $openingTags = '<span ' . t3lib_div::implodeAttributes($spanAttributes, 1) . '><a ' . t3lib_div::implodeAttributes($linkAttributes, 1) . '>';
        $closingTags = '</a></span>';
        $segments[$index] = $openingTags . $this->removeFirstAndLastTag($segment) . $closingTags;
    }

    return implode('', $segments);
}
949
950 /**
951 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
952 * Set (->rte) for standard content elements (ts)
953 *
954 * @param string Content input
955 * @param boolean If true, the transformation was "css_transform", otherwise "ts_transform"
956 * @return string Content output
957 * @see TS_transform_db()
958 */
function TS_transform_rte($value,$css=0) {
    // Transformation "ts_transform" / "css_transform", direction "rte" (content coming from the database).
    //
    // $value: content to transform
    // $css:   passed on unchanged to the recursive calls
    // Returns the transformed content; the resulting parts are joined with line feeds.

    // Tags that delimit a block; used both for splitting and for detecting a following block.
    $blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,' . ($this->procOptions['preserveDIVSections'] ? 'DIV,' : '') . $this->blockElementList;
    $leadingBreakPattern = '/^[ ]*' . chr(10) . '/';
    $trailingBreakPattern = '/' . chr(10) . '[ ]*$/';

    $blockSplit = $this->splitIntoBlock($blockTagList, $value);

    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks.
            $openingTag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            $tagAttributes = $this->get_tag_attributes_classic($openingTag);

            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                    // Keep the wrapper and transform its content recursively.
                    $transformedInner = $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]), $css);
                    $blockSplit[$k] = $openingTag . $transformedInner . '</' . $tagName . '>';
                break;
                case 'typolist':
                    // <typolist> becomes <ol> (type=1) or <ul>, one <li> per line.
                    if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
                        $listBody = $this->removeFirstAndLastTag($blockSplit[$k]);
                        $listBody = preg_replace($leadingBreakPattern, '', $listBody);
                        $listBody = preg_replace($trailingBreakPattern, '', $listBody);
                        $listItems = explode(chr(10), $listBody);
                        $listTag = $tagAttributes['type'] == 1 ? 'ol' : 'ul';
                        $blockSplit[$k] = '<' . $listTag . '>' . chr(10) .
                            '<li>' . implode('</li>' . chr(10) . '<li>', $listItems) . '</li>' .
                            '</' . $listTag . '>';
                    }
                break;
                case 'typohead':
                    // <typohead type="N"> becomes <hN>; a missing/zero type maps to h6.
                    if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
                        $headerContent = $this->removeFirstAndLastTag($blockSplit[$k]);
                        $level = t3lib_div::intInRange($tagAttributes['type'], 0, 6);
                        if (!$level) {
                            $level = 6;
                        }
                        $alignAttribute = $tagAttributes['align'] ? ' align="' . $tagAttributes['align'] . '"' : '';
                        $classAttribute = $tagAttributes['class'] ? ' class="' . $tagAttributes['class'] . '"' : '';
                        $blockSplit[$k] = '<h' . $level . $alignAttribute . $classAttribute . '>' .
                            $headerContent .
                            '</h' . $level . '>';
                    }
                break;
            }

            // Strip the line break that directly follows the block from the next part.
            $blockSplit[$k + 1] = preg_replace($leadingBreakPattern, '', $blockSplit[$k + 1]);
        } else {
            // Content between blocks. Note: $blockSplit[$k] is read (not $v), because the
            // preceding block iteration may already have stripped its leading line break.
            $followingTagName = $this->getFirstTagName($blockSplit[$k + 1]);
            $wasSingleLineBreak = $blockSplit[$k] == chr(10);

            // Strip the line break directly preceding a following block.
            if (t3lib_div::inList($blockTagList, $followingTagName)) {
                $blockSplit[$k] = preg_replace($trailingBreakPattern, '', $blockSplit[$k]);
            }

            // Empty parts are dropped, unless the part was exactly one line break.
            if (!strcmp($blockSplit[$k], '') && !$wasSingleLineBreak) {
                unset($blockSplit[$k]);
            } else {
                $paragraphTag = $this->procOptions['useDIVasParagraphTagForRTE'] ? 'div' : 'p';
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k], $paragraphTag);
            }
        }
    }

    return implode(chr(10), $blockSplit);
}
1024
1025 /**
1026 * Transformation handler: 'ts_strip' / direction: "db"
1027 * Removing all non-allowed tags
1028 *
1029 * @param string Content input
1030 * @return string Content output
1031 */
function TS_strip_db($value) {
    // Transformation "ts_strip", direction "db": removes every tag that is not in the fixed list below.
    // The comma list is converted into the "<b><i>..." form expected by strip_tags().
    $allowedTags = '<' . str_replace(',', '><', 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote') . '>';

    return strip_tags($value, $allowedTags);
}
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050 /***************************************************************
1051 *
1052 * Generic RTE transformation, analysis and helper functions
1053 *
1054 **************************************************************/
1055
1056 /**
1057 * Reads the file or url $url and returns the content
1058 *
1059 * @param string Filepath/URL to read
1060 * @return string The content from the resource given as input.
1061 * @see t3lib_div::getURL()
1062 */
function getURL($url) {
    // Thin wrapper: the actual reading of the file/URL is delegated to t3lib_div::getURL().
    $content = t3lib_div::getURL($url);

    return $content;
}
1066
1067 /**
1068 * Function for cleaning content going into the database.
1069 * Content is cleaned eg. by removing unallowed HTML and ds-HSC content
1070 * It is basically calling HTMLcleaner from the parent class with some preset configuration specifically set up for cleaning content going from the RTE into the db
1071 *
1072 * @param string Content to clean up
1073 * @param string Comma list of tags to specifically allow. Default comes from getKeepTags and is ""
1074 * @return string Clean content
1075 * @see getKeepTags()
1076 */
function HTMLcleaner_db($content,$tagList='') {
    // Cleans content on its way into the database by calling HTMLcleaner() with a
    // configuration derived from $this->procOptions.
    //
    // $content: content to clean
    // $tagList: optional comma list of tags to keep; when empty, the default from getKeepTags('db') is used
    // Returns the cleaned content.
    $keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');

    // 1 when procOptions[dontRemoveUnknownTags_db] is set, otherwise 0.
    $keepUnknownTags = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
    // 0 when procOptions[dontUndoHSC_db] is set, otherwise -1.
    $hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;

    // Request xhtml cleaning as soon as any of the three parser configurations asks for it.
    $additionalConfig = array();
    foreach (array('HTMLparser_db.', 'entryHTMLparser_db.', 'exitHTMLparser_db.') as $parserKey) {
        if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
            $additionalConfig['xhtml'] = 1;
            break;
        }
    }

    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags, $hscMode, $additionalConfig);
}
1094
1095 /**
1096 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
1097 * Unless "tagList" is given, the function will cache the configuration for next time processing goes on. (In this class that is the case only if we are processing a bulletlist)
1098 *
1099 * @param string The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
1100 * @param string Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
1101 * @return array Configuration array
1102 * @see HTMLcleaner_db()
1103 */
function getKeepTags($direction='rte',$tagList='') {
    // Builds the "keep tags" configuration array for HTMLcleaner().
    //
    // $direction: 'db' (content going from the RTE into the database) or 'rte' (the other way)
    // $tagList:   optional comma list of tags to keep; when given, the result is NOT cached
    // Returns the configuration array. Without $tagList the result is cached per direction.

    // Serve from the cache when no explicit tag list was requested.
    if (!$tagList && is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Determine the allowed tags; an explicit $tagList takes precedence over the processing options.
    if (strcmp($tagList, '')) {
        $keepTags = array_flip(t3lib_div::trimExplode(',', $tagList, 1));
    } else {
        $defaultTagList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
        $configuredTagList = strtolower($this->procOptions['allowTags']);
        $keepTags = array_flip(t3lib_div::trimExplode(',', $defaultTagList . ',' . $configuredTagList, 1));

        // Denied tags are removed again.
        foreach (t3lib_div::trimExplode(',', $this->procOptions['denyTags'], 1) as $deniedTag) {
            unset($keepTags[$deniedTag]);
        }
    }

    // b/i <-> strong/em remapping is on unless explicitly switched off.
    $remapBoldAndItalic = !isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags'];

    switch ($direction) {
        case 'rte':
            // Database -> RTE: bold/italic become strong/em.
            if ($remapBoldAndItalic) {
                if (isset($keepTags['b'])) {
                    $keepTags['b'] = array('remap' => 'STRONG');
                }
                if (isset($keepTags['i'])) {
                    $keepTags['i'] = array('remap' => 'EM');
                }
            }

            // Convert the TypoScript-style configuration into the plain array form.
            list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
        break;
        case 'db':
            // RTE -> database: strong/em become b/i again.
            if ($remapBoldAndItalic) {
                if (isset($keepTags['strong'])) {
                    $keepTags['strong'] = array('remap' => 'b');
                }
                if (isset($keepTags['em'])) {
                    $keepTags['em'] = array('remap' => 'i');
                }
            }

            // span: restrict attributes; the class list is only enforced when allowedClasses is configured.
            if (isset($keepTags['span'])) {
                $allowedSpanClasses = array_merge(array(''), $this->allowedClasses);
                $keepTags['span'] = array(
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir',
                    'fixAttrib' => array(
                        'class' => array(
                            'list' => $allowedSpanClasses,
                            'removeIfFalse' => 1
                        ),
                    ),
                    'rmTagIfNoAttrib' => 1
                );
                if (!$this->procOptions['allowedClasses']) {
                    unset($keepTags['span']['fixAttrib']['class']['list']);
                }
            }

            // font: restrict attributes; the color list is only enforced when allowedFontColors is configured.
            if (isset($keepTags['font'])) {
                $allowedColors = array_merge(array(''), t3lib_div::trimExplode(',', $this->procOptions['allowedFontColors'], 1));
                $keepTags['font'] = array(
                    'allowedAttribs' => 'face,color,size',
                    'fixAttrib' => array(
                        'face' => array(
                            'removeIfFalse' => 1
                        ),
                        'color' => array(
                            'removeIfFalse' => 1,
                            'list' => $allowedColors
                        ),
                        'size' => array(
                            'removeIfFalse' => 1,
                        )
                    ),
                    'rmTagIfNoAttrib' => 1
                );
                if (!$this->procOptions['allowedFontColors']) {
                    unset($keepTags['font']['fixAttrib']['color']['list']);
                }
            }

            // Parser options from the processing options, with defaults for nesting and attribute-less tags.
            $parserConfig = $this->procOptions['HTMLparser_db.'];
            if (!$parserConfig['globalNesting']) {
                $parserConfig['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (!$parserConfig['noAttrib']) {
                $parserConfig['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }

            // Convert the TypoScript-style configuration into the plain array form.
            list($keepTags) = $this->HTMLparserConfig($parserConfig, $keepTags);
        break;
    }

    // A result built from an explicit tag list is returned directly and never cached.
    if ($tagList) {
        return $keepTags;
    }

    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
1206
1207 /**
1208 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by chr(10).
1209 * The point is to resolve the HTML code returned from the RTE into ordinary lines so it is 'human-readable'.
1210 * The function ->setDivTags does the opposite.
1211 * This function processes content to go into the database.
1212 *
1213 * @param string Value to process.
1214 * @param integer Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
1215 * @param boolean If true, an array with the lines is returned, otherwise a string of the processed input value.
1216 * @return string Processed input value.
1217 * @see setDivTags()
1218 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {
    // Resolves <div>/<p> sections (and optionally <br> tags) of RTE content into plain lines
    // separated by chr(10), for storage in the database.
    //
    // $value:       content to process
    // $count:       recursion brake, decremented for each nesting level of p/div
    // $returnArray: if TRUE, the processed parts are returned as an array instead of a string
    // Returns the processed value. Note: when no p/div section is found (or $count is used up)
    // the plain string $value is returned even if $returnArray is set; the recursive call
    // below relies on that to distinguish "nested sections found" from "plain line".

    // Internalize font tags (move them from OUTSIDE p/div to inside, if configured):
    if ($this->procOptions['internalizeFontTags']) {
        $value = $this->internalizeFontTags($value);
    }

    // Setting configuration for processing:
    $allowTagsOutside = t3lib_div::trimExplode(',', strtolower($this->procOptions['allowTagsOutside'] ? $this->procOptions['allowTagsOutside'] : 'img'), 1);
    $remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
    // Third parameter = 1: false end-tags are eliminated by splitIntoBlock().
    $divSplit = $this->splitIntoBlock('div,p', $value, 1);

    if ($this->procOptions['keepPDIVattribs']) {
        $keepAttribListArr = t3lib_div::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), 1);
    } else {
        $keepAttribListArr = array();
    }

    // Return the value as-is if there were no div/p sections in it (or the recursion brake hit zero).
    if (count($divSplit) <= 1 || $count <= 0) {
        return $value;
    }

    // Traverse the split sections:
    foreach ($divSplit as $k => $v) {
        if ($k % 2) {
            // Inside a p/div section.
            $v = $this->removeFirstAndLastTag($v);

            // Fetching 'sub-lines' - which will explode any further p/div nesting...
            $subLines = $this->divideIntoLines($v, $count - 1, 1);
            if (!is_array($subLines)) {
                // No nested section was found: process the content as a true line.
                // (If nested p/div sections WERE found, their already processed lines are used directly.)
                $subLines = array($subLines);
                if (!$this->procOptions['dontConvBRtoParagraph']) {
                    // Treat each <br> as a line separator. preg_split() replaces spliti(),
                    // which is deprecated as of PHP 5.3 and removed in PHP 7.
                    $subLines = preg_split('/<br[[:space:]]*[\/]?>/i', $v);
                }

                // The wrapping tag and its attributes are the same for every sub-line,
                // so they are resolved once up front.
                $fTag = $this->getFirstTag($divSplit[$k]);
                $tagName = strtolower($this->getFirstTagName($divSplit[$k]));
                $attribs = $this->get_tag_attributes($fTag);

                // Keep attributes (lowercase)
                $newAttribs = array();
                foreach ($keepAttribListArr as $keepA) {
                    if (isset($attribs[0][$keepA])) {
                        $newAttribs[$keepA] = $attribs[0][$keepA];
                    }
                }

                // ALIGN attribute: kept when set to anything but 'left'.
                if (!$this->procOptions['skipAlign'] && strcmp(trim($attribs[0]['align']), '') && strtolower($attribs[0]['align']) != 'left') {
                    $newAttribs['align'] = strtolower($attribs[0]['align']);
                }

                // CLASS attribute: kept completely if allowed as a whole, otherwise reduced to the allowed class names.
                if (!$this->procOptions['skipClass'] && strcmp(trim($attribs[0]['class']), '')) {
                    if (!count($this->allowedClasses) || in_array($attribs[0]['class'], $this->allowedClasses)) {
                        $newAttribs['class'] = $attribs[0]['class'];
                    } else {
                        $classes = t3lib_div::trimExplode(' ', $attribs[0]['class'], true);
                        $newClasses = array();
                        foreach ($classes as $class) {
                            if (in_array($class, $this->allowedClasses)) {
                                $newClasses[] = $class;
                            }
                        }
                        if (count($newClasses)) {
                            $newAttribs['class'] = implode(' ', $newClasses);
                        }
                    }
                }

                // Lines are re-wrapped only if attributes survived and remapParagraphTag is not "1".
                $wrapLines = count($newAttribs) && strcmp($remapParagraphTag, '1');
                if ($wrapLines) {
                    if ($remapParagraphTag == 'P') {
                        $tagName = 'p';
                    }
                    if ($remapParagraphTag == 'DIV') {
                        $tagName = 'div';
                    }
                    $lineOpenTag = '<' . trim($tagName . ' ' . $this->compileTagAttribs($newAttribs)) . '>';
                    $lineCloseTag = '</' . $tagName . '>';
                }

                // Traverse sub-lines (typically one, unless <br> tags were converted to lines).
                // foreach replaces the former each() loop (each() is removed in PHP 8).
                foreach (array_keys($subLines) as $sk) {
                    // Clean up the sub-line for the database.
                    $subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);

                    // Remove any line break char (10 or 13)
                    $subLines[$sk] = preg_replace('/' . chr(10) . '|' . chr(13) . '/', '', $subLines[$sk]);

                    if ($wrapLines) {
                        $subLines[$sk] = $lineOpenTag . $subLines[$sk] . $lineCloseTag;
                    }
                }
            }
            // Add the processed line(s)
            $divSplit[$k] = implode(chr(10), $subLines);

            // If the line is just blank (containing only &nbsp;), make it purely blank -
            // unless it contains an image or a tag carrying attributes, which is treated as content.
            if (trim(strip_tags($divSplit[$k])) == '&nbsp;' && !preg_match('/\<(img)(\s[^>]*)?\/?>/si', $divSplit[$k]) && !preg_match('/\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
                $divSplit[$k] = '';
            }
        } else {
            // Outside p/div: strip all tags except the allowed ones and drop the part if nothing is left.
            $divSplit[$k] = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
            if (!strcmp($divSplit[$k], '')) {
                unset($divSplit[$k]);
            }
        }
    }

    // Return value:
    return $returnArray ? $divSplit : implode(chr(10), $divSplit);
}
1328
1329 /**
1330 * Converts all lines into <div></div>/<p></p>-sections (unless the line is a div-section already)
1331 * For processing of content going FROM database TO RTE.
1332 *
1333 * @param string Value to convert
1334 * @param string Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
1335 * @return string Processed value.
1336 * @see divideIntoLines()
1337 */
function setDivTags($value,$dT='p') {
    // Wraps every chr(10)-separated line of $value in <$dT>...</$dT>, unless the line is
    // already completely wrapped in <div> or <p>. Used for content going from database to RTE.
    //
    // $value: content to convert
    // $dT:    tag name to wrap with ("p" or "div")
    // Returns the processed content, lines joined with chr(10) again.

    // HTMLcleaner() settings applied to each non-blank line:
    $keepTags = $this->getKeepTags('rte');
    // 'protect' unless procOptions[dontProtectUnknownTags_rte] is set (then 0).
    $unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
    // 1 unless procOptions[dontHSC_rte] is set (then 0).
    $hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
    // Whether "&amp;nbsp;" is turned back into "&nbsp;" after cleaning.
    $restoreNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'];

    $lines = explode(chr(10), $value);
    foreach ($lines as $index => $line) {
        if (!strcmp(trim($line), '')) {
            // Blank lines become a non-breaking space.
            $line = '&nbsp;';
        } else {
            $line = $this->HTMLcleaner($line, $keepTags, $unknownTagMode, $hscMode);
            if ($restoreNbsp) {
                $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
            }
        }

        // Only wrap when the line is neither a complete <div> nor a complete <p> section.
        $probe = strtolower(trim($line));
        $isDivWrapped = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
        $isParagraphWrapped = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
        if (!$isDivWrapped && !$isParagraphWrapped) {
            $line = '<' . $dT . '>' . $line . '</' . $dT . '>';
        }

        $lines[$index] = $line;
    }

    return implode(chr(10), $lines);
}
1371
1372 /**
1373 * This splits the $value in font-tag chunks.
1374 * If there are any <P>/<DIV> sections inside of them, the font-tag is wrapped AROUND the content INSIDE of the P/DIV sections and the outer font-tag is removed.
1375 * This function seems to be a good choice for pre-processing content if it has been pasted into the RTE from e.g. StarOffice.
1376 * In that case the font-tags are normally on the OUTSIDE of the sections.
1377 * This function is used by e.g. divideIntoLines() if the processing option 'internalizeFontTags' is set.
1378 *
1379 * @param string Input content
1380 * @return string Output content
1381 * @see divideIntoLines()
1382 */
function internalizeFontTags($value) {
    // Moves <font> tags that wrap <div>/<p> sections to the inside of those sections:
    // <font><p>x</p></font> becomes <p><font>x</font></p>. Font tags without p/div
    // sections inside are left untouched.
    $fontSegments = $this->splitIntoBlock('font', $value);

    foreach ($fontSegments as $fontIndex => $fontSegment) {
        // Only odd indexes are <font> blocks.
        if (!($fontIndex % 2)) {
            continue;
        }

        $fontOpenTag = $this->getFirstTag($fontSegment);
        $innerSegments = $this->splitIntoBlock('div,p', $this->removeFirstAndLastTag($fontSegment), 1);

        // A single part means there is no div/p section inside this font tag.
        if (count($innerSegments) <= 1) {
            continue;
        }

        foreach ($innerSegments as $innerIndex => $innerSegment) {
            if ($innerIndex % 2) {
                // div/p section: re-apply the font tag around its content.
                $sectionOpenTag = $this->getFirstTag($innerSegment);
                $sectionTagName = $this->getFirstTagName($innerSegment);
                $sectionContent = $this->removeFirstAndLastTag($innerSegment);
                $innerSegments[$innerIndex] = $sectionOpenTag . $fontOpenTag . $sectionContent . '</font>' . '</' . $sectionTagName . '>';
            } elseif (trim(strip_tags($innerSegment))) {
                // Text between sections keeps the font tag as well.
                $innerSegments[$innerIndex] = $fontOpenTag . $innerSegment . '</font>';
            }
        }

        // The outer font tag is dropped by replacing the block with the re-assembled inner parts.
        $fontSegments[$fontIndex] = implode('', $innerSegments);
    }

    return implode('', $fontSegments);
}
1412
1413 /**
1414 * Returns SiteURL based on thisScript.
1415 *
1416 * @return string Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
1417 * @see t3lib_div::getIndpEnv()
1418 */
function siteUrl() {
    // Returns the value of the TYPO3_SITE_URL environment key as resolved by t3lib_div::getIndpEnv().
    $siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');

    return $siteUrl;
}
1422
1423 /**
1424 * Return the storage folder of RTE image files.
1425 * Default is $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'] unless something else is configured in the types configuration for the RTE.
1426 *
1427 * @return string
1428 */
function rteImageStorageDir() {
    // An "imgpath" set in $this->rte_p takes precedence over the global default.
    if ($this->rte_p['imgpath']) {
        return $this->rte_p['imgpath'];
    }

    return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1432
1433 /**
1434 * Remove all tables from incoming code
1435 * The function tries to do this in a more or less respectful way: the approach is to resolve each table cell's content and implode it all by <br /> chars. Thus at least the content is preserved in some way.
1436 *
1437 * @param string Input value
1438 * @param string Break character to use for linebreaks.
1439 * @return string Output value
1440 */
function removeTables($value,$breakChar='<br />') {
    // Replaces every <table> in $value by the content of its <td> cells, each cell
    // followed by $breakChar. The resulting parts are joined with $breakChar as well.
    //
    // $value:     input content
    // $breakChar: separator placed after each cell and between the parts
    // Returns the content without table markup.
    $segments = $this->splitIntoBlock('table', $value);

    foreach ($segments as $index => $tableHtml) {
        // Only odd indexes are <table> blocks.
        if (!($index % 2)) {
            continue;
        }

        $flattened = '';
        $rows = $this->splitIntoBlock('tr', $tableHtml);
        foreach ($rows as $rowIndex => $rowHtml) {
            if (!($rowIndex % 2)) {
                continue;
            }
            $cells = $this->getAllParts($this->splitIntoBlock('td', $rowHtml), 1, 0);
            foreach ($cells as $cellContent) {
                $flattened .= $cellContent . $breakChar;
            }
        }
        $segments[$index] = $flattened;
    }

    // Implode it all again:
    return implode($breakChar, $segments);
}
1465
1466 /**
1467 * Default tag mapping for TS
1468 *
1469 * @param string Input code to process
1470 * @param string Direction: to database (db) or from database to RTE (rte)
1471 * @return string Processed value
1472 */
function defaultTStagMapping($code,$direction='rte') {
    // Maps strong/em to b/i for direction "db" and b/i to strong/em for direction "rte".
    // Any other direction returns $code unchanged.
    $toDatabaseMap = array(
        'strong' => 'b',
        'em' => 'i'
    );
    $toEditorMap = array(
        'b' => 'strong',
        'i' => 'em'
    );

    if ($direction == 'db') {
        $code = $this->mapTags($code, $toDatabaseMap);
    }
    if ($direction == 'rte') {
        $code = $this->mapTags($code, $toEditorMap);
    }

    return $code;
}
1488
1489 /**
1490 * Finds width and height from attrib-array
1491 * If the width and height is found in the style-attribute, use that!
1492 *
1493 * @param array Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
1494 * @return array Integer w/h in key 0/1. Zero is returned if not found.
1495 */
function getWHFromAttribs($attribArray) {
    // Finds width and height for a tag from its attribute array.
    // Pixel values from the "style" attribute ("width: Npx" / "height: Npx") take precedence;
    // the plain "width" / "height" attributes are the fallback.
    //
    // $attribArray: attributes of the tag (keys "style", "width", "height" are read)
    // Returns array(width, height) as integers; 0 where nothing was found.
    $w = 0;
    $h = 0;

    $style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
    if ($style) {
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        // The property name must not be the tail of a longer one; otherwise "border-width"
        // or "line-height" would be mistaken for the element's width/height.
        $notInsideOtherProperty = '(?<![a-z0-9_-])';

        $reg = array();
        if (preg_match('/' . $notInsideOtherProperty . 'width' . $regex . '/i', $style, $reg)) {
            $w = intval($reg[1]);
        }
        if (preg_match('/' . $notInsideOtherProperty . 'height' . $regex . '/i', $style, $reg)) {
            $h = intval($reg[1]);
        }
    }

    // Fall back to the plain attributes when the style gave no (non-zero) value.
    if (!$w && isset($attribArray['width'])) {
        $w = $attribArray['width'];
    }
    if (!$h && isset($attribArray['height'])) {
        $h = $attribArray['height'];
    }

    return array(intval($w), intval($h));
}
1516
1517 /**
1518 * Parse <A>-tag href and return status of email,external,file or page
1519 *
1520 * @param string URL to analyse.
1521 * @return array Information in an array about the URL
1522 */
function urlInfoForLinkTags($url) {
    // Analyses the href of an <a> tag and classifies it.
    //
    // $url: URL to analyse
    // Returns an array with at least the keys "url" and "type", where type is one of
    // 'email', 'ext', 'anchor', 'page' or 'file'. For 'page' the keys "pageid", "cElement"
    // and "query" are set in addition; "relScriptPath"/"relUrl" are present for URLs
    // recognised as belonging to this site.
    $info = array();
    $url = trim($url);

    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }

    $curURL = $this->siteUrl();

    // Length of the common prefix of $url and the site URL. The loop is bounded by the
    // shorter string, and uses [] offsets ({} offsets are a fatal error as of PHP 8).
    $compareLength = min(strlen($url), strlen($curURL));
    for ($a = 0; $a < $compareLength; $a++) {
        if ($url[$a] !== $curURL[$a]) {
            break;
        }
    }

    $info['relScriptPath'] = substr($curURL, $a);
    $info['relUrl'] = substr($url, $a);
    $info['url'] = $url;
    $info['type'] = 'ext';

    $urlParts = parse_url($url);
    $curUrlParts = parse_url($curURL);
    $urlHost = isset($urlParts['host']) ? $urlParts['host'] : NULL;
    $curUrlHost = isset($curUrlParts['host']) ? $curUrlParts['host'] : NULL;

    // Hosts must match, and the part of the site URL that is not shared with $url must be
    // empty or start with the TYPO3 main directory.
    if ($urlHost == $curUrlHost
        && (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir))) {

        $uP = parse_url($info['relUrl']);
        $relPath = isset($uP['path']) ? $uP['path'] : '';
        $relQuery = isset($uP['query']) ? $uP['query'] : '';
        $urlFragment = isset($urlParts['fragment']) ? $urlParts['fragment'] : '';

        if (!strcmp('#' . $urlFragment, $info['relUrl'])) {
            // The relative URL consists of nothing but the fragment.
            $info['url'] = $info['relUrl'];
            $info['type'] = 'anchor';
        } elseif (!trim($relPath) || !strcmp($relPath, 'index.php')) {
            // No path or index.php: a page link if an "id=" parameter is present.
            $pp = explode('id=', $relQuery);
            $parameters = explode('&', isset($pp[1]) ? $pp[1] : '');
            $id = array_shift($parameters);
            if ($id) {
                $info['pageid'] = $id;
                $info['cElement'] = isset($uP['fragment']) ? $uP['fragment'] : NULL;
                $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
                $info['type'] = 'page';
                // Whatever followed the id value in the query string is preserved.
                $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
            }
        } else {
            $info['url'] = $info['relUrl'];
            $info['type'] = 'file';
        }
    } else {
        // Not recognised as a URL of this site: plain external link.
        unset($info['relScriptPath']);
        unset($info['relUrl']);
    }

    return $info;
}
1576
1577 /**
1578 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
1579 *
1580 * @param string Content input
1581 * @param boolean If true, then the "rtekeep" attribute will not be set.
1582 * @return string Content output
1583 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
    // Converts the href of <a> tags without a URL scheme into absolute URLs (prefixed with
    // the site URL) and sets the "rtekeep" attribute.
    //
    // $value:          content input
    // $dontSetRTEKEEP: if TRUE, "rtekeep" is only set on <a> tags that have no href
    // Returns the content with rewritten <a> tags.
    $blockSplit = $this->splitIntoBlock('A', $value);

    // foreach replaces the former each() loop (each() is removed in PHP 8).
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside an <a> block.
            $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);

            // Only touch the href if it has content - the <a> tag could be a named anchor,
            // which is preserved (and marked with rtekeep).
            if (strlen($attribArray['href'])) {
                $uP = parse_url(strtolower($attribArray['href']));
                if (empty($uP['scheme'])) {
                    // No scheme: strip as many characters as relBackPath is long and prepend the site URL.
                    $attribArray['href'] = $this->siteUrl() . substr($attribArray['href'], strlen($this->relBackPath));
                }
            } else {
                $attribArray['rtekeep'] = 1;
            }
            if (!$dontSetRTEKEEP) {
                $attribArray['rtekeep'] = 1;
            }

            $bTag = '<a ' . t3lib_div::implodeAttributes($attribArray, 1) . '>';
            $eTag = '</a>';
            // NOTE(review): the recursive call does not pass $dontSetRTEKEEP on, so nested <a> tags
            // always get rtekeep. Kept as in the original - confirm whether that is intended.
            $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }

    return implode('', $blockSplit);
}
1609 }
1610
1611
// XCLASS hook: when running inside TYPO3 and an extension class file is registered
// for this script in $TYPO3_CONF_VARS, that file is included once.
if (defined('TYPO3_MODE')) {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
    }
}
1615
1616 ?>