Fixed bug #10104: IRRE - Controll icons in header of newly created child elements...
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2008 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 103: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 138: function init($elRef='',$recPid=0)
44 * 150: function setRelPath($path)
45 * 174: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 232: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 398: function TS_images_db($value)
52 * 550: function TS_images_rte($value)
53 * 589: function TS_reglinks($value,$direction)
54 * 626: function TS_links_db($value)
55 * 675: function TS_links_rte($value)
56 * 760: function TS_preserve_db($value)
57 * 784: function TS_preserve_rte($value)
58 * 805: function TS_transform_db($value,$css=FALSE)
59 * 922: function transformStyledATags($value)
60 * 948: function TS_transform_rte($value,$css=0)
61 * 1019: function TS_strip_db($value)
62 *
63 * SECTION: Generic RTE transformation, analysis and helper functions
64 * 1050: function getURL($url)
65 * 1064: function HTMLcleaner_db($content,$tagList='')
66 * 1091: function getKeepTags($direction='rte',$tagList='')
67 * 1200: function divideIntoLines($value,$count=5,$returnArray=FALSE)
68 * 1304: function setDivTags($value,$dT='p')
69 * 1349: function internalizeFontTags($value)
70 * 1385: function siteUrl()
71 * 1395: function rteImageStorageDir()
72 * 1407: function removeTables($value,$breakChar='<br />')
73 * 1439: function defaultTStagMapping($code,$direction='rte')
74 * 1462: function getWHFromAttribs($attribArray)
75 * 1489: function urlInfoForLinkTags($url)
76 * 1548: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
77 *
78 * TOTAL FUNCTIONS: 28
79 * (This index is automatically created/updated by the extension "extdeveval")
80 *
81 */
82
83 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
84
85
86
87
88
89
90
91
92
93
94
95
96 /**
97 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
98 *
99 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
100 * @package TYPO3
101 * @subpackage t3lib
102 */
103 class t3lib_parsehtml_proc extends t3lib_parsehtml {
104
105 // Static:
106 var $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,HR,ADDRESS,DL,DD'; // List of tags for these elements
107
108 // Internal, static:
109 var $recPid = 0; // Set this to the pid of the record manipulated by the class.
110 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
111 var $relPath=''; // Relative path
112 var $relBackPath=''; // Relative back-path
113 public $tsConfig = array(); // Current Page TSConfig
114 var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
115
116 // Internal, dynamic
117 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls.
118 var $rte_p=''; // Parameters from TCA types configuration related to the RTE
119 var $getKeepTags_cache=array(); // Data caching for processing function
120 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE
121 var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
122
123
124
125
126
127
128
129
130
131
132 /**
133 * Initialize, setting element reference and record PID
134 *
135 * @param string Element reference, eg "tt_content:bodytext"
136 * @param integer PID of the record (page id)
137 * @return void
138 */
function init($elRef='',$recPid=0) {
	// Store the element reference ("[table]:[field]") and the page id of the
	// record being processed on the object.
	$this->elRef = $elRef;
	$this->recPid = $recPid;
}
143
144 /**
145 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
146 * This is used when editing files with the RTE
147 *
148 * @param string The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
149 * @return void There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
150 */
function setRelPath($path) {
	// Normalize the input: surrounding whitespace is removed, then at most one
	// leading and one trailing slash. preg_replace() is used because the POSIX
	// ereg_replace() function was deprecated in PHP 5.3 and removed in PHP 7.0.
	$path = trim($path);
	$path = preg_replace('#^/#', '', $path);
	$path = preg_replace('#/$#D', '', $path);

	// Compare against the empty string rather than testing truthiness, so that
	// a directory literally named "0" is not silently ignored.
	if (strcmp($path, '')) {
		// One "../" per path segment, eg. "fileadmin/static" => "../../"
		$this->relBackPath = str_repeat('../', count(explode('/', $path)));
		// ->relPath always carries a trailing slash, eg. "fileadmin/static/"
		$this->relPath = $path.'/';
	}
}
165
166 /**
167 * Evaluate the environment for editing a staticFileEdit file.
168 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
169 *
170 * @param array Parameters for the current field as found in types-config
171 * @param array Current record we are editing.
172 * @return mixed On success an array with various information is returned, otherwise a string with an error message
173 * @see t3lib_TCEmain, t3lib_transferData
174 */
function evalWriteFile($pArr,$currentRecord) {
	// Evaluates whether the current record points to an editable static file.
	// Returns an array describing the file on success, an "ERROR: ..." string on
	// failure, and nothing (NULL) when $pArr is not an array.
	// NOTE(review): the docblock above says this is invoked statically; the body
	// uses no $this, but the method is not declared static - confirm before
	// changing the declaration.

	// Without a configuration array there is nothing to evaluate:
	if (!is_array($pArr)) {
		return;
	}

	// The base path must be configured, end with a slash and exist below PATH_site:
	$staticFileEditPath = isset($GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath']) ? $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'] : '';
	if (!$staticFileEditPath
		|| substr($staticFileEditPath,-1)!='/'
		|| !@is_dir(PATH_site.$staticFileEditPath)) {
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

	// Read the five parameter slots; a missing slot counts as an empty string
	// (same result as before, but without notices for absent entries):
	$SW_p = isset($pArr['parameters']) ? $pArr['parameters'] : array();
	$SW_fields = array();
	for ($slot=0; $slot<5; $slot++) {
		$SW_fields[$slot] = isset($SW_p[$slot]) ? trim($SW_p[$slot]) : '';
	}

	// Slot 0 names the record field which holds the file name to edit:
	$SW_editFileField = $SW_fields[0];
	$SW_editFile = isset($currentRecord[$SW_editFileField]) ? $currentRecord[$SW_editFileField] : '';
	if (!$SW_editFileField || !$SW_editFile || !t3lib_div::validPathStr($SW_editFile)) {
		return "ERROR: Edit file name could not be found or was bad.";
	}

	// The file itself must exist:
	$SW_relpath = $staticFileEditPath.$SW_editFile;
	$SW_editFile = PATH_site.$SW_relpath;
	if (!@is_file($SW_editFile)) {
		return "ERROR: Editfile '".$SW_relpath."' did not exist";
	}

	return array(
		'editFile' => $SW_editFile,
		'relEditFile' => $SW_relpath,
		'contentField' => $SW_fields[1],
		'markerField' => $SW_fields[2],
		'loadFromFileField' => $SW_fields[3],
		'statusField' => $SW_fields[4]
	);
}
203
204
205
206
207
208
209
210
211
212
213
214
215
216 /**********************************************
217 *
218 * Main function
219 *
220 **********************************************/
221
222 /**
223 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
224 * This is the main function called from tcemain and transfer data classes
225 *
226 * @param string Input value
227 * @param array Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can set up features for the field rendering and in particular the RTE takes all its major configuration options from there!
228 * @param string Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
229 * @param array Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
230 * @return string Output value
231 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
232 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array()) {
	// Runs $value through every configured transformation mode, in the given
	// $direction: "db" (content from the RTE going to the database) or "rte"
	// (content from the database going to the RTE). Returns the transformed string.

	// Init: keep the RTE configuration and its "proc." options on the object:
	$this->tsConfig = $thisConfig;
	$this->procOptions = $thisConfig['proc.'];
	$this->preserveTags = strtoupper(implode(',',t3lib_div::trimExplode(',',$this->procOptions['preserveTags'])));

	// Dynamic configuration of blockElementList:
	if ($this->procOptions['blockElementList']) {
		$this->blockElementList = $this->procOptions['blockElementList'];
	}

	// Get parameters for rte_transformation:
	$p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);

	// Setting modes: "overruleMode" (comma separated) wins over the "mode" parameter (dash separated):
	if (strcmp($this->procOptions['overruleMode'],'')) {
		$modes = array_unique(t3lib_div::trimExplode(',',$this->procOptions['overruleMode']));
	} else {
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	}
	$revmodes = array_flip($modes);

	// The meta modes "ts" and "ts_css" are replaced by the list of modes they stand for:
	if (isset($revmodes['ts'])) {
		$modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
	}
	if (isset($revmodes['ts_css'])) {
		$modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
	}

	// Flatten the expanded entries again and make the list unique:
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

	// Reverse order if direction is "rte":
	if ($direction=='rte') {
		$modes = array_reverse($modes);
	}

	// Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and are thus totally independent processing options you can set up:
	$entry_HTMLparser = $this->procOptions['entryHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']) : '';
	$exit_HTMLparser = $this->procOptions['exitHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']) : '';

	// Line breaks of content are unified: every \r\n pair becomes a single \n:
	if (!$this->procOptions['disableUnifyLineBreaks']) {
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

	// If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($entry_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

	// Traverse modes:
	foreach($modes as $cmd) {
		// ->DB
		if ($direction=='db') {
			// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_db($value,$this);
			} else {	// ... else use defaults:
				switch($cmd) {
					case 'ts_images':
						$value = $this->TS_images_db($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'db');
					break;
					case 'ts_links':
						$value = $this->TS_links_db($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_db($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
						$value = $this->TS_transform_db($value,$cmd=='css_transform');
					break;
					case 'ts_strip':
						$value = $this->TS_strip_db($value);
					break;
					default:
					break;
				}
			}
		}
		// ->RTE
		if ($direction=='rte') {
			// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
				// Tell the user object which key triggered it, exactly as the "db" direction does:
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_rte($value,$this);
			} else {	// ... else use defaults:
				switch($cmd) {
					case 'ts_images':
						$value = $this->TS_images_rte($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'rte');
					break;
					case 'ts_links':
						$value = $this->TS_links_rte($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_rte($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$value = $this->TS_transform_rte($value,$cmd=='css_transform');
					break;
					default:
					break;
				}
			}
		}
	}

	// If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($exit_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

	// Final clean up of linebreaks:
	if (!$this->procOptions['disableUnifyLineBreaks']) {
		$value = str_replace(chr(13).chr(10),chr(10),$value);	// Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value);	// ... and then change all \n into \r\n
	}

	// Return value:
	return $value;
}
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388 /************************************
389 *
390 * Specific RTE TRANSFORMATION functions
391 *
392 *************************************/
393
394 /**
395 * Transformation handler: 'ts_images' / direction: "db"
396 * Processing images inserted in the RTE.
397 * This is used when content goes from the RTE to the database.
398 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
399 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
400 * Also "magic" images are processed here.
401 *
402 * @param string The content from RTE going to Database
403 * @return string Processed content
404 */
function TS_images_db($value) {
	// Processes every <img> tag of RTE content on its way to the database:
	// external images are fetched to the RTE image storage dir (unless disabled),
	// "magic" images are re-rendered when their dimensions were changed, "plain"
	// images get their dimensions corrected, and finally src is made relative.

	// Site URL and site path are identical for all images, so they are resolved once:
	$siteUrl = $this->siteUrl();
	$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);

	// Matches a "width" or "height" declaration inside a style attribute. The
	// lookbehind keeps longer property names ending in those words
	// (eg. "max-width", "line-height") untouched.
	$dimensionStyleRegex = '/\s*(?<![\w-])(?:width|height)\s*:[^;]*(?:$|;)/si';

	// Split content by <img> tags and traverse the resulting array for processing:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v) {
		if ($k%2) {	// image found, do processing:

			// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$absRef = trim($attribArray['src']);	// Normally an absolute URL coming from the RTE into the database.

			// Make path absolute if it has no URL scheme and starts with the site path.
			// parse_url() is used for the scheme test; pathinfo() never returns a "scheme" key.
			$urlParts = parse_url($absRef);
			$hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
			if ($sitePath && !$hasScheme && t3lib_div::isFirstPartOfStr($absRef,$sitePath)) {
				// If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it is part of $siteUrl
				$absRef = $siteUrl.substr($absRef,strlen($sitePath));
			}

			// External image from another URL? In that case, fetch image (unless disabled feature).
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures']) {
				$externalFile = $this->getUrl($absRef);	// Get it
				if ($externalFile) {
					$pU = parse_url($absRef);
					$pI = pathinfo((is_array($pU) && isset($pU['path'])) ? $pU['path'] : '');
					$extension = isset($pI['extension']) ? $pI['extension'] : '';

					if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($extension))) {
						// The copy named "RTEmagicC_<name>.<ext>" is the one referenced by the tag,
						// "RTEmagicP_<name>" is the original it can be re-rendered from (<name> includes the extension):
						$filename = t3lib_div::shortMD5($absRef).'.'.$extension;
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						$C_origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$extension;
						if (!@is_file($origFilePath)) {
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
						$absRef = $siteUrl.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$extension;

						$attribArray['src']=$absRef;
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}

			// Check image as local file (siteURL equals the one of the image)
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
				$path = rawurldecode(substr($absRef,strlen($siteUrl)));	// Rel-path, rawurldecoded for special characters.
				$filepath = t3lib_div::getFileAbsFileName($path);	// Abs filepath, locked to relative path of this project.

				// Check file existence (in relative dir to this installation!)
				if ($filepath && @is_file($filepath)) {

					// If "magic image":
					$pathPre=$this->rteImageStorageDir().'RTEmagicC_';
					if (t3lib_div::isFirstPartOfStr($path,$pathPre)) {
						// Find original file:
						$pI=pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath)) {
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp=0;
							$imgObj->tempPath=PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
							$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag
							// Compare dimensions:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1]) {
								// Make the image based on the width solely; the height limit is a fixed 1000:
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$curWH[0].'m','1000m');
								if ($imgI[3]) {
									@copy($imgI[3],$filepath);	// Override the child file
									// Removing width and height from style attribute
									$attribArray['style'] = preg_replace($dimensionStyleRegex, '', $attribArray['style']);
									$attribArray['width']=$imgI[0];
									$attribArray['height']=$imgI[1];
									$params = t3lib_div::implodeAttributes($attribArray,1);
									$imgSplit[$k]='<img '.$params.' />';
								}
							}
						}

					} elseif ($this->procOptions['plainImageMode']) {	// If "plain image" has been configured:

						// Image dimensions as set in the image tag, if any
						$curWH = $this->getWHFromAttribs($attribArray);
						if ($curWH[0]) $attribArray['width'] = $curWH[0];
						if ($curWH[1]) $attribArray['height'] = $curWH[1];

						// Removing width and height from style attribute
						$attribArray['style'] = preg_replace($dimensionStyleRegex, '', $attribArray['style']);

						// Finding dimensions of image file:
						$fI = @getimagesize($filepath);

						// Perform corrections to aspect ratio based on configuration:
						switch((string)$this->procOptions['plainImageMode']) {
							case 'lockDimensions':
								$attribArray['width']=$fI[0];
								$attribArray['height']=$fI[1];
							break;
							case 'lockRatioWhenSmaller':	// If the ratio has to be smaller, then first set the width...:
								if ($attribArray['width']>$fI[0]) $attribArray['width'] = $fI[0];
								// ... and deliberately fall through to recalculate the height:
							case 'lockRatio':
								if ($fI[0]>0) {
									$attribArray['height']=round($attribArray['width']*($fI[1]/$fI[0]));
								}
							break;
						}

						// Compile the image tag again:
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k]='<img '.$params.' />';
					}
				} else {
					// Image was not found in a proper position on the server. The tag is
					// intentionally left in place; removing it might not be that logical.
				}
			}

			// Convert abs to rel url
			if ($imgSplit[$k]) {
				$attribArray=$this->get_tag_attributes_classic($imgSplit[$k],1);
				$absRef = trim($attribArray['src']);
				if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
					$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
					if (!isset($attribArray['alt'])) $attribArray['alt']='';	// Must have alt-attribute for XHTML compliance.
					$imgSplit[$k]='<img '.t3lib_div::implodeAttributes($attribArray,1,1).' />';
				}
			}
		}
	}
	return implode('',$imgSplit);
}
547
548 /**
549 * Transformation handler: 'ts_images' / direction: "rte"
550 * Processing images from database content going into the RTE.
551 * Processing includes converting the src attribute to an absolute URL.
552 *
553 * @param string Content input
554 * @return string Content output
555 */
function TS_images_rte($value) {
	// Makes the src attribute of every non-"http" <img> tag absolute for the RTE
	// by stripping the relative back path / site sub path and prefixing the site URL.

	$baseUrl = $this->siteUrl();
	$subPath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $baseUrl);
	// If site is in a subpath (eg. /~user_jim/) this path is removed from the start of src because it is part of $baseUrl:
	$subPathPattern = '#^'.preg_quote($subPath,'#').'#';
	$backPathLength = strlen($this->relBackPath);

	// Odd indexes of the split result hold the <img> tags:
	$pieces = $this->splitTags('img',$value);
	foreach($pieces as $index => $imgTag) {
		if (!($index%2)) {
			continue;
		}

		$attributes = $this->get_tag_attributes_classic($imgTag,1);

		// Leave the tag alone when src already points to an external URL:
		if (strtolower(substr(trim($attributes['src']),0,4))=='http') {
			continue;
		}

		$src = substr($attributes['src'],$backPathLength);
		$src = preg_replace($subPathPattern,'',$src);
		$attributes['src'] = $baseUrl.$src;
		if (!isset($attributes['alt'])) {
			$attributes['alt']='';
		}
		$pieces[$index]='<img '.t3lib_div::implodeAttributes($attributes).' />';
	}

	// Return processed content:
	return implode('',$pieces);
}
586
587 /**
588 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
589 * Converting <A>-tags to/from abs/rel
590 *
591 * @param string Content input
592 * @param string Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
593 * @return string Content output
594 */
function TS_reglinks($value,$direction) {
	// Converts the href of <a> tags between absolute (direction "rte") and
	// relative (direction "db"). Any other direction yields an empty string.
	$retVal = '';

	switch($direction) {
		case 'rte':
			$retVal = $this->TS_AtagToAbs($value,1);
		break;
		case 'db':
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
			// foreach replaces the former reset()/each() loop; each() was
			// deprecated in PHP 7.2 and removed in PHP 8.0.
			foreach($blockSplit as $k => $v) {
				if ($k%2) {	// block:
					$attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v),1);
					// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL) {
						$attribArray['href']=$this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
					$eTag='</a>';
					// The inner content is processed recursively, so nested <a> blocks are converted too:
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($blockSplit[$k]),$direction).$eTag;
				}
			}
			$retVal = implode('',$blockSplit);
		break;
	}
	return $retVal;
}
623
624 /**
625 * Transformation handler: 'ts_links' / direction: "db"
626 * Converting <A>-tags to <link tags>
627 *
628 * @param string Content input
629 * @return string Content output
630 * @see TS_links_rte()
631 */
function TS_links_db($value) {
	// Rewrites <a> blocks for storage: a block whose only attributes are href,
	// target, class and title becomes a TYPO3 <link> pseudo-tag, any other block
	// stays an <a> tag with a local href made relative. Inner content is
	// processed recursively.

	// Odd indexes of the split result hold the <a> blocks:
	$segments = $this->splitIntoBlock('A',$value);
	foreach($segments as $pos => $segment) {
		if (!($pos%2)) {
			continue;
		}

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($segment),1);
		$info = $this->urlInfoForLinkTags($attribArray['href']);

		// Find out which attributes remain besides the ones a <link> tag can carry:
		$extraAttribs = $attribArray;
		foreach(array('href','target','class','title') as $knownAttrib) {
			unset($extraAttribs[$knownAttrib]);
		}
		if ($extraAttribs['rteerror']) {	// "rteerror" and the "style" that came with it do not count either
			unset($extraAttribs['style']);
			unset($extraAttribs['rteerror']);
		}

		if (count($extraAttribs)) {
			// Other attributes are present, so the link is stored as an a-tag.
			// Unsetting 'rtekeep' attribute if that had been set.
			unset($attribArray['rtekeep']);
			// If the url is local, remove url-prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL) {
				$attribArray['href']=$this->relBackPath.substr($attribArray['href'],strlen($siteURL));
			}
			$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag='</a>';
		} else {
			// Build the pseudo-tag parameters: url[,0,query] [target] [class] ["title"].
			// A "-" fills an empty position whenever a later position is set.
			$target = $attribArray['target'];
			$class = $attribArray['class'];
			$title = $attribArray['title'];

			$linkParams = $info['url'];
			if ($info['query']) {
				$linkParams.= ',0,'.$info['query'];
			}
			if ($target) {
				$linkParams.= ' '.$target;
			} elseif ($class || $title) {
				$linkParams.= ' -';
			}
			if ($class) {
				$linkParams.= ' '.$class;
			} elseif ($title) {
				$linkParams.= ' -';
			}
			if ($title) {
				$linkParams.= ' "'.$title.'"';
			}

			$bTag='<link '.$linkParams.'>';
			$eTag='</link>';
		}

		$segments[$pos] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($segments[$pos])).$eTag;
	}
	return implode('',$segments);
}
672
673 /**
674 * Transformation handler: 'ts_links' / direction: "rte"
675 * Converting <link tags> to <A>-tags
676 *
677 * @param string Content input
678 * @return string Content output
679 * @see TS_links_db()
680 */
function TS_links_rte($value) {
	// Converts TYPO3 <link> pseudo-tags into <a> tags for the RTE. The link
	// parameter is resolved to an href roughly like tslib_content->typolink()
	// does; inner content is processed recursively.
	$value = $this->TS_AtagToAbs($value);

	// Split content by the TYPO3 pseudo tag "<link>":
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v) {
		$error = '';
		if ($k%2) {	// block:
			$tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v),0,-1)),true);
			$link_param = $tagCode[1];
			$href = '';
			$siteUrl = $this->siteUrl();
			// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
			if(strstr($link_param,'@')) {	// mailadr
				// preg_replace() with the "i" modifier replaces eregi_replace(), which was removed in PHP 7.0:
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#') {	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar=intval(strpos($link_param, '/'));
				$urlChar=intval(strpos($link_param, '.'));

				// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension'])))) {
					$href = $siteUrl.$link_param;
				} elseif($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar)) {	// url (external): If doubleSlash or if a '.' comes before a '/'.
					// Prefix "http://" unless the parameter already starts with a scheme
					// (preg_match() replaces ereg(), which was removed in PHP 7.0):
					if (!preg_match('#^[a-z]*://#',trim(strtolower($link_param)))) {$scheme='http://';} else {$scheme='';}
					$href = $scheme.$link_param;
				} elseif($fileChar) {	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);	// Link-data del
					if (!strcmp($idPart,'')) { $idPart=$this->recPid; }	// If no id or alias is given, set it to class record pid

					// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1) {
						$idPart = $pairParts[0];
					}
					// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart)) {
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}
					$page = t3lib_BEfunc::getRecord('pages', $idPart);
					if (is_array($page)) {	// Page must exist...
						$pairParts = t3lib_div::trimExplode(',',$link_param);
						$href = $siteUrl.'?id='.$pairParts[0].($pairParts[2]?$pairParts[2]:'');
					} else if(strtolower(substr($link_param, 0, 7)) == 'record:') {
						// linkHandler - allowing links to start with "record:"
						$href = $link_param;
					} else {
						// Keep a link to the unknown id and flag the error on the tag:
						$href = $siteUrl.'?id='.$link_param;
						$error = 'No page found: '.$idPart;
					}
				}
			}

			// Setting the A-tag ("-" marks an empty target/class position):
			$bTag = '<a href="'.htmlspecialchars($href).'"'.
						($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
						($tagCode[3]&&$tagCode[3]!='-' ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
						($tagCode[4] ? ' title="'.htmlspecialchars($tagCode[4]).'"' : '').
						($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : '').	// Should be OK to add the style; the transformation back to database will remove it...
						'>';
			$eTag = '</a>';
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

	// Return content:
	return implode('',$blockSplit);
}
763
764 /**
765 * Preserve special tags
766 *
767 * @param string Content input
768 * @return string Content output
769 */
function TS_preserve_db($value) {
	// Turns <span specialtag="..."> wrappers back into the tag that is stored
	// (rawurlencoded) in their "specialtag" attribute. Does nothing when no
	// tags are configured to be preserved.
	if (!$this->preserveTags) {
		return $value;
	}

	// Odd indexes of the split result hold the <span> blocks:
	$segments = $this->splitIntoBlock('span',$value);
	foreach($segments as $pos => $segment) {
		if (!($pos%2)) {
			continue;
		}
		$spanAttribs = $this->get_tag_attributes_classic($this->getFirstTag($segment));
		if (!$spanAttribs['specialtag']) {
			continue;
		}
		$openingTag = rawurldecode($spanAttribs['specialtag']);
		$closingTag = '</'.$this->getFirstTagName($openingTag).'>';
		$segments[$pos] = $openingTag.$this->removeFirstAndLastTag($segments[$pos]).$closingTag;
	}
	return implode('',$segments);
}
787
788 /**
789 * Preserve special tags
790 *
791 * @param string Content input
792 * @return string Content output
793 */
function TS_preserve_rte($value) {
	// Wraps the content of every tag listed in ->preserveTags in a <span> whose
	// "specialtag" attribute carries the rawurlencoded original opening tag.
	if (!$this->preserveTags) {
		return $value;
	}

	// Odd indexes of the split result hold the blocks of preserved tags:
	$segments = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($segments as $pos => $segment) {
		if (!($pos%2)) {
			continue;
		}
		$encodedTag = rawurlencode($this->getFirstTag($segment));
		$innerContent = $this->removeFirstAndLastTag($segments[$pos]);
		$segments[$pos] = '<span specialtag="'.$encodedTag.'">'.$innerContent.'</span>';
	}
	return implode('',$segments);
}
805
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by block elements (TABLE, BLOCKQUOTE, optionally DIV, plus $this->blockElementList).
 * Block parts are processed per tag name; everything between blocks is passed through divideIntoLines().
 * The function recurses for blockquote/dd/div content and for flattened tables; the recursion depth is
 * limited by $this->TS_transform_db_safecounter.
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_rte()
 */
function TS_transform_db($value,$css=FALSE) {

		// safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
		// The counter is restored before bailing out so that every call leaves it balanced.
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0) {
		$this->TS_transform_db_safecounter++;
		return $value;
	}

		// Pattern matching any run of line break characters (chr(10)/chr(13)); used in several branches below:
	$lineBreakPattern = '/['.preg_quote(chr(10).chr(13)).']+/';

		// Split the content from RTE by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList,$value);

	$cc=0;
	$aC = count($blockSplit);

		// Avoid superfluous linebreaks by transform_db after ending headListTag
	while($aC && !strcmp(trim($blockSplit[$aC-1]),'')) {
		unset($blockSplit[$aC-1]);
		$aC = count($blockSplit);
	}

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		$cc++;
			// Every part except the last one is terminated with a line break:
		$lastBR = $cc==$aC ? '' : chr(10);

		if ($k%2) {	// Inside block:

				// Init:
			$tag=$this->getFirstTag($v);
			$tagName=strtolower($this->getFirstTagName($v));

				// Process based on the tag:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
				case 'dd' :			// Do the same on dd elements
				case 'div':			// Do the same on div sections, if they were splitted
					$blockSplit[$k]=$tag.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css) {
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
							foreach(array_keys($parts) as $k2) {
								$parts[$k2]=preg_replace($lineBreakPattern,'',$parts[$k2]);	// remove all linesbreaks!
								$parts[$k2]=$this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2]=$this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
							}
							if ($tagName=='ol') { $params=' type="1"'; } else { $params=''; }
							$blockSplit[$k]='<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css) {
						$blockSplit[$k]=$this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css) {
						$attribArray=$this->get_tag_attributes_classic($tag);
							// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
								// The headline level is stored as "type"; level 6 is the implicit default and gets no type attribute:
							$type = intval(substr($tagName,1));
							$blockSplit[$k]='<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
							$blockSplit[$k]='<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
							// Eliminate true linebreaks inside Hx tags
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				default:
						// Eliminate true linebreaks inside other headlist tags and after hr tag
					$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
				break;
			}
		} else {	// NON-block:
			if (strcmp(trim($blockSplit[$k]),'')) {
				$blockSplit[$k]=$this->divideIntoLines(preg_replace($lineBreakPattern,' ',$blockSplit[$k])).$lastBR;
				$blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]);
			} else unset($blockSplit[$k]);
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
927
/**
 * Wraps a-tags that contain a style attribute with a span-tag.
 * The style attribute is moved from the a-tag to the wrapping span-tag; a-tags without a style attribute are left untouched.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function transformStyledATags($value) {
	$segments = $this->splitIntoBlock('A',$value);
	foreach($segments as $index => $segment) {

			// Only odd positions hold A-tag blocks:
		if (!($index%2)) {
			continue;
		}

		$linkAttributes = $this->get_tag_attributes_classic($this->getFirstTag($segment),1);
		if (!$linkAttributes['style']) {
			continue;
		}

			// Move the style attribute from the link to the span:
		$spanAttributes = array('style' => $linkAttributes['style']);
		unset($linkAttributes['style']);

		$openingTags = '<span '.t3lib_div::implodeAttributes($spanAttributes,1).'><a '.t3lib_div::implodeAttributes($linkAttributes,1).'>';
		$closingTags = '</a></span>';
		$segments[$index] = $openingTags.$this->removeFirstAndLastTag($segment).$closingTags;
	}

	return implode('',$segments);
}
950
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by block elements (TABLE, BLOCKQUOTE, TYPOLIST, TYPOHEAD, optionally DIV, plus $this->blockElementList).
 * <typolist> blocks are turned into OL/UL lists and <typohead> blocks into Hx tags (unless disabled in procOptions);
 * blockquote/dd/div content is processed recursively; everything between blocks is passed through setDivTags().
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value,$css=0) {

		// List of tags regarded as blocks; used both for splitting and for detecting a following block:
	$blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList;

		// Split the content from Database by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock($blockTagList,$value);

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// Inside one of the blocks:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

				// Based on tagname, we do transformations:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes
				case 'dd':			// Keep definitions
				case 'div':			// Keep div sections, if they were splitted
					$blockSplit[$k] = $tag.
										$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
										'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
							// Strip one leading and one trailing (optionally space-padded) line break.
							// \z is used instead of $ so that only the very end of the string matches.
						$tListContent = preg_replace('/^[ ]*\n/','',$tListContent);
						$tListContent = preg_replace('/\n[ ]*\z/','',$tListContent);
						$lines = explode(chr(10),$tListContent);
						$typ = $attribArray['type']==1 ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
											'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
											'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
						$typ = t3lib_div::intInRange($attribArray['type'],0,6);
						if (!$typ)	$typ=6;
						$align = $attribArray['align']?' align="'.$attribArray['align'].'"': '';
						$class = $attribArray['class']?' class="'.$attribArray['class'].'"': '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
											$tC.
											'</h'.$typ.'>';
					}
				break;
			}

				// Removing the line break directly following the block (if there is a following part at all):
			if (isset($blockSplit[$k+1])) {
				$blockSplit[$k+1] = preg_replace('/^[ ]*\n/','',$blockSplit[$k+1]);
			}
		} else {	// NON-block:
				// NOTE(review): presumably splitIntoBlock() always ends on a non-block part, so for the last part there is no following block - confirm.
			$nextFTN = $this->getFirstTagName(isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '');
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList($blockTagList,$nextFTN))	{	// Removing linebreak if typolist/typohead
				$blockSplit[$k] = preg_replace('/\n[ ]*\z/','',$blockSplit[$k]);
			}
				// If $blockSplit[$k] is blank then unset the line. UNLESS the line happend to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak)	{
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}

	return implode(chr(10),$blockSplit);
}
1025
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Removes all tags from the content except those in a fixed list of allowed tags.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_strip_db($value) {
		// Build the "<b><i>..." notation that strip_tags() expects from the comma list of allowed tag names:
	$allowedTagNames = explode(',','b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote');
	$allowedTagString = '';
	foreach($allowedTagNames as $allowedTagName) {
		$allowedTagString.= '<'.$allowedTagName.'>';
	}

	return strip_tags($value,$allowedTagString);
}
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051 /***************************************************************
1052 *
1053 * Generic RTE transformation, analysis and helper functions
1054 *
1055 **************************************************************/
1056
/**
 * Fetches the content of a file or URL.
 * Thin wrapper which delegates to t3lib_div::getURL().
 *
 * @param	string		Filepath/URL to read
 * @return	string		Whatever t3lib_div::getURL() returned for the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url) {
	$content = t3lib_div::getURL($url);
	return $content;
}
1067
/**
 * Cleans content which is on its way into the database.
 * Calls HTMLcleaner() with a configuration derived from getKeepTags('db') and the processing options
 * "dontRemoveUnknownTags_db", "dontUndoHSC_db" and the "xhtml_cleaning" flag of the (entry/exit)HTMLparser_db configurations.
 *
 * @param	string		Content to clean up
 * @param	string		Comma list of tags to specifically allow. If empty, the default from getKeepTags('db') is used.
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='') {
	$keepTags = $tagList ? $this->getKeepTags('db',$tagList) : $this->getKeepTags('db');

		// 1 is passed if "dontRemoveUnknownTags_db" is set, otherwise 0:
	$keepUnknown = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
		// 0 is passed if "dontUndoHSC_db" is set, otherwise -1:
	$hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;

		// Honor the setting RTE.default.proc.HTMLparser_db.xhtml_cleaning=1 (also for the entry- and exit-parser configuration):
	$xhtmlCleaning = FALSE;
	foreach(array('HTMLparser_db.','entryHTMLparser_db.','exitHTMLparser_db.') as $parserKey) {
		if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
			$xhtmlCleaning = TRUE;
			break;
		}
	}
	$additionalConfig = $xhtmlCleaning ? array('xhtml' => 1) : array();

	return $this->HTMLcleaner($content,$keepTags,$keepUnknown,$hscMode,$additionalConfig);
}
1095
/**
 * Builds the tag configuration array for the HTMLcleaner function, depending on whether content goes TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the result is cached per direction in $this->getKeepTags_cache and reused on the next call.
 *
 * @param	string		The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param	string		Comma list of tags to keep (overriding the default list, which is a fixed set of tags plus procOptions "allowTags" minus "denyTags")
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='') {

		// Cache hit: a configuration for this direction exists and no explicit tag list was requested.
	if (is_array($this->getKeepTags_cache[$direction]) && !$tagList) {
		return $this->getKeepTags_cache[$direction];
	}

		// Setting up allowed tags (the tag names become the keys of $keepTags):
	if (strcmp($tagList,'')) {
			// An explicit tag list takes precedence:
		$keepTags = array_flip(t3lib_div::trimExplode(',',$tagList,1));
	} else {
			// Default list of tags to keep, extended by the "allowTags" processing option:
		$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
		$allowedTagNames = t3lib_div::trimExplode(',',$typoScript_list.','.strtolower($this->procOptions['allowTags']),1);
		$keepTags = array_flip($allowedTagNames);

			// Tags from the "denyTags" processing option are removed again:
		$deniedTagNames = t3lib_div::trimExplode(',',$this->procOptions['denyTags'],1);
		foreach($deniedTagNames as $deniedTagName) {
			unset($keepTags[$deniedTagName]);
		}
	}

		// Remapping between b/i and strong/em is active unless explicitly switched off:
	$remapBoldAndItalic = !isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags'];

	if ($direction=='rte') {
			// GOING from database to Rich Text Editor:

		if ($remapBoldAndItalic) {
				// Transform bold/italics tags to strong/em
			if (isset($keepTags['b'])) {
				$keepTags['b'] = array('remap' => 'STRONG');
			}
			if (isset($keepTags['i'])) {
				$keepTags['i'] = array('remap' => 'EM');
			}
		}

			// Let HTMLparserConfig() combine the TypoScript configuration (having .'s) with the keepTags array:
		list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);

	} elseif ($direction=='db') {
			// GOING from RTE to database:

		if ($remapBoldAndItalic) {
				// Transform strong/em back to bold/italics:
			if (isset($keepTags['strong'])) {
				$keepTags['strong'] = array('remap' => 'b');
			}
			if (isset($keepTags['em'])) {
				$keepTags['em'] = array('remap' => 'i');
			}
		}

			// Setting up span tags if they are allowed:
		if (isset($keepTags['span'])) {
			$classFix = array(
				'list' => array_merge(array(''),$this->allowedClasses),
				'removeIfFalse' => 1
			);
				// Without the "allowedClasses" option there is no list to check the class against:
			if (!$this->procOptions['allowedClasses']) {
				unset($classFix['list']);
			}
			$keepTags['span'] = array(
				'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir',
				'fixAttrib' => array(
					'class' => $classFix,
				),
				'rmTagIfNoAttrib' => 1
			);
		}

			// Setting up font tags if they are allowed:
		if (isset($keepTags['font'])) {
			$colorFix = array(
				'removeIfFalse' => 1,
				'list' => array_merge(array(''),t3lib_div::trimExplode(',',$this->procOptions['allowedFontColors'],1))
			);
				// Without the "allowedFontColors" option there is no list to check the color against:
			if (!$this->procOptions['allowedFontColors']) {
				unset($colorFix['list']);
			}
			$keepTags['font'] = array(
				'allowedAttribs' => 'face,color,size',
				'fixAttrib' => array(
					'face' => array(
						'removeIfFalse' => 1
					),
					'color' => $colorFix,
					'size' => array(
						'removeIfFalse' => 1,
					)
				),
				'rmTagIfNoAttrib' => 1
			);
		}

			// Further options come from the processing options, with defaults for "globalNesting" and "noAttrib":
		$parserConf = $this->procOptions['HTMLparser_db.'];
		if (!$parserConf['globalNesting']) {
			$parserConf['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
		}
		if (!$parserConf['noAttrib']) {
			$parserConf['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
		}

			// Let HTMLparserConfig() combine the TypoScript configuration with the keepTags array:
		list($keepTags) = $this->HTMLparserConfig($parserConf,$keepTags);
	}

		// A configuration built for an explicit tag list is returned directly and never cached:
	if ($tagList) {
		return $keepTags;
	}

		// Caching (internally, in object memory) the result and returning it:
	$this->getKeepTags_cache[$direction] = $keepTags;
	return $this->getKeepTags_cache[$direction];
}
1207
/**
 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by chr(10).
 * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * Note on the return value: if no div/p sections are found (or the recursion brake has reached zero) the input value is
 * returned unchanged as a string, regardless of $returnArray. The recursive call relies on this to detect whether
 * sub-nesting was found.
 *
 * @param	string		Value to process.
 * @param	integer		Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	mixed		Processed input value (string), or array of lines if $returnArray is set and div/p sections were found.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {

		// Internalize font tags (move them from OUTSIDE p/div to inside it that is the case):
	if ($this->procOptions['internalizeFontTags']) {$value = $this->internalizeFontTags($value);}

		// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?

	if ($this->procOptions['keepPDIVattribs']) {
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

		// Returns plainly the value if there was no div/p sections in it
	if (count($divSplit)<=1 || $count<=0) {
		return $value;
	}

		// Traverse the splitted sections:
	foreach($divSplit as $k => $v) {
		if ($k%2) {	// Inside
			$v=$this->removeFirstAndLastTag($v);

				// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($v,$count-1,1);
				// If there happend to be sub-nesting of p/div (an array was returned), this is written directly as the new content of THIS section. (This would be considered 'an error')
			if (!is_array($subLines)) {	//... but if NO subsection was found, we process it as a TRUE line without erronous content:
				$subLines = array($subLines);
				if (!$this->procOptions['dontConvBRtoParagraph']) {	// process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
					$subLines = preg_split('/<br[[:space:]]*\/?>/i',$v);
				}

					// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
				foreach(array_keys($subLines) as $sk) {

						// Clear up the subline for DB.
					$subLines[$sk]=$this->HTMLcleaner_db($subLines[$sk]);

						// Get first tag, attributes etc:
					$fTag = $this->getFirstTag($divSplit[$k]);
					$tagName=strtolower($this->getFirstTagName($divSplit[$k]));
					$attribs=$this->get_tag_attributes($fTag);

						// Keep attributes (lowercase)
					$newAttribs=array();
					if (count($keepAttribListArr)) {
						foreach($keepAttribListArr as $keepA) {
							if (isset($attribs[0][$keepA])) { $newAttribs[$keepA] = $attribs[0][$keepA]; }
						}
					}

						// ALIGN attribute:
					if (!$this->procOptions['skipAlign'] && strcmp(trim($attribs[0]['align']),'') && strtolower($attribs[0]['align'])!='left') {	// Set to value, but not 'left'
						$newAttribs['align']=strtolower($attribs[0]['align']);
					}

						// CLASS attribute:
					if (!$this->procOptions['skipClass'] && strcmp(trim($attribs[0]['class']),'')) {	// Set to whatever value
						if (!count($this->allowedClasses) || in_array(strtoupper($attribs[0]['class']),$this->allowedClasses)) {
							$newAttribs['class']=$attribs[0]['class'];
						}
					}

						// Remove any line break char (10 or 13)
					$subLines[$sk]=str_replace(array(chr(10),chr(13)),'',$subLines[$sk]);

						// The line is only wrapped in a tag again if there are attributes to carry and "remapParagraphTag" is not "1":
					if (count($newAttribs) && strcmp($remapParagraphTag,'1')) {
						if ($remapParagraphTag=='P')	$tagName='p';
						if ($remapParagraphTag=='DIV')	$tagName='div';
						$subLines[$sk]='<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>'.$subLines[$sk].'</'.$tagName.'>';
					}
				}
			}
				// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

				// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
				// But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
				// Those attributes should have been filtered before; if they are still there they must be considered as possible content.
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;' && !preg_match('/\<(img)(\s[^>]*)?\/?>/si', $divSplit[$k]) && !preg_match('/\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
				$divSplit[$k]='';
			}
		} else {	// outside div:
				// Remove positions which are outside div/p tags and without content
			$divSplit[$k]=trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],''))	unset($divSplit[$k]);	// Remove part if it's empty
		}
	}

		// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1318
/**
 * Converts all lines into <div></div>/<p></p>-sections (unless the line is wrapped in a div- or p-section already)
 * For processing of content going FROM database TO RTE.
 * Blank lines become "&nbsp;"; all other lines are passed through HTMLcleaner() with the configuration from getKeepTags('rte').
 *
 * @param	string		Value to convert
 * @param	string		Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p') {

		// Configuration for the HTMLcleaner function, which processes each line on its way to the RTE:
	$keepTags = $this->getKeepTags('rte');
		// 'protect' is passed unless "dontProtectUnknownTags_rte" is set:
	$unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
		// 1 is passed unless "dontHSC_rte" is set:
	$hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
		// Whether "&amp;nbsp;" is turned back into "&nbsp;" after cleaning:
	$restoreNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'] ? 1 : 0;

		// Each line (separated by chr(10)) is processed on its own:
	$wrappedLines = array();
	foreach(explode(chr(10),$value) as $lineKey => $line) {

		if (strcmp(trim($line),'') == 0) {
				// A blank line is represented by a non-breaking space:
			$line = '&nbsp;';
		} else {
				// Clean the line content:
			$line = $this->HTMLcleaner($line,$keepTags,$unknownTagMode,$hscMode);
			if ($restoreNbsp) {
				$line = str_replace('&amp;nbsp;','&nbsp;',$line);
			}
		}

			// Only wrap the line in <$dT> if it is not already enclosed in div or p tags:
		$probe = strtolower(trim($line));
		$hasDivWrap = substr($probe,0,4)=='<div' && substr($probe,-6)=='</div>';
		$hasParagraphWrap = substr($probe,0,2)=='<p' && substr($probe,-4)=='</p>';
		if (!$hasDivWrap && !$hasParagraphWrap) {
			$line = '<'.$dT.'>'.$line.'</'.$dT.'>';
		}

		$wrappedLines[$lineKey] = $line;
	}

		// Implode result:
	return implode(chr(10),$wrappedLines);
}
1361
/**
 * Moves font-tags from the outside of <P>/<DIV> sections to the inside of them.
 * The $value is split in font-tag chunks. If a font-tag contains P/DIV sections, the font-tag is wrapped AROUND the content
 * INSIDE each of those sections (and around any non-blank content between them) and the outer font-tag is removed.
 * Font-tags without P/DIV sections inside are left as they are.
 * This function is used by eg. divideIntoLines() if the procesing option 'internalizeFontTags' is set.
 *
 * @param	string		Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value) {

		// Splitting into font tag blocks:
	$fontSegments = $this->splitIntoBlock('font',$value);

	foreach($fontSegments as $fontKey => $fontSegment) {

			// Only odd positions are font-tag blocks:
		if (!($fontKey%2)) {
			continue;
		}

			// The opening font-tag, which is going to be re-applied inside the sections:
		$fontOpenTag = $this->getFirstTag($fontSegment);

		$innerSegments = $this->splitIntoBlock('div,p',$this->removeFirstAndLastTag($fontSegment),1);
		if (count($innerSegments)<=1) {
				// No div/p sections inside the font-tag, nothing to do:
			continue;
		}

		foreach($innerSegments as $innerKey => $innerSegment) {
			if ($innerKey%2) {
					// A div/p section: put the font-tag inside of it
				$sectionOpenTag = $this->getFirstTag($innerSegment);
				$sectionTagName = $this->getFirstTagName($innerSegment);
				$sectionContent = $this->removeFirstAndLastTag($innerSegment);
				$innerSegments[$innerKey] = $sectionOpenTag.$fontOpenTag.$sectionContent.'</font></'.$sectionTagName.'>';
			} elseif (trim(strip_tags($innerSegment))) {
					// Content between the sections gets its own font-tag, if it has any text in it
				$innerSegments[$innerKey] = $fontOpenTag.$innerSegment.'</font>';
			}
		}
		$fontSegments[$fontKey] = implode('',$innerSegments);
	}

	return implode('',$fontSegments);
}
1402
/**
 * Returns the site URL as reported by the environment.
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl() {
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	return $siteUrl;
}
1412
/**
 * Return the storage folder of RTE image files.
 * If $this->rte_p['imgpath'] is set that value is used, otherwise $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string		The storage folder
 */
function rteImageStorageDir() {
	$configuredPath = $this->rte_p['imgpath'];
	if ($configuredPath) {
		return $configuredPath;
	}

	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1422
/**
 * Remove all tables from incoming code
 * Each table block is replaced by the parts which getAllParts() returns for the td-cells of its rows, each part followed by the break character.
 * Finally all parts (content outside tables and the flattened tables) are joined by the break character as well.
 *
 * @param	string		Input value
 * @param	string		Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />') {

		// Splitting value into table blocks:
	$segments = $this->splitIntoBlock('table',$value);

	foreach($segments as $segmentKey => $segment) {

			// Only odd positions are table blocks:
		if (!($segmentKey%2)) {
			continue;
		}

			// Collect the cell contents of all rows of this table:
		$flattenedTable = '';
		$rows = $this->splitIntoBlock('tr',$segment);
		foreach($rows as $rowKey => $row) {
			if (!($rowKey%2)) {
				continue;
			}

			$cells = $this->getAllParts($this->splitIntoBlock('td',$row),1,0);
			foreach($cells as $cellContent) {
				$flattenedTable.= $cellContent.$breakChar;
			}
		}
		$segments[$segmentKey] = $flattenedTable;
	}

		// Implode it all again:
	return implode($breakChar,$segments);
}
1455
/**
 * Default tag mapping for TS
 * Maps strong/em to b/i for direction "db" and b/i to strong/em for direction "rte" by means of mapTags().
 *
 * @param	string		Input code to process
 * @param	string		Direction: To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte') {

		// The mapping as used towards the database; the opposite direction uses the flipped version:
	$mappingToDb = array(
		'strong' => 'b',
		'em' => 'i'
	);

	if ($direction=='db') {
		$code = $this->mapTags($code,$mappingToDb);
	}
	if ($direction=='rte') {
		$code = $this->mapTags($code,array_flip($mappingToDb));
	}

	return $code;
}
1478
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute ("width: NNpx" / "height: NNpx"), use that!
 * Otherwise (or if the style value is zero) the "width" / "height" attributes are used.
 * Width and height are resolved independently of each other.
 *
 * @param	array		Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray) {
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style) {
		$regex='[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';

			// Width
		$reg = array();
		if (preg_match('/width'.$regex.'/i',$style,$reg)) {
			$w = intval($reg[1]);
		}

			// Height. A fresh match array is used so that a width match can never be mistaken for the height.
		$reg = array();
		if (preg_match('/height'.$regex.'/i',$style,$reg)) {
			$h = intval($reg[1]);
		}
	}

		// Fall back to the plain attributes:
	if (!$w && isset($attribArray['width'])) {
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height'])) {
		$h = $attribArray['height'];
	}

	return array(intval($w),intval($h));
}
1506
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * The returned array always has the keys "url" and "type". "type" is one of "email", "ext", "anchor", "page" or "file".
 * For URLs on the current site the keys "relScriptPath" and "relUrl" are set as well, and for type "page" additionally
 * "pageid", "cElement" and "query".
 *
 * @param	string		URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url) {
	$info = array();
	$url = trim($url);
	if (substr(strtolower($url),0,7)=='mailto:') {
		$info['url']=trim(substr($url,7));
		$info['type']='email';
	} else {
		$curURL = $this->siteUrl(); 	// 100502, removed this: 'http://'.t3lib_div::getThisUrl(); Reason: The url returned had typo3/ in the end - should be only the site's url as far as I see...

			// Find the length of the common prefix of the URL and the site URL (never reading beyond the shorter one):
		$maxLength = min(strlen($url),strlen($curURL));
		for($a=0;$a<$maxLength;$a++) {
			if ($url[$a]!==$curURL[$a]) {
				break;
			}
		}

		$info['relScriptPath']=substr($curURL,$a);
		$info['relUrl']=substr($url,$a);
		$info['url']=$url;
		$info['type']='ext';

			// parse_url() returns FALSE for seriously malformed URLs and omits keys for missing components, so read defensively:
		$siteUrl_parts = parse_url($url);
		$curUrl_parts = parse_url($curURL);
		$urlHost = (is_array($siteUrl_parts) && isset($siteUrl_parts['host'])) ? (string)$siteUrl_parts['host'] : '';
		$curHost = (is_array($curUrl_parts) && isset($curUrl_parts['host'])) ? (string)$curUrl_parts['host'] : '';
		$urlFragment = (is_array($siteUrl_parts) && isset($siteUrl_parts['fragment'])) ? $siteUrl_parts['fragment'] : '';

		if ($urlHost===$curHost 	// Hosts should match
			&& (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir))) {	// If the script path seems to match or is empty (FE-EDIT)

				// New processing order 100502
			$uP=parse_url($info['relUrl']);
			if (!is_array($uP)) {
				$uP = array();
			}
			$relPath = isset($uP['path']) ? $uP['path'] : '';
			$relQuery = isset($uP['query']) ? $uP['query'] : '';

			if (!strcmp('#'.$urlFragment,$info['relUrl'])) {
					// The relative URL consists of nothing but the fragment:
				$info['url']=$info['relUrl'];
				$info['type']='anchor';
			} elseif (!trim($relPath) || !strcmp($relPath,'index.php')) {
					// Take what follows the first "id=" in the query string; the first &-separated value is the page id, the rest are further parameters:
				$pp = explode('id=',$relQuery);
				$parameters = explode('&', isset($pp[1]) ? $pp[1] : '');
				$id = array_shift($parameters);
				if ($id) {
					$info['pageid']=$id;
					$info['cElement']=isset($uP['fragment']) ? $uP['fragment'] : NULL;
					$info['url']=$id.($info['cElement']?'#'.$info['cElement']:'');
					$info['type']='page';
					$info['query'] = !empty($parameters[0])?'&'.implode('&', $parameters):'';
				}
			} else {
				$info['url']=$info['relUrl'];
				$info['type']='file';
			}
		} else {
				// Not a URL on this site: the relative information is meaningless
			unset($info['relScriptPath']);
			unset($info['relUrl']);
		}
	}
	return $info;
}
1566
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 * If the href of an A-tag has no scheme, the site URL is prepended (after stripping strlen($this->relBackPath) characters from the start of the href).
 * A-tags without href content always get the "rtekeep" attribute. The inner content of each A-tag is processed recursively.
 *
 * @param	string		Content input
 * @param	boolean		If true, then the "rtekeep" attribute will not be set (except on A-tags without href content).
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
	$blockSplit = $this->splitIntoBlock('A',$value);
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// block:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);

				// Checking if there is a scheme, and if not, prepend the current url.
			if (isset($attribArray['href']) && strlen($attribArray['href'])) {	// ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
					// parse_url() returns FALSE for seriously malformed URLs; those are treated like URLs without a scheme:
				$uP = parse_url(strtolower($attribArray['href']));
				if (!is_array($uP) || empty($uP['scheme'])) {
					$attribArray['href'] = $this->siteUrl().substr($attribArray['href'],strlen($this->relBackPath));
				}
			} else {
				$attribArray['rtekeep'] = 1;
			}
			if (!$dontSetRTEKEEP)	$attribArray['rtekeep'] = 1;

			$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag='</a>';
				// NOTE(review): the recursive call does not pass $dontSetRTEKEEP on, so nested A-tags are processed with the default - confirm this is intended.
			$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}
	return implode('',$blockSplit);
}
1599 }
1600
1601
// XCLASS hook: if TYPO3_MODE is defined and a file is registered for this script in
// $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS'], that file is included (once) after the class definition.
if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
}
1605
1606 ?>