Updating version number to 4.2-dev after release of 4.2.0beta2a
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 103: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 138: function init($elRef='',$recPid=0)
44 * 150: function setRelPath($path)
45 * 174: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 232: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 398: function TS_images_db($value)
52 * 550: function TS_images_rte($value)
53 * 589: function TS_reglinks($value,$direction)
54 * 626: function TS_links_db($value)
55 * 675: function TS_links_rte($value)
56 * 760: function TS_preserve_db($value)
57 * 784: function TS_preserve_rte($value)
58 * 805: function TS_transform_db($value,$css=FALSE)
59 * 922: function transformStyledATags($value)
60 * 948: function TS_transform_rte($value,$css=0)
61 * 1019: function TS_strip_db($value)
62 *
63 * SECTION: Generic RTE transformation, analysis and helper functions
64 * 1050: function getURL($url)
65 * 1064: function HTMLcleaner_db($content,$tagList='')
66 * 1091: function getKeepTags($direction='rte',$tagList='')
67 * 1200: function divideIntoLines($value,$count=5,$returnArray=FALSE)
68 * 1304: function setDivTags($value,$dT='p')
69 * 1349: function internalizeFontTags($value)
70 * 1385: function siteUrl()
71 * 1395: function rteImageStorageDir()
72 * 1407: function removeTables($value,$breakChar='<br />')
73 * 1439: function defaultTStagMapping($code,$direction='rte')
74 * 1462: function getWHFromAttribs($attribArray)
75 * 1489: function urlInfoForLinkTags($url)
76 * 1548: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
77 *
78 * TOTAL FUNCTIONS: 28
79 * (This index is automatically created/updated by the extension "extdeveval")
80 *
81 */
82
83 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
84
85
86
87
88
89
90
91
92
93
94
95
96 /**
97 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
98 *
99 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
100 * @package TYPO3
101 * @subpackage t3lib
102 */
103 class t3lib_parsehtml_proc extends t3lib_parsehtml {
104
105 // Static:
106 var $headListTags = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,HR,ADDRESS,DL,DD'; // List of tags for these elements
107
108 // Internal, static:
109 var $recPid = 0; // Set this to the pid of the record manipulated by the class.
110 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
111 var $relPath=''; // Relative path
112 var $relBackPath=''; // Relative back-path
113 var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
114
115 // Internal, dynamic
116 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls.
117 var $rte_p=''; // Parameters from TCA types configuration related to the RTE
118 var $getKeepTags_cache=array(); // Data caching for processing function
119 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE
120 var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
121
122
123
124
125
126
127
128
129
130
131 /**
132 * Initialize, setting element reference and record PID
133 *
134 * @param string Element reference, eg "tt_content:bodytext"
135 * @param integer PID of the record (page id)
136 * @return void
137 */
function init($elRef='',$recPid=0)	{
	// Remember which element ("[table]:[field]") and which page id the
	// following transformations are working on.
	$this->elRef = $elRef;
	$this->recPid = $recPid;
}
142
143 /**
144 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
145 * This is used when editing files with the RTE
146 *
147 * @param string The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
148 * @return void There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
149 */
function setRelPath($path)	{
	// Normalize the input: strip surrounding whitespace and at most one
	// leading and one trailing slash. PCRE is used here because the POSIX
	// ereg_replace() function no longer exists in PHP 7 and later.
	$path = trim($path);
	$path = preg_replace('#^/#', '', $path);
	$path = preg_replace('#/$#', '', $path);

	// Compare against the empty string (not plain truthiness) so that a
	// directory literally named "0" is still accepted as a valid path.
	if (strcmp($path, ''))	{
		// One "../" per path segment leads back from the file's directory
		// to the site root, e.g. "fileadmin/static" => "../../".
		$this->relBackPath = str_repeat('../', count(explode('/', $path)));
		// ->relPath always carries a trailing slash.
		$this->relPath = $path.'/';
	}
	// An empty path leaves ->relPath and ->relBackPath untouched.
}
164
165 /**
166 * Evaluate the environment for editing a staticFileEdit file.
167 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
168 *
169 * @param array Parameters for the current field as found in types-config
170 * @param array Current record we are editing.
171 * @return mixed On success an array with various information is returned, otherwise a string with an error message
172 * @see t3lib_TCEmain, t3lib_transferData
173 */
function evalWriteFile($pArr,$currentRecord)	{

	// Without a parameter array there is nothing to evaluate; the caller gets NULL.
	if (!is_array($pArr))	{
		return NULL;
	}

	// The configured base directory must be set, end with a slash and exist below PATH_site.
	$baseDir = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'];
	if (!$baseDir || substr($baseDir,-1)!='/' || !@is_dir(PATH_site.$baseDir))	{
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

	// First parameter names the record field which holds the file name to edit.
	$params = $pArr['parameters'];
	$fileField = trim($params[0]);
	$fileName = $currentRecord[$fileField];
	if (!($fileField && $fileName && t3lib_div::validPathStr($fileName)))	{
		return "ERROR: Edit file name could not be found or was bad.";
	}

	// Build relative and absolute file names and make sure the file is really there.
	$relFile = $baseDir.$fileName;
	$absFile = PATH_site.$relFile;
	if (!@is_file($absFile))	{
		return "ERROR: Editfile '".$relFile."' did not exist";
	}

	// Success: remaining parameters are passed on as field names.
	return array(
		'editFile' => $absFile,
		'relEditFile' => $relFile,
		'contentField' => trim($params[1]),
		'markerField' => trim($params[2]),
		'loadFromFileField' => trim($params[3]),
		'statusField' => trim($params[4])
	);
}
202
203
204
205
206
207
208
209
210
211
212
213
214
215 /**********************************************
216 *
217 * Main function
218 *
219 **********************************************/
220
221 /**
222 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
223 * This is the main function called from tcemain and transfer data classes
224 *
225 * @param string Input value
226 * @param array Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
227 * @param string Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
228 * @param array Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
229 * @return string Output value
230 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
231 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())	{

	// Init: processing options (the "proc." section of the passed configuration)
	// and the upper-cased, trimmed list of tags to preserve.
	$this->procOptions = $thisConfig['proc.'];
	$this->preserveTags = strtoupper(implode(',',t3lib_div::trimExplode(',',$this->procOptions['preserveTags'])));

	// Get parameters for rte_transformation:
	$p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);

	// Setting modes: "overruleMode" (comma list) wins over the "mode" parameter (dash list).
	if (strcmp($this->procOptions['overruleMode'],''))	{
		$modes = array_unique(t3lib_div::trimExplode(',',$this->procOptions['overruleMode']));
	} else {
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	}
	$revmodes = array_flip($modes);

	// Expand the shortcut modes "ts" and "ts_css" into the transformations they stand for:
	if (isset($revmodes['ts']))	{
		$modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
	}
	if (isset($revmodes['ts_css']))	{
		$modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
	}

	// Flatten the expanded entries again and make the list unique (empty values removed):
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

	// Transformations towards the RTE are applied in reverse order:
	if ($direction=='rte')	{
		$modes = array_reverse($modes);
	}

	// Additional HTML cleaner configuration, applied before ("entry") and after ("exit") the main
	// transformations; these are independent processing options.
	$entry_HTMLparser = $this->procOptions['entryHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']) : '';
	$exit_HTMLparser = $this->procOptions['exitHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']) : '';

	// Line breaks of content are unified into char-10 only (CR+LF pairs become LF)
	if (!$this->procOptions['disableUnifyLineBreaks'])	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

	// If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($entry_HTMLparser))	{
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

	// Traverse modes:
	foreach($modes as $cmd)	{

		// ->DB
		if ($direction=='db')	{
			// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd])	{
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_db($value,$this);
			} else {	// ... else use defaults:
				switch($cmd)	{
					case 'ts_images':
						$value = $this->TS_images_db($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'db');
					break;
					case 'ts_links':
						$value = $this->TS_links_db($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_db($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
						$value = $this->TS_transform_db($value,$cmd=='css_transform');
					break;
					case 'ts_strip':
						$value = $this->TS_strip_db($value);
					break;
					default:
					break;
				}
			}
		}

		// ->RTE
		if ($direction=='rte')	{
			// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd])	{
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
				// Set the key in this direction as well, so a user object registered for several
				// keys gets the same information as in the "db" direction.
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_rte($value,$this);
			} else {	// ... else use defaults:
				switch($cmd)	{
					case 'ts_images':
						$value = $this->TS_images_rte($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'rte');
					break;
					case 'ts_links':
						$value = $this->TS_links_rte($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_rte($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$value = $this->TS_transform_rte($value,$cmd=='css_transform');
					break;
					default:
					break;
				}
			}
		}
	}

	// If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($exit_HTMLparser))	{
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

	// Final clean up of linebreaks:
	if (!$this->procOptions['disableUnifyLineBreaks'])	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);	// Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value);	// ... and then change all \n into \r\n
	}

	// Return value:
	return $value;
}
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381 /************************************
382 *
383 * Specific RTE TRANSFORMATION functions
384 *
385 *************************************/
386
387 /**
388 * Transformation handler: 'ts_images' / direction: "db"
389 * Processing images inserted in the RTE.
390 * This is used when content goes from the RTE to the database.
391 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
392 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
393 * Also "magic" images are processed here.
394 *
395 * @param string The content from RTE going to Database
396 * @return string Processed content
397 */
function TS_images_db($value)	{

	// Split content by <img> tags; the odd keys of the result hold the image tags.
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if ($k%2)	{	// image found, do processing:

			// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$siteUrl = $this->siteUrl();
			$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);

			$absRef = trim($attribArray['src']);		// Expected to be an absolute URL coming from the RTE into the Database.

			// Make path absolute if it has no scheme and starts with the site path.
			// The scheme is taken from parse_url(); pathinfo() never returns a "scheme" key.
			$urlParts = parse_url($absRef);
			$hasScheme = is_array($urlParts) && isset($urlParts['scheme']) && strcmp($urlParts['scheme'],'');
			if ($sitePath && !$hasScheme && t3lib_div::isFirstPartOfStr($absRef,$sitePath))	{
				// If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
				$absRef = substr($absRef,strlen($sitePath));
				$absRef = $siteUrl.$absRef;
			}

			// External image from another URL? In that case, fetch image (unless disabled feature).
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures'])	{
				$externalFile = $this->getURL($absRef);	// Get it
				if ($externalFile)	{
					$pU = parse_url($absRef);
					$pI = pathinfo($pU['path']);

					// Only files with a known image extension are stored locally:
					if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($pI['extension'])))	{
						$filename = t3lib_div::shortMD5($absRef).'.'.$pI['extension'];
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						// The "C" file name carries the extension twice; the magic-image
						// branch below strips one again to find the matching "P" file.
						$C_origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];
						if (!@is_file($origFilePath))	{
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
						$absRef = $siteUrl.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];

						$attribArray['src'] = $absRef;
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}

			// Check image as local file (siteURL equals the one of the image)
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
				$path = rawurldecode(substr($absRef,strlen($siteUrl)));	// Rel-path, rawurldecoded for special characters.
				$filepath = t3lib_div::getFileAbsFileName($path);		// Abs filepath, locked to relative path of this project.

				// Check file existence (in relative dir to this installation!)
				if ($filepath && @is_file($filepath))	{

					// If "magic image":
					$pathPre = $this->rteImageStorageDir().'RTEmagicC_';
					if (t3lib_div::isFirstPartOfStr($path,$pathPre))	{
						// Find original file:
						$pI = pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath))	{
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp = 0;
							$imgObj->tempPath = PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
							$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag
							// Compare dimensions:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1])	{
								$origImgInfo = $imgObj->getImageDimensions($origFilePath);	// Image dimensions of the original image
								// The new image is based on the width solely; the height limit is fixed to 1000.
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$curWH[0].'m','1000m');
								if ($imgI[3])	{
									@copy($imgI[3],$filepath);	// Override the child file
									// Removing width and height from style attribute
									$attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
									$attribArray['width'] = $imgI[0];
									$attribArray['height'] = $imgI[1];
									$params = t3lib_div::implodeAttributes($attribArray,1);
									$imgSplit[$k] = '<img '.$params.' />';
								}
							}
						}

					} elseif ($this->procOptions['plainImageMode'])	{	// If "plain image" has been configured:

						// Image dimensions as set in the image tag, if any
						$curWH = $this->getWHFromAttribs($attribArray);
						if ($curWH[0])	$attribArray['width'] = $curWH[0];
						if ($curWH[1])	$attribArray['height'] = $curWH[1];

						// Removing width and height from style attribute
						$attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);

						// Finding dimensions of image file:
						$fI = @getimagesize($filepath);

						// Perform corrections to aspect ratio based on configuration:
						switch((string)$this->procOptions['plainImageMode'])	{
							case 'lockDimensions':
								$attribArray['width'] = $fI[0];
								$attribArray['height'] = $fI[1];
							break;
							case 'lockRatioWhenSmaller':	// If the ratio has to be smaller, then first set the width...:
								if ($attribArray['width']>$fI[0])	$attribArray['width'] = $fI[0];
								// Deliberate fall-through: the height is then derived exactly like in "lockRatio".
							case 'lockRatio':
								if ($fI[0]>0)	{
									$attribArray['height'] = round($attribArray['width']*($fI[1]/$fI[0]));
								}
							break;
						}

						// Compile the image tag again:
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
				// Images not found on the server are intentionally left in place; removing the tag was
				// considered "not that logical" by the original author.
			}

			// Convert abs to rel url
			if ($imgSplit[$k])	{
				$attribArray = $this->get_tag_attributes_classic($imgSplit[$k],1);
				$absRef = trim($attribArray['src']);
				if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
					$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
					if (!isset($attribArray['alt']))	$attribArray['alt'] = '';		// Must have alt-attribute for XHTML compliance.
					$imgSplit[$k] = '<img '.t3lib_div::implodeAttributes($attribArray,1,1).' />';
				}
			}
		}
	}
	return implode('',$imgSplit);
}
540
541 /**
542 * Transformation handler: 'ts_images' / direction: "rte"
543 * Processing images from database content going into the RTE.
544 * Processing includes converting the src attribute to an absolute URL.
545 *
546 * @param string Content input
547 * @return string Content output
548 */
function TS_images_rte($value)	{

	$siteUrl = $this->siteUrl();
	$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);

	// Split content by <img> tags; the odd keys of the result hold the image tags.
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if ($k%2)	{	// image found:

			// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$absRef = trim($attribArray['src']);

			// Unless the src attribute is already pointing to an external URL:
			if (strtolower(substr($absRef,0,4))!='http')	{
				$src = $attribArray['src'];

				// Remove the relative back-path, but only when the value really starts with it.
				// Cutting off strlen(relBackPath) characters unconditionally would mangle
				// any reference that does not carry that prefix.
				$backPathLength = strlen($this->relBackPath);
				if ($backPathLength && !strncmp($src,$this->relBackPath,$backPathLength))	{
					$src = substr($src,$backPathLength);
				}

				// If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
				$src = preg_replace('#^'.preg_quote($sitePath,'#').'#','',$src);

				$attribArray['src'] = $siteUrl.$src;
				if (!isset($attribArray['alt']))	$attribArray['alt'] = '';
				$params = t3lib_div::implodeAttributes($attribArray);
				$imgSplit[$k] = '<img '.$params.' />';
			}
		}
	}

	// Return processed content:
	return implode('',$imgSplit);
}
579
580 /**
581 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
582 * Converting <A>-tags to/from abs/rel
583 *
584 * @param string Content input
585 * @param string Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
586 * @return string Content output
587 */
function TS_reglinks($value,$direction)	{
	// Any direction other than "rte" or "db" yields an empty string.
	$retVal = '';

	switch($direction)	{
		case 'rte':
			// Towards the RTE: make the href of all <a> tags absolute.
			$retVal = $this->TS_AtagToAbs($value,1);
		break;
		case 'db':
			// Towards the database: strip the site URL from local links.
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
			// foreach replaces the former each() loop; each() no longer exists in PHP 8.
			foreach($blockSplit as $k => $v)	{
				if ($k%2)	{	// block:
					$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
					// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
						$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag = '<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
					$eTag = '</a>';
					// The content between the tags is processed recursively.
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($v),$direction).$eTag;
				}
			}
			$retVal = implode('',$blockSplit);
		break;
	}
	return $retVal;
}
616
617 /**
618 * Transformation handler: 'ts_links' / direction: "db"
619 * Converting <A>-tags to <link tags>
620 *
621 * @param string Content input
622 * @return string Content output
623 * @see TS_links_rte()
624 */
function TS_links_db($value)	{

	// Split content into <a> tag blocks; the odd keys of the result hold the blocks.
	$parts = $this->splitIntoBlock('A',$value);
	foreach($parts as $idx => $block)	{
		if (!($idx%2))	{
			continue;	// Content outside of A-tags is left as it is.
		}

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($block),1);
		$info = $this->urlInfoForLinkTags($attribArray['href']);

		// Find out whether the tag has attributes besides href, target, class and title:
		$otherAttribs = $attribArray;
		unset($otherAttribs['href'], $otherAttribs['target'], $otherAttribs['class'], $otherAttribs['title']);
		if ($otherAttribs['rteerror'])	{	// An error marker and the style set along with it do not count.
			unset($otherAttribs['style'], $otherAttribs['rteerror']);
		}

		if (!count($otherAttribs))	{
			// Only known attributes: create the TYPO3 pseudo-tag "<link>" with the
			// space separated parts url, target, class and quoted title. A dash is the
			// placeholder for an empty part which is followed by a non-empty one.
			$target = $attribArray['target'];
			$class = $attribArray['class'];
			$title = $attribArray['title'];

			$bTag = '<link '.$info['url'];
			if ($target)	{
				$bTag.= ' '.$target;
			} elseif ($class || $title)	{
				$bTag.= ' -';
			}
			if ($class)	{
				$bTag.= ' '.$class;
			} elseif ($title)	{
				$bTag.= ' -';
			}
			if ($title)	{
				$bTag.= ' "'.$title.'"';
			}
			$bTag.= '>';
			$eTag = '</link>';
		} else {
			// Other attributes present: store the link as a regular a-tag.
			unset($attribArray['rtekeep']);	// The 'rtekeep' attribute is not stored.
			// If the url is local, remove url-prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
				$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
			}
			$bTag = '<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag = '</a>';
		}

		// The content between the tags is processed recursively.
		$parts[$idx] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($block)).$eTag;
	}
	return implode('',$parts);
}
665
666 /**
667 * Transformation handler: 'ts_links' / direction: "rte"
668 * Converting <link tags> to <A>-tags
669 *
670 * @param string Content input
671 * @return string Content output
672 * @see TS_links_db()
673 */
function TS_links_rte($value)	{
	// Regular <a> tags get absolute hrefs first.
	$value = $this->TS_AtagToAbs($value);

	// Split content by the TYPO3 pseudo tag "<link>"; the odd keys of the result hold the blocks.
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v)	{
		$error = '';
		if ($k%2)	{	// block:
			// Tag content without the closing ">" split into its parts: [1] link parameter, [2] target, [3] class, [4] title
			$tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v),0,-1)),true);
			$link_param = $tagCode[1];
			$href = '';
			$siteUrl = $this->siteUrl();

			// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
			if(strstr($link_param,'@'))	{		// mailadr
				// PCRE replaces eregi_replace(), which no longer exists in PHP 7 and later.
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#')	{	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				// Position of first slash / first period; 0 also means "not found".
				$fileChar = intval(strpos($link_param, '/'));
				$urlChar = intval(strpos($link_param, '.'));

				// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension']))))	{
					$href = $siteUrl.$link_param;
				} elseif($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar))	{	// url (external): If doubleSlash or if a '.' comes before a '/'.
					// Prepend "http://" unless a scheme is given already.
					// PCRE replaces ereg(), which no longer exists in PHP 7 and later.
					$scheme = preg_match('#^[a-z]*://#',trim(strtolower($link_param))) ? '' : 'http://';
					$href = $scheme.$link_param;
				} elseif($fileChar)	{	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);		// Link-data del
					if (!strcmp($idPart,''))	{ $idPart = $this->recPid; }	// If no id or alias is given, set it to class record pid

					// Note: the part after "#" (section mark) is not evaluated separately here; it stays
					// part of $link_param which is used for the href below.

					// Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1)	{
						$idPart = $pairParts[0];
						// Type ? future support for?
					}
					// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart))	{
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}
					$page = t3lib_BEfunc::getRecord('pages', $idPart);
					if (is_array($page))	{	// Page must exist...
						$href = $siteUrl.'?id='.$link_param;
					} else if(strtolower(substr($link_param, 0, 7)) == 'record:')	{
						// linkHandler - allowing links to start with "record:"
						$href = $link_param;
					} else {
						// The link is still created, but flagged with an error for display in the RTE.
						$href = $siteUrl.'?id='.$link_param;
						$error = 'No page found: '.$idPart;
					}
				}
			}

			// Setting the A-tag ("-" is the placeholder for an empty target/class):
			$bTag = '<a href="'.htmlspecialchars($href).'"'.
						($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
						($tagCode[3]&&$tagCode[3]!='-' ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
						($tagCode[4] ? ' title="'.htmlspecialchars($tagCode[4]).'"' : '').
						($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : '').	// Should be OK to add the style; the transformation back to database will remove it...
						'>';
			$eTag = '</a>';
			// The content between the tags is processed recursively.
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

	// Return content:
	return implode('',$blockSplit);
}
755
756 /**
757 * Preserve special tags
758 *
759 * @param string Content input
760 * @return string Content output
761 */
function TS_preserve_db($value)	{
	// Nothing to do when no tags are configured to be preserved.
	if (!$this->preserveTags)	{
		return $value;
	}

	// Span-tags are used as containers for the special tags; the odd keys hold the span blocks.
	$segments = $this->splitIntoBlock('span',$value);
	foreach($segments as $idx => $segment)	{
		if (!($idx%2))	{
			continue;
		}
		$attribs = $this->get_tag_attributes_classic($this->getFirstTag($segment));
		if (!$attribs['specialtag'])	{
			continue;	// An ordinary span, left untouched.
		}
		// The "specialtag" attribute holds the rawurlencoded opening tag; it replaces the span
		// again and a matching closing tag is appended after the inner content.
		$openingTag = rawurldecode($attribs['specialtag']);
		$tagName = $this->getFirstTagName($openingTag);
		$segments[$idx] = $openingTag.$this->removeFirstAndLastTag($segment).'</'.$tagName.'>';
	}
	return implode('',$segments);
}
779
780 /**
781 * Preserve special tags
782 *
783 * @param string Content input
784 * @return string Content output
785 */
function TS_preserve_rte($value)	{
	// Nothing to do when no tags are configured to be preserved.
	if (!$this->preserveTags)	{
		return $value;
	}

	// Split by the configured tags; the odd keys of the result hold the tag blocks.
	$segments = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($segments as $idx => $segment)	{
		if (!($idx%2))	{
			continue;
		}
		// The block is wrapped in a span whose "specialtag" attribute carries the
		// rawurlencoded opening tag.
		$encodedTag = rawurlencode($this->getFirstTag($segment));
		$segments[$idx] = '<span specialtag="'.$encodedTag.'">'.$this->removeFirstAndLastTag($segment).'</span>';
	}
	return implode('',$segments);
}
797
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 * The content is split by block tags (TABLE, BLOCKQUOTE, optionally DIV and the tags in $this->headListTags). Each block is processed according to its tag name; content between blocks is resolved into lines by divideIntoLines().
 * The method calls itself for the content of blockquote/dd/div blocks and for the result of removeTables().
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_rte()
 */
function TS_transform_db($value,$css=FALSE) {

	// Safety brake so endless recursion is avoided: decremented on entry, restored before the regular return.
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0) return $value;

	// Pattern matching any run of line break characters (chr(10) / chr(13)):
	$lineBreakPattern = '/['.preg_quote(chr(10).chr(13)).']+/';

	// Split the content from RTE by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->headListTags,$value);

	$cc=0;
	$aC = count($blockSplit);

	// Avoid superfluous linebreaks by transform_db after ending headListTag
	while($aC && !strcmp(trim($blockSplit[$aC-1]),'')) {
		unset($blockSplit[$aC-1]);
		$aC = count($blockSplit);
	}

	// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		$cc++;
		// No trailing line break after the last part:
		$lastBR = $cc==$aC ? '' : chr(10);

		if ($k%2) {	// Inside block:

			// Init:
			$tag=$this->getFirstTag($v);
			$tagName=strtolower($this->getFirstTagName($v));

			// Process based on the tag:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
				case 'dd' :	// Do the same on dd elements
				case 'div':	// Do the same on div sections, if they were splitted
					$blockSplit[$k]=$tag.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css) {
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
							// foreach replaces the former list()/each() loop (each() no longer exists in PHP 8)
							foreach(array_keys($parts) as $k2) {
								$parts[$k2]=preg_replace($lineBreakPattern,'',$parts[$k2]);	// remove all linesbreaks!
								$parts[$k2]=$this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2]=$this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
							}
							if ($tagName=='ol')	{ $params=' type="1"'; } else { $params=''; }
							$blockSplit[$k]='<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css) {
						$blockSplit[$k]=$this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css) {
						$attribArray=$this->get_tag_attributes_classic($tag);
						// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
							// The header level goes into the "type" attribute; it is left out for level 6
							$type = intval(substr($tagName,1));
							$blockSplit[$k]='<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
							$blockSplit[$k]='<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
						// Eliminate true linebreaks inside Hx tags
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				default:
					// Eliminate true linebreaks inside other headlist tags and after hr tag
					$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
				break;
			}
		} else {	// NON-block:
			if (strcmp(trim($blockSplit[$k]),'')) {
				$blockSplit[$k]=$this->divideIntoLines(preg_replace($lineBreakPattern,' ',$blockSplit[$k])).$lastBR;
				$blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]);
			} else unset($blockSplit[$k]);
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
919
/**
 * Wraps a-tags that contain a style attribute with a span-tag.
 * The style attribute is moved from the a-tag to the wrapping span-tag; a-tags without a style attribute are left untouched.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function transformStyledATags($value) {
	$segments = $this->splitIntoBlock('A',$value);
	foreach ($segments as $idx => $segment) {
		if (!($idx%2)) continue;	// Only odd keys hold A-tag blocks

		$linkAttributes = $this->get_tag_attributes_classic($this->getFirstTag($segment),1);
		if (!$linkAttributes['style']) continue;

		// Move the style attribute to a wrapping span-tag:
		$spanAttributes = array('style' => $linkAttributes['style']);
		unset($linkAttributes['style']);

		$openingTags = '<span '.t3lib_div::implodeAttributes($spanAttributes,1).'><a '.t3lib_div::implodeAttributes($linkAttributes,1).'>';
		$closingTags = '</a></span>';
		$segments[$idx] = $openingTags.$this->removeFirstAndLastTag($segment).$closingTags;
	}
	return implode('',$segments);
}
942
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 * The content is split by block tags (TABLE, BLOCKQUOTE, TYPOLIST, TYPOHEAD, optionally DIV and the tags in $this->headListTags). <typolist> blocks become ol/ul lists, <typohead> blocks become Hx tags, and content between blocks is wrapped into p/div tags by setDivTags().
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value,$css=0) {

	// List of tags regarded as blocks (used for the split and for the look-ahead below):
	$blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->headListTags;

	// Split the content from Database by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock($blockTagList,$value);

	// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// Inside one of the blocks:

			// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

			// Based on tagname, we do transformations:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes
				case 'dd':	// Keep definitions
				case 'div':	// Keep div sections, if they were splitted
					$blockSplit[$k] = $tag.
									$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
									'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
						// Strip one leading and one trailing (possibly space-padded) line break.
						// preg_replace replaces the former ereg_replace (removed in PHP 7); \z anchors at the very end of the string like POSIX "$" did.
						$tListContent = preg_replace('/^[ ]*\n/','',$tListContent);
						$tListContent = preg_replace('/\n[ ]*\z/','',$tListContent);
						$lines = explode(chr(10),$tListContent);
						$typ = $attribArray['type']==1 ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
											'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
											'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
						$typ = t3lib_div::intInRange($attribArray['type'],0,6);
						if (!$typ)	$typ=6;	// A missing/zero type means h6
						$align = $attribArray['align']?' align="'.$attribArray['align'].'"': '';
						$class = $attribArray['class']?' class="'.$attribArray['class'].'"': '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
											$tC.
											'</h'.$typ.'>';
					}
				break;
			}
			// Removing the line break directly following the block (the parts are imploded with chr(10) at the end)
			$blockSplit[$k+1] = preg_replace('/^[ ]*\n/','',$blockSplit[$k+1]);
		} else {	// NON-block:
			// Look ahead at the following part; there is none after the last part
			$nextFTN = $this->getFirstTagName(isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '');
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList($blockTagList,$nextFTN)) {	// Removing linebreak if typolist/typohead
				$blockSplit[$k] = preg_replace('/\n[ ]*\z/','',$blockSplit[$k]);
			}
			// If $blockSplit[$k] is blank then unset the line. UNLESS the line happend to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak) {
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}
	return implode(chr(10),$blockSplit);
}
1017
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Removes all tags except a fixed list of allowed ones (using strip_tags()).
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_strip_db($value) {
	$allowedTagNames = explode(',','b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote');

	// Build the "<b><i>..." list format expected by strip_tags():
	$allowedTagString = '';
	foreach ($allowedTagNames as $allowedTagName) {
		$allowedTagString .= '<'.$allowedTagName.'>';
	}

	return strip_tags($value,$allowedTagString);
}
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043 /***************************************************************
1044 *
1045 * Generic RTE transformation, analysis and helper functions
1046 *
1047 **************************************************************/
1048
/**
 * Reads the file or url $url and returns the content.
 * Plain wrapper for t3lib_div::getURL().
 *
 * @param	string		Filepath/URL to read
 * @return	string		The content from the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url) {
	$content = t3lib_div::getURL($url);
	return $content;
}
1059
/**
 * Function for cleaning content going into the database.
 * It calls HTMLcleaner() with a configuration derived from $this->procOptions and from getKeepTags() for the direction "db".
 *
 * @param	string		Content to clean up
 * @param	string		Comma list of tags to specifically allow. Default comes from getKeepTags and is ""
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='') {
	// Tag configuration; an explicit tag list takes precedence over the default configuration
	$keepTags = $tagList ? $this->getKeepTags('db',$tagList) : $this->getKeepTags('db');

	// 1 if "dontRemoveUnknownTags_db" is set, otherwise 0
	$kUknown = 0;
	if ($this->procOptions['dontRemoveUnknownTags_db']) {
		$kUknown = 1;
	}

	// 0 if "dontUndoHSC_db" is set, otherwise -1
	$hSC = -1;
	if ($this->procOptions['dontUndoHSC_db']) {
		$hSC = 0;
	}

	// Additional configuration in order to honor "xhtml_cleaning" set for any of the three db-parser configurations
	$addConfig = array();
	foreach (array('HTMLparser_db.','entryHTMLparser_db.','exitHTMLparser_db.') as $parserKey) {
		if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
			$addConfig['xhtml'] = 1;
			break;
		}
	}

	return $this->HTMLcleaner($content,$keepTags,$kUknown,$hSC,$addConfig);
}
1087
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the function will cache the configuration in $this->getKeepTags_cache for next time processing goes on.
 * Entries of the "denyTags" processing option are matched case-insensitively against the (lowercase) list of tags to keep.
 *
 * @param	string		The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param	string		Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='') {

	// Serve the cached configuration if there is one and no explicit tag list was given:
	if (!$tagList && is_array($this->getKeepTags_cache[$direction])) {
		return $this->getKeepTags_cache[$direction];
	}

	// Setting up allowed tags:
	if (strcmp($tagList,'')) {	// If the $tagList input var is set, this will take precedence
		$keepTags = array_flip(t3lib_div::trimExplode(',',$tagList,1));
	} else {	// Default is to get allowed/denied tags from internal array of processing options:
		// Construct default list of tags to keep:
		$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
		$keepTags = array_flip(t3lib_div::trimExplode(',',$typoScript_list.','.strtolower($this->procOptions['allowTags']),1));

		// For tags to deny, remove them from $keepTags array.
		// The keys of $keepTags are lowercase, so the deny list is lowercased as well - otherwise e.g. "B" would never match.
		$denyTags = t3lib_div::trimExplode(',',strtolower($this->procOptions['denyTags']),1);
		foreach($denyTags as $dKe) {
			unset($keepTags[$dKe]);
		}
	}

	// Based on the direction of content, set further options:
	switch ($direction) {

		// GOING from database to Rich Text Editor:
		case 'rte':

			if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
				// Transform bold/italics tags to strong/em
				if (isset($keepTags['b']))	{$keepTags['b']=array('remap'=>'STRONG');}
				if (isset($keepTags['i']))	{$keepTags['i']=array('remap'=>'EM');}
			}

			// Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
			list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);
		break;

		// GOING from RTE to database:
		case 'db':

			if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
				// Transform strong/em back to bold/italics:
				if (isset($keepTags['strong']))	{ $keepTags['strong']=array('remap'=>'b'); }
				if (isset($keepTags['em']))	{ $keepTags['em']=array('remap'=>'i'); }
			}

			// Setting up span tags if they are allowed:
			if (isset($keepTags['span'])) {
				$classes=array_merge(array(''),$this->allowedClasses);
				$keepTags['span']=array(
					'allowedAttribs' => 'class,style,xml:lang',
					'fixAttrib' => Array(
						'class' => Array (
							'list' => $classes,
							'removeIfFalse' => 1
						)
					),
					'rmTagIfNoAttrib' => 1
				);
				// Without the "allowedClasses" option the class value is not restricted to a list
				if (!$this->procOptions['allowedClasses'])	unset($keepTags['span']['fixAttrib']['class']['list']);
			}

			// Setting up font tags if they are allowed:
			if (isset($keepTags['font'])) {
				$colors=array_merge(array(''),t3lib_div::trimExplode(',',$this->procOptions['allowedFontColors'],1));
				$keepTags['font']=array(
					'allowedAttribs'=>'face,color,size',
					'fixAttrib' => Array(
						'face' => Array (
							'removeIfFalse' => 1
						),
						'color' => Array (
							'removeIfFalse' => 1,
							'list'=>$colors
						),
						'size' => Array (
							'removeIfFalse' => 1,
						)
					),
					'rmTagIfNoAttrib' => 1
				);
				// Without the "allowedFontColors" option the color value is not restricted to a list
				if (!$this->procOptions['allowedFontColors'])	unset($keepTags['font']['fixAttrib']['color']['list']);
			}

			// Setting further options, getting them from the processiong options:
			$TSc = $this->procOptions['HTMLparser_db.'];
			if (!$TSc['globalNesting'])	$TSc['globalNesting']='b,i,u,a,center,font,sub,sup,strong,em,strike,span';
			if (!$TSc['noAttrib'])	$TSc['noAttrib']='b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';

			// Transforming the array from TypoScript to regular array:
			list($keepTags) = $this->HTMLparserConfig($TSc,$keepTags);
		break;
	}

	// Configurations built from an explicit tag list are returned directly and never cached:
	if ($tagList) {
		return $keepTags;
	}

	// Caching (internally, in object memory) the result:
	$this->getKeepTags_cache[$direction] = $keepTags;
	return $this->getKeepTags_cache[$direction];
}
1199
/**
 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by chr(10).
 * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 * If no div/p sections are found (or the recursion brake has reached zero) the input value is returned unchanged as a string, regardless of $returnArray.
 *
 * @param	string		Value to process.
 * @param	integer		Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	mixed		Processed input value; an array of lines if $returnArray was set and div/p sections were found, otherwise a string.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {

	// Internalize font tags (move them from OUTSIDE p/div to inside it that is the case):
	if ($this->procOptions['internalizeFontTags']) {$value = $this->internalizeFontTags($value);}

	// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?

	if ($this->procOptions['keepPDIVattribs']) {
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

	// Returns plainly the value if there was no div/p sections in it
	if (count($divSplit)<=1 || $count<=0) {
		return $value;
	}

	// Traverse the splitted sections:
	foreach($divSplit as $k => $v) {
		if ($k%2) {	// Inside
			$v=$this->removeFirstAndLastTag($v);

			// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($v,$count-1,1);

			// If there happend to be sub-nesting of p/div, an array came back and is written directly as the new content of THIS section. (This would be considered 'an error')
			// ... but if NO subsection was found, a string came back and we process it as a TRUE line without erronous content:
			if (!is_array($subLines)) {
				$subLines = array($subLines);
				if (!$this->procOptions['dontConvBRtoParagraph']) {	// process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
					// preg_split with the "i" modifier replaces the former spliti() (removed in PHP 7)
					$subLines = preg_split('/<br[[:space:]]*[\/]?>/i',$v);
				}

				// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
				// foreach replaces the former list()/each() loop (each() no longer exists in PHP 8)
				foreach(array_keys($subLines) as $sk) {

					// Clear up the subline for DB.
					$subLines[$sk]=$this->HTMLcleaner_db($subLines[$sk]);

					// Get first tag, attributes etc:
					$fTag = $this->getFirstTag($divSplit[$k]);
					$tagName=strtolower($this->getFirstTagName($divSplit[$k]));
					$attribs=$this->get_tag_attributes($fTag);

					// Keep attributes (lowercase)
					$newAttribs=array();
					if (count($keepAttribListArr)) {
						foreach($keepAttribListArr as $keepA) {
							if (isset($attribs[0][$keepA]))	{ $newAttribs[$keepA] = $attribs[0][$keepA]; }
						}
					}

					// ALIGN attribute:
					if (!$this->procOptions['skipAlign'] && strcmp(trim($attribs[0]['align']),'') && strtolower($attribs[0]['align'])!='left') {	// Set to value, but not 'left'
						$newAttribs['align']=strtolower($attribs[0]['align']);
					}

					// CLASS attribute:
					if (!$this->procOptions['skipClass'] && strcmp(trim($attribs[0]['class']),'')) {	// Set to whatever value
						if (!count($this->allowedClasses) || in_array(strtoupper($attribs[0]['class']),$this->allowedClasses)) {
							$newAttribs['class']=$attribs[0]['class'];
						}
					}

					// Remove any line break char (10 or 13); str_replace replaces the former ereg_replace (removed in PHP 7)
					$subLines[$sk]=str_replace(array(chr(10),chr(13)),'',$subLines[$sk]);

					// If there are any attributes (and "remapParagraphTag" is not "1"), the line is wrapped in its tag again - remapped to p/div if configured:
					if (count($newAttribs) && strcmp($remapParagraphTag,'1')) {
						if ($remapParagraphTag=='P')	$tagName='p';
						if ($remapParagraphTag=='DIV')	$tagName='div';
						$subLines[$sk]='<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>'.$subLines[$sk].'</'.$tagName.'>';
					}
				}
			}
			// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

			// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
			// But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
			// Those attributes should have been filtered before; if they are still there they must be considered as possible content.
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;' && !preg_match('/\<(img)(\s[^>]*)?\/?>/si', $divSplit[$k]) && !preg_match('/\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
				$divSplit[$k]='';
			}
		} else {	// outside div:
			// Remove positions which are outside div/p tags and without content
			$divSplit[$k]=trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],''))	unset($divSplit[$k]);	// Remove part if it's empty
		}
	}

	// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1310
/**
 * Converts all lines into <div></div>/<p></p>-sections (unless the line is a div/p-section already)
 * For processing of content going FROM database TO RTE.
 * Each line (split by chr(10)) is cleaned by HTMLcleaner(); blank lines become "&nbsp;".
 *
 * @param	string		Value to convert
 * @param	string		Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p') {

	// Configuration for the HTMLcleaner function, which processes each line on its way to the RTE:
	$keepTags = $this->getKeepTags('rte');
	// 'protect' unless "dontProtectUnknownTags_rte" is set (then 0)
	$kUknown = 'protect';
	if ($this->procOptions['dontProtectUnknownTags_rte']) {
		$kUknown = 0;
	}
	// 1 unless "dontHSC_rte" is set (then 0)
	$hSC = 1;
	if ($this->procOptions['dontHSC_rte']) {
		$hSC = 0;
	}
	// Whether "&amp;nbsp;" is turned back into "&nbsp;" after cleaning
	$convNBSP = $this->procOptions['dontConvAmpInNBSP_rte'] ? 0 : 1;

	// Divide the content into lines, based on chr(10):
	$lines = explode(chr(10),$value);
	foreach ($lines as $lineNo => $unused) {

		if (strcmp(trim($lines[$lineNo]),'')) {
			// Clean the line content:
			$lines[$lineNo] = $this->HTMLcleaner($lines[$lineNo],$keepTags,$kUknown,$hSC);
			if ($convNBSP) {
				$lines[$lineNo] = str_replace('&amp;nbsp;','&nbsp;',$lines[$lineNo]);
			}
		} else {
			// A blank line is set to &nbsp;
			$lines[$lineNo] = '&nbsp;';
		}

		// Only wrap the line in <$dT> if it is not already wrapped in div or p tags:
		$probe = strtolower(trim($lines[$lineNo]));
		$isDivWrapped = substr($probe,0,4)=='<div' && substr($probe,-6)=='</div>';
		$isParagraphWrapped = substr($probe,0,2)=='<p' && substr($probe,-4)=='</p>';
		if (!$isDivWrapped && !$isParagraphWrapped) {
			$lines[$lineNo] = '<'.$dT.'>'.$lines[$lineNo].'</'.$dT.'>';
		}
	}

	// Implode result:
	return implode(chr(10),$lines);
}
1353
/**
 * This splits the $value in font-tag chunks.
 * If there are any <P>/<DIV> sections inside of a font-tag, the font-tag is wrapped AROUND the content INSIDE of the P/DIV sections and the outer font-tag is removed.
 * Content between those sections keeps the font-tag if it contains anything but tags and whitespace.
 * This function is used by eg. divideIntoLines() if the procesing option 'internalizeFontTags' is set.
 *
 * @param	string		Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value) {

	// Splitting into font tag blocks (odd keys hold the font blocks):
	$fontBlocks = $this->splitIntoBlock('font',$value);

	foreach ($fontBlocks as $fontKey => $fontBlock) {
		if (!($fontKey%2)) continue;

		// The opening font-tag:
		$fontTag = $this->getFirstTag($fontBlock);

		$sections = $this->splitIntoBlock('div,p',$this->removeFirstAndLastTag($fontBlock),1);
		if (count($sections)<=1) continue;	// No div/p sections inside the font-tag; leave it as it is

		// Traverse the sections and move the font-tag inside:
		foreach ($sections as $sectionKey => $section) {
			if ($sectionKey%2) {	// A div/p section
				$sectionTag = $this->getFirstTag($section);
				$sectionTagName = $this->getFirstTagName($section);
				$sectionContent = $this->removeFirstAndLastTag($section);
				$sections[$sectionKey] = $sectionTag.$fontTag.$sectionContent.'</font>'.'</'.$sectionTagName.'>';
			} elseif (trim(strip_tags($section))) {	// Content between the sections
				$sections[$sectionKey] = $fontTag.$section.'</font>';
			}
		}
		$fontBlocks[$fontKey] = implode('',$sections);
	}

	return implode('',$fontBlocks);
}
1394
/**
 * Returns the site URL as reported by t3lib_div::getIndpEnv().
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl() {
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	return $siteUrl;
}
1404
/**
 * Return the storage folder of RTE image files.
 * This is $this->rte_p['imgpath'] if set, otherwise $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string
 */
function rteImageStorageDir() {
	if ($this->rte_p['imgpath']) {
		return $this->rte_p['imgpath'];
	}
	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1414
/**
 * Remove all tables from incoming code
 * Each table is replaced by the content of its cells (td-tags inside tr-tags), every cell followed by the break character. Thus at least the content is preserved in some way.
 * The resulting parts are imploded with the break character as well.
 *
 * @param	string		Input value
 * @param	string		Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />') {

	// Splitting value into table blocks (odd keys hold the tables):
	$parts = $this->splitIntoBlock('table',$value);

	foreach ($parts as $partKey => $part) {
		if (!($partKey%2)) continue;

		// Collect the content of all cells of all rows of this table:
		$flattened = '';
		$rows = $this->splitIntoBlock('tr',$part);
		foreach ($rows as $rowKey => $row) {
			if (!($rowKey%2)) continue;

			$cells = $this->getAllParts($this->splitIntoBlock('td',$row),1,0);
			foreach ($cells as $cellContent) {
				$flattened .= $cellContent.$breakChar;
			}
		}
		$parts[$partKey] = $flattened;
	}

	// Implode it all again:
	return implode($breakChar,$parts);
}
1447
/**
 * Default tag mapping for TS
 * Maps strong/em to b/i for direction "db" and b/i to strong/em for direction "rte" (using mapTags()).
 *
 * @param	string		Input code to process
 * @param	string		Direction To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte') {
	// Tag mappings per direction:
	$mappings = array(
		'db' => array(
			'strong' => 'b',
			'em' => 'i'
		),
		'rte' => array(
			'b' => 'strong',
			'i' => 'em'
		)
	);

	foreach ($mappings as $mappingDirection => $tagMap) {
		if ($direction==$mappingDirection) {
			$code = $this->mapTags($code,$tagMap);	// Map tags
		}
	}
	return $code;
}
1470
/**
 * Finds width and height from attrib-array
 * If the width and height is found (as a pixel value) in the style-attribute, use that! Otherwise the "width" / "height" attributes are used.
 * Width and height are looked up independently of each other.
 *
 * @param	array		Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray) {
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style) {
		$regex='[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
		// preg_match with the "i" modifier replaces the former eregi() (removed in PHP 7).
		// Each dimension gets its own match array so that a value matched for the width is never reused for the height.
		// Width
		$widthMatch = array();
		if (preg_match('/width'.$regex.'/i',$style,$widthMatch)) {
			$w = intval($widthMatch[1]);
		}
		// Height
		$heightMatch = array();
		if (preg_match('/height'.$regex.'/i',$style,$heightMatch)) {
			$h = intval($heightMatch[1]);
		}
	}

	// Fall back to the plain attributes:
	if (!$w && isset($attribArray['width'])) {
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height'])) {
		$h = $attribArray['height'];
	}
	return array(intval($w),intval($h));
}
1498
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * The returned array always has the keys "url" and "type" (email, ext, anchor, page or file). For URLs on the current host "relScriptPath" and "relUrl" are set as well, and for pages also "pageid" and "cElement".
 *
 * @param	string		URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url) {
	$info = array();
	$url = trim($url);
	if (substr(strtolower($url),0,7)=='mailto:') {
		$info['url']=trim(substr($url,7));
		$info['type']='email';
	} else {
		$curURL = $this->siteUrl(); 	// 100502, removed this: 'http://'.t3lib_div::getThisUrl(); Reason: The url returned had typo3/ in the end - should be only the site's url as far as I see...

		// Find the length of the common prefix of the URL and the site URL.
		// Bracket offsets replace the former $str{$i} syntax (removed in PHP 8); the loop never reads beyond the end of either string.
		$urlLength = strlen($url);
		$curURLLength = strlen($curURL);
		for($a=0;$a<$urlLength && $a<$curURLLength;$a++) {
			if ($url[$a]!==$curURL[$a]) {
				break;
			}
		}

		$info['relScriptPath']=substr($curURL,$a);
		$info['relUrl']=substr($url,$a);
		$info['url']=$url;
		$info['type']='ext';

		$siteUrl_parts = parse_url($url);
		$curUrl_parts = parse_url($curURL);

		if ($siteUrl_parts['host']==$curUrl_parts['host'] 	// Hosts should match
			&& (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir))) {	// If the script path seems to match or is empty (FE-EDIT)

			// New processing order 100502
			$uP=parse_url($info['relUrl']);

			if (!strcmp('#'.$siteUrl_parts['fragment'],$info['relUrl'])) {
				// The relative URL consists of the fragment only
				$info['url']=$info['relUrl'];
				$info['type']='anchor';
			} elseif (!trim($uP['path']) || !strcmp($uP['path'],'index.php')) {
				// No path or "index.php": a page link if an "id=" parameter is found
				$pp = explode('id=',$uP['query']);
				$id = trim($pp[1]);
				if ($id) {
					$info['pageid']=$id;
					$info['cElement']=$uP['fragment'];
					$info['url']=$id.($info['cElement']?'#'.$info['cElement']:'');
					$info['type']='page';
				}
			} else {
				$info['url']=$info['relUrl'];
				$info['type']='file';
			}
		} else {
			unset($info['relScriptPath']);
			unset($info['relUrl']);
		}
	}
	return $info;
}
1556
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 * If the href of an a-tag has no scheme, the site URL is prepended (after cutting off as many characters as $this->relBackPath is long). The content of each a-tag is processed recursively.
 * A-tags without href content always get the "rtekeep" attribute.
 *
 * @param	string		Content input
 * @param	boolean		If true, then the "rtekeep" attribute will not be set.
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
	$blockSplit = $this->splitIntoBlock('A',$value);
	// foreach replaces the former list()/each() loop (each() no longer exists in PHP 8)
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// block:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);

			// Checking if there is a scheme, and if not, prepend the current url.
			if (strlen($attribArray['href'])) {	// ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
				$uP = parse_url(strtolower($attribArray['href']));
				// empty() also covers parse_url() failing (returning false) without accessing an offset on it
				if (empty($uP['scheme'])) {
					$attribArray['href'] = $this->siteUrl().substr($attribArray['href'],strlen($this->relBackPath));
				}
			} else {
				$attribArray['rtekeep'] = 1;
			}
			if (!$dontSetRTEKEEP)	$attribArray['rtekeep'] = 1;

			$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag='</a>';
			$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}
	return implode('',$blockSplit);
}
1589 }
1590
1591
// Include the XCLASS extension file registered for this script in the current TYPO3 mode, if any:
if (defined('TYPO3_MODE')) {
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
		include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
	}
}
1595 ?>