removed empty lines at the end of the files
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2008 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 103: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 138: function init($elRef='',$recPid=0)
44 * 150: function setRelPath($path)
45 * 174: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 232: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 398: function TS_images_db($value)
52 * 550: function TS_images_rte($value)
53 * 589: function TS_reglinks($value,$direction)
54 * 626: function TS_links_db($value)
55 * 675: function TS_links_rte($value)
56 * 760: function TS_preserve_db($value)
57 * 784: function TS_preserve_rte($value)
58 * 805: function TS_transform_db($value,$css=FALSE)
59 * 922: function transformStyledATags($value)
60 * 948: function TS_transform_rte($value,$css=0)
61 * 1019: function TS_strip_db($value)
62 *
63 * SECTION: Generic RTE transformation, analysis and helper functions
64 * 1050: function getURL($url)
65 * 1064: function HTMLcleaner_db($content,$tagList='')
66 * 1091: function getKeepTags($direction='rte',$tagList='')
67 * 1200: function divideIntoLines($value,$count=5,$returnArray=FALSE)
68 * 1304: function setDivTags($value,$dT='p')
69 * 1349: function internalizeFontTags($value)
70 * 1385: function siteUrl()
71 * 1395: function rteImageStorageDir()
72 * 1407: function removeTables($value,$breakChar='<br />')
73 * 1439: function defaultTStagMapping($code,$direction='rte')
74 * 1462: function getWHFromAttribs($attribArray)
75 * 1489: function urlInfoForLinkTags($url)
76 * 1548: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
77 *
78 * TOTAL FUNCTIONS: 28
79 * (This index is automatically created/updated by the extension "extdeveval")
80 *
81 */
82
83 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
84
85
86
87
88
89
90
91
92
93
94
95
96 /**
97 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
98 *
99 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
100 * @package TYPO3
101 * @subpackage t3lib
102 */
103 class t3lib_parsehtml_proc extends t3lib_parsehtml {
104
105 // Static:
106 var $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,HR,ADDRESS,DL,DD'; // List of tags for these elements
107
108 // Internal, static:
109 var $recPid = 0; // Set this to the pid of the record manipulated by the class.
110 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
111 var $relPath=''; // Relative path
112 var $relBackPath=''; // Relative back-path
113 var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
114
115 // Internal, dynamic
116 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls.
117 var $rte_p=''; // Parameters from TCA types configuration related to the RTE
118 var $getKeepTags_cache=array(); // Data caching for processing function
119 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE
120 var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
121
122
123
124
125
126
127
128
129
130
131 /**
132 * Initialize, setting element reference and record PID
133 *
134 * @param string Element reference, eg "tt_content:bodytext"
135 * @param integer PID of the record (page id)
136 * @return void
137 */
function init($elRef='',$recPid=0) {
		// Element reference in the form "[table]:[field]", eg. "tt_content:bodytext"
	$this->elRef = $elRef;
		// Page id of the record that is being processed
	$this->recPid = $recPid;
}
142
143 /**
144 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
145 * This is used when editing files with the RTE
146 *
147 * @param string The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
148 * @return void There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
149 */
function setRelPath($path) {
		// Normalise the input: strip surrounding whitespace, then at most one leading and one trailing slash.
		// PCRE is used because the POSIX ereg_*() functions are deprecated since PHP 5.3 and removed in PHP 7.
	$path = trim($path);
	$path = preg_replace('#^/#','',$path);
	$path = preg_replace('#/$#D','',$path);

		// An empty (or "0") path leaves ->relPath and ->relBackPath untouched
	if ($path) {
			// One "../" per path segment, eg. "fileadmin/static" gives "../../"
		$this->relBackPath = str_repeat('../',count(explode('/',$path)));
			// The relative path is always stored with a trailing slash
		$this->relPath = $path.'/';
	}
}
164
165 /**
166 * Evaluate the environment for editing a staticFileEdit file.
167 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
168 *
169 * @param array Parameters for the current field as found in types-config
170 * @param array Current record we are editing.
171 * @return mixed On success an array with various information is returned, otherwise a string with an error message
172 * @see t3lib_TCEmain, t3lib_transferData
173 */
function evalWriteFile($pArr,$currentRecord) {

		// Without a parameter array there is nothing to evaluate; the result is NULL in that case
	if (!is_array($pArr)) {
		return;
	}

		// The configured base path must be set, end with a slash and exist as a directory below PATH_site
	$basePath = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'];
	if (!$basePath || substr($basePath,-1)!='/' || !@is_dir(PATH_site.$basePath)) {
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

		// Parameter 0 names the record field that holds the file name to edit
	$params = $pArr['parameters'];
	$fileField = trim($params[0]);
	$fileName = $currentRecord[$fileField];
	if (!$fileField || !$fileName || !t3lib_div::validPathStr($fileName)) {
		return "ERROR: Edit file name could not be found or was bad.";
	}

		// Build the relative and absolute location of the file and make sure it exists
	$relFile = $basePath.$fileName;
	$absFile = PATH_site.$relFile;
	if (!@is_file($absFile)) {
		return "ERROR: Editfile '".$relFile."' did not exist";
	}

		// Success: hand back the file locations and the remaining (trimmed) field names from parameters 1-4
	return array(
		'editFile' => $absFile,
		'relEditFile' => $relFile,
		'contentField' => trim($params[1]),
		'markerField' => trim($params[2]),
		'loadFromFileField' => trim($params[3]),
		'statusField' => trim($params[4])
	);
}
202
203
204
205
206
207
208
209
210
211
212
213
214
215 /**********************************************
216 *
217 * Main function
218 *
219 **********************************************/
220
221 /**
222 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
223 * This is the main function called from tcemain and transfer data classes
224 *
225 * @param string Input value
226 * @param array Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
227 * @param string Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
228 * @param array Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
229 * @return string Output value
230 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
231 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array()) {

		// Init: only a real "proc." array is accepted as processing options; anything else means "no options"
	$this->procOptions = (isset($thisConfig['proc.']) && is_array($thisConfig['proc.'])) ? $thisConfig['proc.'] : array();
	$this->preserveTags = strtoupper(implode(',',t3lib_div::trimExplode(',',$this->procOptions['preserveTags'])));

		// Dynamic configuration of blockElementList
	if ($this->procOptions['blockElementList']) {
		$this->blockElementList = $this->procOptions['blockElementList'];
	}

		// Get parameters for rte_transformation:
	$p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);

		// Setting modes: "overruleMode" (comma list) takes precedence over the "mode" parameter (dash list)
	if (strcmp($this->procOptions['overruleMode'],'')) {
		$modes = array_unique(t3lib_div::trimExplode(',',$this->procOptions['overruleMode']));
	} else {
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	}
	$revmodes = array_flip($modes);

		// Expand the meta mode "ts" into the transformations it stands for:
	if (isset($revmodes['ts'])) {
		$modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
	}
		// Expand the meta mode "ts_css" into the transformations it stands for:
	if (isset($revmodes['ts_css'])) {
		$modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
	}

		// Flatten the expanded entries again and make the list unique
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

		// Reverse order if direction is "rte"
	if ($direction=='rte') {
		$modes = array_reverse($modes);
	}

		// Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and are thus totally independent processing options you can set up:
	$entry_HTMLparser = $this->procOptions['entryHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']) : '';
	$exit_HTMLparser = $this->procOptions['exitHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']) : '';

		// Line breaks of content are unified: every \r\n pair becomes a single \n
	if (!$this->procOptions['disableUnifyLineBreaks']) {
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

		// If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($entry_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

		// Traverse modes:
	foreach($modes as $cmd) {
			// ->DB
		if ($direction=='db') {
				// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_db($value,$this);
			} else {	// ... else use defaults:
				switch($cmd) {
					case 'ts_images':
						$value = $this->TS_images_db($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'db');
					break;
					case 'ts_links':
						$value = $this->TS_links_db($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_db($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
						$value = $this->TS_transform_db($value,$cmd=='css_transform');
					break;
					case 'ts_strip':
						$value = $this->TS_strip_db($value);
					break;
					default:
					break;
				}
			}
		}
			// ->RTE
		if ($direction=='rte') {
				// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
					// Tell the user object which key it was registered under, exactly as the "db" direction does
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_rte($value,$this);
			} else {	// ... else use defaults:
				switch($cmd) {
					case 'ts_images':
						$value = $this->TS_images_rte($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'rte');
					break;
					case 'ts_links':
						$value = $this->TS_links_rte($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_rte($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
						$value = $this->TS_transform_rte($value,$cmd=='css_transform');
					break;
					default:
					break;
				}
			}
		}
	}

		// If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($exit_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

		// Final clean up of linebreaks:
	if (!$this->procOptions['disableUnifyLineBreaks']) {
		$value = str_replace(chr(13).chr(10),chr(10),$value);	// Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value);	// ... and then change all \n into \r\n
	}

		// Return value:
	return $value;
}
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386 /************************************
387 *
388 * Specific RTE TRANSFORMATION functions
389 *
390 *************************************/
391
392 /**
393 * Transformation handler: 'ts_images' / direction: "db"
394 * Processing images inserted in the RTE.
395 * This is used when content goes from the RTE to the database.
396 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
397 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
398 * Also "magic" images are processed here.
399 *
400 * @param string The content from RTE going to Database
401 * @return string Processed content
402 */
function TS_images_db($value) {

		// Site URL and site path are the same for every image, so they are resolved once up front (as TS_images_rte() does)
	$siteUrl = $this->siteUrl();
	$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);

		// Pattern that removes "width" and "height" declarations from a style attribute.
		// The lookbehind keeps compound properties such as "max-width", "border-width" or "line-height" intact.
	$dimensionStylePattern = '/\s*(?<![\w-])(?:width|height)\s*:[^;]*(?:$|;)/si';

		// Split content by <img> tags and traverse the resulting array for processing:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v) {
		if ($k%2) {	// image found, do processing:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$absRef = trim($attribArray['src']);	// It's always an absolute URL coming from the RTE into the Database.

				// Make path absolute if it is relative and we have a site path.
				// parse_url() is used because pathinfo() never reports a URL scheme.
			$urlParts = parse_url($absRef);
			$hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
			if ($sitePath && !$hasScheme && t3lib_div::isFirstPartOfStr($absRef,$sitePath)) {
					// if site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
				$absRef = substr($absRef,strlen($sitePath));
				$absRef = $siteUrl.$absRef;
			}

				// External image from another URL? In that case, fetch image (unless disabled feature).
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures']) {
				$externalFile = $this->getUrl($absRef);	// Get it
				if ($externalFile) {
					$pU = parse_url($absRef);
					$pI = pathinfo($pU['path']);

						// Only files with a known image extension are stored locally
					if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($pI['extension']))) {
						$filename = t3lib_div::shortMD5($absRef).'.'.$pI['extension'];
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						$C_origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];
						if (!@is_file($origFilePath)) {
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
						$absRef = $siteUrl.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];

						$attribArray['src'] = $absRef;
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}

				// Check image as local file (siteURL equals the one of the image)
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
				$path = rawurldecode(substr($absRef,strlen($siteUrl)));	// Rel-path, rawurldecoded for special characters.
				$filepath = t3lib_div::getFileAbsFileName($path);		// Abs filepath, locked to relative path of this project.

					// Check file existence (in relative dir to this installation!)
				if ($filepath && @is_file($filepath)) {

						// If "magic image":
					$pathPre = $this->rteImageStorageDir().'RTEmagicC_';
					if (t3lib_div::isFirstPartOfStr($path,$pathPre)) {
							// Find original file: the copy is named like the original plus a repeated extension
						$pI = pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath)) {
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp = 0;
							$imgObj->tempPath = PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
							$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag
								// Compare dimensions:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1]) {
								$origImgInfo = $imgObj->getImageDimensions($origFilePath);	// Image dimensions of the original image
								$cW = $curWH[0];
								$cH = 1000;	// Make the image based on the width solely...
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$cW.'m',$cH.'m');
								if ($imgI[3]) {
									@copy($imgI[3],$filepath);	// Override the child file
										// Removing width and height from style attribute
									$attribArray['style'] = preg_replace($dimensionStylePattern, '', $attribArray['style']);
									$attribArray['width'] = $imgI[0];
									$attribArray['height'] = $imgI[1];
									$params = t3lib_div::implodeAttributes($attribArray,1);
									$imgSplit[$k] = '<img '.$params.' />';
								}
							}
						}

					} elseif ($this->procOptions['plainImageMode']) {	// If "plain image" has been configured:

							// Image dimensions as set in the image tag, if any
						$curWH = $this->getWHFromAttribs($attribArray);
						if ($curWH[0]) $attribArray['width'] = $curWH[0];
						if ($curWH[1]) $attribArray['height'] = $curWH[1];

							// Removing width and height from style attribute
						$attribArray['style'] = preg_replace($dimensionStylePattern, '', $attribArray['style']);

							// Finding dimensions of image file:
						$fI = @getimagesize($filepath);

							// Perform corrections to aspect ratio based on configuration:
						switch((string)$this->procOptions['plainImageMode']) {
							case 'lockDimensions':
								$attribArray['width'] = $fI[0];
								$attribArray['height'] = $fI[1];
							break;
							case 'lockRatioWhenSmaller':	// If the ratio has to be smaller, then first set the width...:
								if ($attribArray['width']>$fI[0]) $attribArray['width'] = $fI[0];
								// ... and deliberately fall through to the ratio correction below
							case 'lockRatio':
								if ($fI[0]>0) {
									$attribArray['height'] = round($attribArray['width']*($fI[1]/$fI[0]));
								}
							break;
						}

							// Compile the image tag again:
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				} else {	// Image was not found in a proper position on the server!

						// Removing the image tag is deliberately disabled; it might not be that logical...
					#$imgSplit[$k]='';
				}
			}

				// Convert abs to rel url
			if ($imgSplit[$k]) {
				$attribArray = $this->get_tag_attributes_classic($imgSplit[$k],1);
				$absRef = trim($attribArray['src']);
				if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
					$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
					if (!isset($attribArray['alt'])) $attribArray['alt'] = '';	// Must have alt-attribute for XHTML compliance.
					$imgSplit[$k] = '<img '.t3lib_div::implodeAttributes($attribArray,1,1).' />';
				}
			}
		}
	}
	return implode('',$imgSplit);
}
545
546 /**
547 * Transformation handler: 'ts_images' / direction: "rte"
548 * Processing images from database content going into the RTE.
549 * Processing includes converting the src attribute to an absolute URL.
550 *
551 * @param string Content input
552 * @return string Content output
553 */
function TS_images_rte($value) {

	$siteUrl = $this->siteUrl();
	$sitePath = str_replace (t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
	$backPathLength = strlen($this->relBackPath);

		// Split content by <img> tags and traverse the resulting array for processing:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v) {
		if ($k%2) {	// image found:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$absRef = trim($attribArray['src']);

				// Unless the src attribute is already pointing to an external URL:
			if (strtolower(substr($absRef,0,4))!='http') {
					// Remove the back-path prefix, but only if the src really starts with it;
					// cutting a fixed number of characters would corrupt any other reference.
				if ($backPathLength && !strncmp($attribArray['src'],$this->relBackPath,$backPathLength)) {
					$attribArray['src'] = substr($attribArray['src'],$backPathLength);
				}
					// if site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
				$attribArray['src'] = preg_replace('#^'.preg_quote($sitePath,'#').'#','',$attribArray['src']);
				$attribArray['src'] = $siteUrl.$attribArray['src'];
				if (!isset($attribArray['alt'])) $attribArray['alt'] = '';
				$params = t3lib_div::implodeAttributes($attribArray);
				$imgSplit[$k] = '<img '.$params.' />';
			}
		}
	}

		// return processed content:
	return implode('',$imgSplit);
}
584
585 /**
586 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
587 * Converting <A>-tags to/from abs/rel
588 *
589 * @param string Content input
590 * @param string Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
591 * @return string Content output
592 */
function TS_reglinks($value,$direction) {
		// Any direction other than "rte" or "db" yields an empty string
	$retVal = '';

	switch($direction) {
		case 'rte':
				// Database -> RTE: make the href of all <a> tags absolute
			$retVal = $this->TS_AtagToAbs($value,1);
		break;
		case 'db':
				// RTE -> database: strip the site URL from local links again
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
				// foreach instead of each(): each() is deprecated since PHP 7.2 and removed in PHP 8
			foreach($blockSplit as $k => $v) {
				if ($k%2) {	// block:
					$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
						// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL) {
						$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag = '<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
					$eTag = '</a>';
						// The content between the tags is processed recursively
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($blockSplit[$k]),$direction).$eTag;
				}
			}
			$retVal = implode('',$blockSplit);
		break;
	}
	return $retVal;
}
621
622 /**
623 * Transformation handler: 'ts_links' / direction: "db"
624 * Converting <A>-tags to <link tags>
625 *
626 * @param string Content input
627 * @return string Content output
628 * @see TS_links_rte()
629 */
function TS_links_db($value) {

		// Every odd element of the split result is a complete <a>...</a> block
	$parts = $this->splitIntoBlock('A',$value);
	foreach($parts as $idx => $block) {
		if (!($idx%2)) {
			continue;	// Content outside of A-tags is left untouched
		}

		$attribs = $this->get_tag_attributes_classic($this->getFirstTag($block),1);
		$info = $this->urlInfoForLinkTags($attribs['href']);

			// Find out whether the tag carries attributes a <link> tag cannot express
		$extraAttribs = $attribs;
		unset($extraAttribs['href'], $extraAttribs['target'], $extraAttribs['class'], $extraAttribs['title']);
		if ($extraAttribs['rteerror']) {	// "rteerror" and the "style" that came with it are only RTE error markup
			unset($extraAttribs['style'], $extraAttribs['rteerror']);
		}

		if (!count($extraAttribs)) {	// Only href, target, class and title are present, so the link can become a <link> tag
			$target = $attribs['target'];
			$class = $attribs['class'];
			$title = $attribs['title'];

				// Building the TYPO3 pseudo-tag: <link [url][,0,query] [target|-] [class|-] ["title"]>
				// A dash fills the position of a missing value if a later value follows
			$openTag = '<link '.$info['url'];
			if ($info['query']) {
				$openTag.= ',0,'.$info['query'];
			}
			if ($target) {
				$openTag.= ' '.$target;
			} elseif ($class || $title) {
				$openTag.= ' -';
			}
			if ($class) {
				$openTag.= ' '.$class;
			} elseif ($title) {
				$openTag.= ' -';
			}
			if ($title) {
				$openTag.= ' "'.$title.'"';
			}
			$openTag.= '>';

			$parts[$idx] = $openTag.$this->TS_links_db($this->removeFirstAndLastTag($parts[$idx])).'</link>';
		} else {	// ... otherwise the link stays an a-tag
				// The 'rtekeep' marker is never stored
			unset($attribs['rtekeep']);
				// A local url is stored without the site url prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribs['href'],0,strlen($siteURL))==$siteURL) {
				$attribs['href'] = $this->relBackPath.substr($attribs['href'],strlen($siteURL));
			}
			$parts[$idx] = '<a '.t3lib_div::implodeAttributes($attribs,1).'>'.$this->TS_links_db($this->removeFirstAndLastTag($parts[$idx])).'</a>';
		}
	}
	return implode('',$parts);
}
670
671 /**
672 * Transformation handler: 'ts_links' / direction: "rte"
673 * Converting <link tags> to <A>-tags
674 *
675 * @param string Content input
676 * @return string Content output
677 * @see TS_links_db()
678 */
function TS_links_rte($value) {
		// Existing <a> tags get an absolute href first
	$value = $this->TS_AtagToAbs($value);

		// Split content by the TYPO3 pseudo tag "<link>":
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v) {
		$error = '';
		if ($k%2) {	// block:
				// Tag content without the closing ">", split into [0]=tag name, [1]=link parameter, [2]=target, [3]=class, [4]=title
			$tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v),0,-1)),true);
			$link_param = $tagCode[1];
			$href = '';
			$siteUrl = $this->siteUrl();
				// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
				// (PCRE is used throughout: the POSIX ereg*() functions are removed in PHP 7)
			if(strstr($link_param,'@')) {	// mailadr
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#') {	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar = intval(strpos($link_param, '/'));
				$urlChar = intval(strpos($link_param, '.'));

					// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension'])))) {
					$href = $siteUrl.$link_param;
				} elseif($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar)) {	// url (external): If doubleSlash or if a '.' comes before a '/'.
						// Prepend "http://" unless the parameter already starts with a scheme
					if (!preg_match('#^[a-z]*://#',trim(strtolower($link_param)))) {$scheme='http://';} else {$scheme='';}
					$href = $scheme.$link_param;
				} elseif($fileChar) {	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);	// Link-data del
					if (!strcmp($idPart,'')) { $idPart=$this->recPid; }	// If no id or alias is given, set it to class record pid

						// FIXME commented because useless - what is it for?
					// if ($link_params_parts[1] && !$sectionMark) {
					// 	$sectionMark = '#'.trim($link_params_parts[1]);
					// }

						// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1) {
						$idPart = $pairParts[0];
							// Type ? future support for?
					}
						// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart)) {
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}
					$page = t3lib_BEfunc::getRecord('pages', $idPart);
					if (is_array($page)) {	// Page must exist...
						$pairParts = t3lib_div::trimExplode(',',$link_param);
						$href = $siteUrl.'?id='.$pairParts[0].($pairParts[2]?$pairParts[2]:'');
					} else if(strtolower(substr($link_param, 0, 7)) == 'record:') {
							// linkHandler - allowing links to start with "record:"
						$href = $link_param;
					} else {
							// The link is still rendered, but flagged as an error in the RTE
						#$href = '';
						$href = $siteUrl.'?id='.$link_param;
						$error = 'No page found: '.$idPart;
					}
				}
			}

				// Setting the A-tag:
			$bTag = '<a href="'.htmlspecialchars($href).'"'.
						($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
						($tagCode[3]&&$tagCode[3]!='-' ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
						($tagCode[4] ? ' title="'.htmlspecialchars($tagCode[4]).'"' : '').
						($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : '').	// Should be OK to add the style; the transformation back to database will remove it...
						'>';
			$eTag = '</a>';
				// The content between the tags is processed recursively
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

		// Return content:
	return implode('',$blockSplit);
}
761
762 /**
763 * Preserve special tags
764 *
765 * @param string Content input
766 * @return string Content output
767 */
function TS_preserve_db($value) {
		// Nothing to restore when no tags are configured for preservation
	if (!$this->preserveTags) {
		return $value;
	}

		// In the RTE the preserved tags are represented by <span specialtag="..."> wrappers
	$parts = $this->splitIntoBlock('span',$value);
	foreach($parts as $idx => $block) {
		if (!($idx%2)) {
			continue;	// Not a span block
		}

		$attribs = $this->get_tag_attributes_classic($this->getFirstTag($block));
		if (!$attribs['specialtag']) {
			continue;	// An ordinary span, leave it alone
		}

			// The attribute holds the rawurlencoded original opening tag; put it back around the content
		$originalTag = rawurldecode($attribs['specialtag']);
		$originalTagName = $this->getFirstTagName($originalTag);
		$parts[$idx] = $originalTag.$this->removeFirstAndLastTag($parts[$idx]).'</'.$originalTagName.'>';
	}
	return implode('',$parts);
}
785
786 /**
787 * Preserve special tags
788 *
789 * @param string Content input
790 * @return string Content output
791 */
function TS_preserve_rte($value) {
		// Nothing to wrap when no tags are configured for preservation
	if (!$this->preserveTags) {
		return $value;
	}

	$parts = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($parts as $idx => $block) {
		if (!($idx%2)) {
			continue;	// Content outside of the preserved tags
		}

			// Replace the tag by a span that carries the rawurlencoded original opening tag
		$encodedTag = rawurlencode($this->getFirstTag($block));
		$innerContent = $this->removeFirstAndLastTag($parts[$idx]);
		$parts[$idx] = '<span specialtag="'.$encodedTag.'">'.$innerContent.'</span>';
	}
	return implode('',$parts);
}
803
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by block elements. Blocks are handled per tag name
 * (blockquote/dd/div are cleaned recursively, lists and headers are converted to
 * <typolist>/<typohead> unless in CSS mode, tables are removed unless preserved),
 * while content between blocks is resolved into plain lines by divideIntoLines().
 *
 * @param	string		$value Content input
 * @param	boolean		$css If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_rte()
 */
function TS_transform_db($value,$css=FALSE) {

		// Recursion guard, so forever loops are avoided (they should not occur, but an error would potentially do this...)
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0) {
			// Restore the counter before bailing out; otherwise every early return would permanently lower the budget of this object.
		$this->TS_transform_db_safecounter++;
		return $value;
	}

		// Pattern matching any run of linebreak characters (10 or 13):
	$lineBreakPattern = '/['.preg_quote(chr(10).chr(13)).']+/';

		// Split the content from RTE by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList,$value);

	$cc=0;
	$aC = count($blockSplit);

		// Avoid superfluous linebreaks by transform_db after ending headListTag
	while($aC && !strcmp(trim($blockSplit[$aC-1]),'')) {
		unset($blockSplit[$aC-1]);
		$aC = count($blockSplit);
	}

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		$cc++;
			// No linebreak is appended after the very last part:
		$lastBR = $cc==$aC ? '' : chr(10);

		if ($k%2) {	// Inside block:

				// Init:
			$tag=$this->getFirstTag($v);
			$tagName=strtolower($this->getFirstTagName($v));

				// Process based on the tag:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
				case 'dd':			// Do the same on dd elements
				case 'div':			// Do the same on div sections, if they were splitted
					$blockSplit[$k]=$tag.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css) {
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
								// foreach instead of while(list()=each()), since each() is no longer available in current PHP versions
							foreach (array_keys($parts) as $k2) {
								$parts[$k2]=preg_replace($lineBreakPattern,'',$parts[$k2]);	// remove all linesbreaks!
								$parts[$k2]=$this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2]=$this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
							}
							if ($tagName=='ol') { $params=' type="1"'; } else { $params=''; }
							$blockSplit[$k]='<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css) {
						$blockSplit[$k]=$this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css) {
						$attribArray=$this->get_tag_attributes_classic($tag);
							// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
								// Header level is taken from the digit of the tag name; level 6 is written without a type attribute
							$type = intval(substr($tagName,1));
							$blockSplit[$k]='<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
							$blockSplit[$k]='<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
							// Eliminate true linebreaks inside Hx tags
						$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				default:
						// Eliminate true linebreaks inside other headlist tags and after hr tag
					$blockSplit[$k]=preg_replace($lineBreakPattern,' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
				break;
			}
		} else {	// NON-block:
			if (strcmp(trim($blockSplit[$k]),'')) {
				$blockSplit[$k]=$this->divideIntoLines(preg_replace($lineBreakPattern,' ',$blockSplit[$k])).$lastBR;
				$blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]);
			} else unset($blockSplit[$k]);
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
925
/**
 * Wraps a-tags that contain a style attribute with a span-tag.
 *
 * For every <a> block whose "style" attribute is set, the style is moved from
 * the a-tag to a new surrounding <span>; all other a-tags are left as they are.
 *
 * @param	string		$value Content input
 * @return	string		Content output
 */
function transformStyledATags($value) {
	$pieces = $this->splitIntoBlock('A', $value);

	foreach ($pieces as $index => $piece) {
		if (!($index % 2)) {
			continue;	// Not an A-tag block
		}

		$linkAttributes = $this->get_tag_attributes_classic($this->getFirstTag($piece), 1);
		if (!$linkAttributes['style']) {
			continue;	// Nothing to move
		}

			// Move the style attribute from the link to the wrapping span:
		$spanAttributes = array('style' => $linkAttributes['style']);
		unset($linkAttributes['style']);

		$openingTags = '<span ' . t3lib_div::implodeAttributes($spanAttributes, 1) . '>'
			. '<a ' . t3lib_div::implodeAttributes($linkAttributes, 1) . '>';
		$pieces[$index] = $openingTags . $this->removeFirstAndLastTag($piece) . '</a></span>';
	}

	return implode('', $pieces);
}
948
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by block elements. <typolist> and <typohead> blocks are
 * converted to OL/UL and Hx tags (unless disabled in procOptions),
 * blockquote/dd/div blocks are processed recursively, and the content between
 * blocks is wrapped line by line in p/div tags by setDivTags().
 *
 * @param	string		$value Content input
 * @param	boolean		$css If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value,$css=0) {

		// Tags that delimit blocks; also used to look ahead at the tag following a non-block part:
	$blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.($this->procOptions['preserveDIVSections']?'DIV,':'').$this->blockElementList;

		// PCRE replacements for the former ereg patterns (the ereg functions are gone from current PHP).
		// "\z" is used instead of "$" so the match is anchored at the very end of the string only, like the POSIX pattern was.
	$leadingBreakPattern = '/^[ ]*'.chr(10).'/';
	$trailingBreakPattern = '/'.chr(10).'[ ]*\z/';

		// Split the content from Database by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock($blockTagList,$value);

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// Inside one of the blocks:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

				// Based on tagname, we do transformations:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes
				case 'dd':			// Keep definitions
				case 'div':			// Keep div sections, if they were splitted
					$blockSplit[$k] = $tag.
										$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
										'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
							// Strip the linebreak directly after the opening and before the closing tag:
						$tListContent = preg_replace($leadingBreakPattern,'',$tListContent);
						$tListContent = preg_replace($trailingBreakPattern,'',$tListContent);
						$lines = explode(chr(10),$tListContent);
						$typ = $attribArray['type']==1 ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
											'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
											'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
						$typ = t3lib_div::intInRange($attribArray['type'],0,6);
							// A missing (zero) type means header level 6:
						if (!$typ)	$typ=6;
						$align = $attribArray['align']?' align="'.$attribArray['align'].'"': '';
						$class = $attribArray['class']?' class="'.$attribArray['class'].'"': '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
											$tC.
											'</h'.$typ.'>';
					}
				break;
			}
				// Removing the linebreak that follows the block. The following part is (re)written even if it did not exist, as before.
			$followingPart = isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '';
			$blockSplit[$k+1] = preg_replace($leadingBreakPattern,'',$followingPart);
		} else {	// NON-block:
				// $blockSplit[$k] is read here (not $v), because the preceding block may have modified this part.
			$nextFTN = $this->getFirstTagName(isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : NULL);
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList($blockTagList,$nextFTN)) {	// Removing linebreak if typolist/typohead
				$blockSplit[$k] = preg_replace($trailingBreakPattern,'',$blockSplit[$k]);
			}
				// If $blockSplit[$k] is blank then unset the line. UNLESS the line happend to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak) {
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}
	return implode(chr(10),$blockSplit);
}
1023
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Removes all tags from the content, except a fixed list of allowed tags.
 *
 * @param	string		$value Content input
 * @return	string		Content output
 */
function TS_strip_db($value) {
		// The tags that strip_tags() must leave in place:
	$allowedTagNames = array(
		'b', 'i', 'u', 'a', 'img', 'br', 'div', 'center', 'pre', 'font',
		'hr', 'sub', 'sup', 'p', 'strong', 'em', 'li', 'ul', 'ol', 'blockquote'
	);

		// strip_tags() expects the allowed tags in the form "<b><i>..."
	$allowedTagString = '';
	foreach ($allowedTagNames as $allowedTagName) {
		$allowedTagString .= '<' . $allowedTagName . '>';
	}

	return strip_tags($value, $allowedTagString);
}
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049 /***************************************************************
1050 *
1051 * Generic RTE transformation, analysis and helper functions
1052 *
1053 **************************************************************/
1054
/**
 * Reads the file or url $url and returns the content.
 * The work is delegated to t3lib_div::getURL().
 *
 * @param	string		$url Filepath/URL to read
 * @return	string		The content from the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url) {
	$content = t3lib_div::getURL($url);
	return $content;
}
1065
/**
 * Function for cleaning content going into the database.
 * Content is cleaned eg. by removing unallowed HTML and ds-HSC content
 * It calls HTMLcleaner with a configuration derived from getKeepTags('db') and the processing options.
 *
 * @param	string		$content Content to clean up
 * @param	string		$tagList Comma list of tags to specifically allow. Default comes from getKeepTags and is ""
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='') {
		// Allowed tags; an explicit tag list overrides the configured default:
	$keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');

		// Default: remove unknown tags.
	$keepUnknownTags = 0;
	if ($this->procOptions['dontRemoveUnknownTags_db']) {
		$keepUnknownTags = 1;
	}

		// Default: re-convert literals to characters (that is &lt; to <)
	$hSC = -1;
	if ($this->procOptions['dontUndoHSC_db']) {
		$hSC = 0;
	}

		// Create additional configuration in order to honor the setting RTE.default.proc.HTMLparser_db.xhtml_cleaning=1
		// (the entry and exit parser configurations are honored as well)
	$addConfig = array();
	foreach (array('HTMLparser_db.', 'entryHTMLparser_db.', 'exitHTMLparser_db.') as $parserKey) {
		if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
			$addConfig['xhtml'] = 1;
			break;
		}
	}

	return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags, $hSC, $addConfig);
}
1093
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the result is cached per direction in $this->getKeepTags_cache and reused on the next call.
 *
 * @param	string		$direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param	string		$tagList Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='') {

		// Serve the cached configuration, as long as no explicit tag list is requested:
	if (is_array($this->getKeepTags_cache[$direction]) && !$tagList) {
		return $this->getKeepTags_cache[$direction];
	}

		// Setting up allowed tags:
	if (strcmp($tagList,'')) {
			// If the $tagList input var is set, this will take precedence
		$keepTags = array_flip(t3lib_div::trimExplode(',', $tagList, 1));
	} else {
			// Default is to get allowed/denied tags from internal array of processing options.
			// Construct default list of tags to keep:
		$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
		$keepTags = array_flip(t3lib_div::trimExplode(',', $typoScript_list.','.strtolower($this->procOptions['allowTags']), 1));

			// For tags to deny, remove them from $keepTags array:
		foreach (t3lib_div::trimExplode(',', $this->procOptions['denyTags'], 1) as $deniedTag) {
			unset($keepTags[$deniedTag]);
		}
	}

		// Bold/italic remapping is active unless explicitly switched off:
	$remapBoldAndItalic = !isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags'];

		// Based on the direction of content, set further options:
	if ($direction == 'rte') {
			// GOING from database to Rich Text Editor:

		if ($remapBoldAndItalic) {
				// Transform bold/italics tags to strong/em
			if (isset($keepTags['b'])) {
				$keepTags['b'] = array('remap'=>'STRONG');
			}
			if (isset($keepTags['i'])) {
				$keepTags['i'] = array('remap'=>'EM');
			}
		}

			// Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
		list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);

	} elseif ($direction == 'db') {
			// GOING from RTE to database:

		if ($remapBoldAndItalic) {
				// Transform strong/em back to bold/italics:
			if (isset($keepTags['strong'])) {
				$keepTags['strong'] = array('remap'=>'b');
			}
			if (isset($keepTags['em'])) {
				$keepTags['em'] = array('remap'=>'i');
			}
		}

			// Setting up span tags if they are allowed:
		if (isset($keepTags['span'])) {
			$classes = array_merge(array(''), $this->allowedClasses);
				// The list of allowed classes is only enforced if "allowedClasses" is configured:
			if ($this->procOptions['allowedClasses']) {
				$classFix = array(
					'list' => $classes,
					'removeIfFalse' => 1
				);
			} else {
				$classFix = array(
					'removeIfFalse' => 1
				);
			}
			$keepTags['span'] = array(
				'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir',
				'fixAttrib' => array(
					'class' => $classFix,
				),
				'rmTagIfNoAttrib' => 1
			);
		}

			// Setting up font tags if they are allowed:
		if (isset($keepTags['font'])) {
			$colors = array_merge(array(''), t3lib_div::trimExplode(',', $this->procOptions['allowedFontColors'], 1));
				// The list of allowed colors is only enforced if "allowedFontColors" is configured:
			$colorFix = array(
				'removeIfFalse' => 1
			);
			if ($this->procOptions['allowedFontColors']) {
				$colorFix['list'] = $colors;
			}
			$keepTags['font'] = array(
				'allowedAttribs' => 'face,color,size',
				'fixAttrib' => array(
					'face' => array(
						'removeIfFalse' => 1
					),
					'color' => $colorFix,
					'size' => array(
						'removeIfFalse' => 1,
					)
				),
				'rmTagIfNoAttrib' => 1
			);
		}

			// Setting further options, getting them from the processing options:
		$TSc = $this->procOptions['HTMLparser_db.'];
		if (!$TSc['globalNesting']) {
			$TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
		}
		if (!$TSc['noAttrib']) {
			$TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
		}

			// Transforming the array from TypoScript to regular array:
		list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
	}

		// A configuration built from an explicit tag list is returned directly and never cached:
	if ($tagList) {
		return $keepTags;
	}

		// Caching (internally, in object memory) the result and returning it:
	$this->getKeepTags_cache[$direction] = $keepTags;
	return $this->getKeepTags_cache[$direction];
}
1205
/**
 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by chr(10).
 * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * Note: If no p/div section is found, or the recursion brake has reached zero, $value is returned
 * as a string even if $returnArray is set. The recursive call relies on this to detect that no further nesting exists.
 *
 * @param	string		$value Value to process.
 * @param	integer		$count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		$returnArray If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	mixed		Processed input value (string), or array of parts if $returnArray is set and p/div sections were found.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {

		// Internalize font tags (move them from OUTSIDE p/div to inside it that is the case):
	if ($this->procOptions['internalizeFontTags']) {$value = $this->internalizeFontTags($value);}

		// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?

	if ($this->procOptions['keepPDIVattribs']) {
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

		// Returns plainly the value if there was no div/p sections in it
	if (count($divSplit)<=1 || $count<=0) {
		return $value;
	}

		// Traverse the splitted sections:
	foreach($divSplit as $k => $v) {
		if ($k%2) {	// Inside
			$v=$this->removeFirstAndLastTag($v);

				// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($v,$count-1,1);
			if (is_array($subLines)) {
					// So, if there happend to be sub-nesting of p/div, this is written directly as the new content of THIS section. (This would be considered 'an error')
					// Nothing to do.
			} else {
					//... but if NO subsection was found, we process it as a TRUE line without erronous content:
				$subLines = array($subLines);
				if (!$this->procOptions['dontConvBRtoParagraph']) {
						// process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
						// preg_split() with the "i" modifier replaces the former spliti() call, which is gone from current PHP.
					$subLines = preg_split('/<br[[:space:]]*[\/]?>/i',$v);
				}

					// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
					// foreach replaces the former while(list()=each()) loop, since each() is gone from current PHP.
				foreach (array_keys($subLines) as $sk) {

						// Clear up the subline for DB.
					$subLines[$sk]=$this->HTMLcleaner_db($subLines[$sk]);

						// Get first tag, attributes etc:
					$fTag = $this->getFirstTag($divSplit[$k]);
					$tagName=strtolower($this->getFirstTagName($divSplit[$k]));
					$attribs=$this->get_tag_attributes($fTag);

						// Keep attributes (lowercase)
					$newAttribs=array();
					if (count($keepAttribListArr)) {
						foreach($keepAttribListArr as $keepA) {
							if (isset($attribs[0][$keepA])) { $newAttribs[$keepA] = $attribs[0][$keepA]; }
						}
					}

						// ALIGN attribute:
					if (!$this->procOptions['skipAlign'] && strcmp(trim($attribs[0]['align']),'') && strtolower($attribs[0]['align'])!='left') {	// Set to value, but not 'left'
						$newAttribs['align']=strtolower($attribs[0]['align']);
					}

						// CLASS attribute:
					if (!$this->procOptions['skipClass'] && strcmp(trim($attribs[0]['class']),'')) {	// Set to whatever value
						if (!count($this->allowedClasses) || in_array(strtoupper($attribs[0]['class']),$this->allowedClasses)) {
							$newAttribs['class']=$attribs[0]['class'];
						}
					}

						// Remove any line break char (10 or 13)
					$subLines[$sk]=str_replace(array(chr(10),chr(13)),'',$subLines[$sk]);

						// If there are any attributes to keep AND remapParagraphTag is not set to "1", wrap the line in its tag again (remapped to P or DIV if configured):
					if (count($newAttribs) && strcmp($remapParagraphTag,'1')) {
						if ($remapParagraphTag=='P')	$tagName='p';
						if ($remapParagraphTag=='DIV')	$tagName='div';
						$subLines[$sk]='<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>'.$subLines[$sk].'</'.$tagName.'>';
					}
				}
			}
				// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

				// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
				// But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
				// Those attributes should have been filtered before; if they are still there they must be considered as possible content.
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;' && !preg_match('/\<(img)(\s[^>]*)?\/?>/si', $divSplit[$k]) && !preg_match('/\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
				$divSplit[$k]='';
			}
		} else {	// outside div:
				// Remove positions which are outside div/p tags and without content
			$divSplit[$k]=trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],''))	unset($divSplit[$k]);	// Remove part if it's empty
		}
	}

		// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1316
/**
 * Converts all lines into <div></div>/<p></p>-sections (unless the line is a div- or p-section already)
 * For processing of content going FROM database TO RTE.
 *
 * Each line (separated by chr(10)) is cleaned with HTMLcleaner, blank lines become "&nbsp;",
 * and the result is wrapped in <$dT> tags.
 *
 * @param	string		$value Value to convert
 * @param	string		$dT Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p') {

		// First, setting configuration for the HTMLcleaner function. This will process each line between the <div>/<p> section on their way to the RTE
	$keepTags = $this->getKeepTags('rte');
		// Default: unknown tags are passed as 'protect' to HTMLcleaner; 0 if "dontProtectUnknownTags_rte" is set.
	$unknownTagMode = 'protect';
	if ($this->procOptions['dontProtectUnknownTags_rte']) {
		$unknownTagMode = 0;
	}
		// Default: 1 is passed as HSC mode to HTMLcleaner; 0 if "dontHSC_rte" is set.
	$hSC = 1;
	if ($this->procOptions['dontHSC_rte']) {
		$hSC = 0;
	}
		// Default: "&amp;nbsp;" is converted back to "&nbsp;" after cleaning.
	$convNBSP = $this->procOptions['dontConvAmpInNBSP_rte'] ? 0 : 1;

		// Divide the content into lines, based on chr(10):
	$lines = explode(chr(10), $value);
	foreach ($lines as $lineNumber => $line) {

			// Processing of line content:
		if (strcmp(trim($line), '')) {
				// Clean the line content:
			$line = $this->HTMLcleaner($line, $keepTags, $unknownTagMode, $hSC);
			if ($convNBSP) {
				$line = str_replace('&amp;nbsp;', '&nbsp;', $line);
			}
		} else {
				// If the line is blank, set it to &nbsp;
			$line = '&nbsp;';
		}

			// Wrapping the line in <$dT> if not already wrapped in div or p tags:
		$testStr = strtolower(trim($line));
		$isDivSection = substr($testStr, 0, 4) == '<div' && substr($testStr, -6) == '</div>';
		$isParagraph = substr($testStr, 0, 2) == '<p' && substr($testStr, -4) == '</p>';
		if (!$isDivSection && !$isParagraph) {
			$line = '<' . $dT . '>' . $line . '</' . $dT . '>';
		}

		$lines[$lineNumber] = $line;
	}

		// Implode result:
	return implode(chr(10), $lines);
}
1359
/**
 * This splits the $value in font-tag chunks.
 * If there are any <P>/<DIV> sections inside of them, the font-tag is wrapped AROUND the content INSIDE of the P/DIV sections and the outer font-tag is removed.
 * Content between those sections keeps the font-tag, unless it is empty after stripping tags.
 * Font-tag chunks without P/DIV sections inside are left unchanged.
 * This function is used by eg. divideIntoLines() if the procesing option 'internalizeFontTags' is set.
 *
 * @param	string		$value Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value) {

		// Splitting into font tag blocks:
	$fontParts = $this->splitIntoBlock('font', $value);

	foreach ($fontParts as $fontIndex => $fontPart) {
		if (!($fontIndex % 2)) {
			continue;	// Outside font tags
		}

			// The opening font-tag, to be repeated inside each section:
		$fontTag = $this->getFirstTag($fontPart);

		$sections = $this->splitIntoBlock('div,p', $this->removeFirstAndLastTag($fontPart), 1);
		if (count($sections) <= 1) {
			continue;	// No div/p sections inside the font-tag, nothing to do
		}

			// Rebuild the chunk with the font-tag moved inside the sections:
		$rebuilt = '';
		foreach ($sections as $sectionIndex => $section) {
			if ($sectionIndex % 2) {
					// Inside a div/p section: put the font-tag around its inner content
				$sectionTag = $this->getFirstTag($section);
				$sectionTagName = $this->getFirstTagName($section);
				$sectionContent = $this->removeFirstAndLastTag($section);
				$rebuilt .= $sectionTag . $fontTag . $sectionContent . '</font>' . '</' . $sectionTagName . '>';
			} elseif (trim(strip_tags($section))) {
					// Text between the sections keeps the font-tag
				$rebuilt .= $fontTag . $section . '</font>';
			} else {
				$rebuilt .= $section;
			}
		}
		$fontParts[$fontIndex] = $rebuilt;
	}

	return implode('', $fontParts);
}
1400
/**
 * Returns the site URL as reported by t3lib_div::getIndpEnv().
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl() {
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	return $siteUrl;
}
1410
/**
 * Return the storage folder of RTE image files.
 * This is $this->rte_p['imgpath'] if set, otherwise $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string		Storage folder
 */
function rteImageStorageDir() {
	if ($this->rte_p['imgpath']) {
		return $this->rte_p['imgpath'];
	}
	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1420
/**
 * Remove all tables from incoming code.
 * Each table is replaced by the content of its cells, every cell followed by $breakChar.
 * Thus at least the content is preserved in some way.
 *
 * @param	string		$value Input value
 * @param	string		$breakChar Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />') {

		// Splitting value into table blocks:
	$tableParts = $this->splitIntoBlock('table', $value);

		// Traverse blocks of tables:
	foreach ($tableParts as $tableIndex => $tablePart) {
		if (!($tableIndex % 2)) {
			continue;	// Content outside tables is kept as it is
		}

			// Collect the content of all cells of all rows:
		$flattened = '';
		$rowParts = $this->splitIntoBlock('tr', $tablePart);
		foreach ($rowParts as $rowIndex => $rowPart) {
			if (!($rowIndex % 2)) {
				continue;	// Outside rows
			}
			$cells = $this->getAllParts($this->splitIntoBlock('td', $rowPart), 1, 0);
			foreach ($cells as $cellContent) {
				$flattened .= $cellContent . $breakChar;
			}
		}
		$tableParts[$tableIndex] = $flattened;
	}

		// Implode it all again:
	return implode($breakChar, $tableParts);
}
1453
/**
 * Default tag mapping for TS.
 * Maps strong/em to b/i for direction "db" and b/i to strong/em for direction "rte".
 * For any other direction the code is returned unchanged.
 *
 * @param	string		$code Input code to process
 * @param	string		$direction Direction: To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte') {
	$toDatabaseMap = array(
		'strong' => 'b',
		'em' => 'i'
	);
	$toEditorMap = array(
		'b' => 'strong',
		'i' => 'em'
	);

	if ($direction=='db') {
		$code = $this->mapTags($code, $toDatabaseMap);
	}
	if ($direction=='rte') {
		$code = $this->mapTags($code, $toEditorMap);
	}

	return $code;
}
1476
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute (as "width:xxx px" / "height:xxx px"), use that!
 * Otherwise the "width" / "height" attributes are used.
 *
 * @param	array		$attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray) {
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style) {
			// Matches eg. ": 100 px" after the property name; only pixel values are recognized.
		$regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';

			// Width
			// Each match uses its own result array, so a failed height match can never pick up the width value.
		$widthMatch = array();
		if (preg_match('/width'.$regex.'/i', $style, $widthMatch)) {
			$w = intval($widthMatch[1]);
		}

			// Height
		$heightMatch = array();
		if (preg_match('/height'.$regex.'/i', $style, $heightMatch)) {
			$h = intval($heightMatch[1]);
		}
	}

		// Fall back to the plain attributes, if the style did not provide a value:
	if (!$w && isset($attribArray['width'])) {
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height'])) {
		$h = $attribArray['height'];
	}

	return array(intval($w), intval($h));
}
1504
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * The returned array always has the keys "url" and "type". "type" is one of
 * "email", "ext", "anchor", "page" or "file". For URLs on the current host the
 * keys "relScriptPath" and "relUrl" are set as well, and for pages also
 * "pageid", "cElement" and "query".
 *
 * @param	string		$url URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url) {
	$info = array();
	$url = trim($url);
	if (substr(strtolower($url),0,7)=='mailto:') {
		$info['url']=trim(substr($url,7));
		$info['type']='email';
	} else {
		$curURL = $this->siteUrl(); 	// 100502, removed this: 'http://'.t3lib_div::getThisUrl(); Reason: The url returned had typo3/ in the end - should be only the site's url as far as I see...

			// Find the length of the common prefix of the URL and the site URL.
			// The loop is bounded by the shorter string, so no offset beyond the end of either string is read.
		$maxLength = min(strlen($url), strlen($curURL));
		for($a=0;$a<$maxLength;$a++) {
			if ($url[$a]!==$curURL[$a]) {
				break;
			}
		}

		$info['relScriptPath']=substr($curURL,$a);
		$info['relUrl']=substr($url,$a);
		$info['url']=$url;
		$info['type']='ext';

		$siteUrl_parts = parse_url($url);
		$curUrl_parts = parse_url($curURL);

			// parse_url() only sets the keys of the components that are present (and may return FALSE), hence the isset() checks:
		$urlHost = isset($siteUrl_parts['host']) ? $siteUrl_parts['host'] : NULL;
		$curHost = isset($curUrl_parts['host']) ? $curUrl_parts['host'] : NULL;
		$urlFragment = isset($siteUrl_parts['fragment']) ? $siteUrl_parts['fragment'] : '';

		if ($urlHost==$curHost 	// Hosts should match
			&& (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir))) {	// If the script path seems to match or is empty (FE-EDIT)

				// New processing order 100502
			$uP=parse_url($info['relUrl']);
			$relPath = isset($uP['path']) ? $uP['path'] : '';
			$relQuery = isset($uP['query']) ? $uP['query'] : '';

			if (!strcmp('#'.$urlFragment,$info['relUrl'])) {
					// The relative part consists of the fragment only
				$info['url']=$info['relUrl'];
				$info['type']='anchor';
			} elseif (!trim($relPath) || !strcmp($relPath,'index.php')) {
					// No path or index.php: look for the page id in the query string
				$pp = explode('id=',$relQuery);
				$parameters = explode('&', isset($pp[1]) ? $pp[1] : '');
				$id = array_shift($parameters);
				if ($id) {
					$info['pageid']=$id;
					$info['cElement']=isset($uP['fragment']) ? $uP['fragment'] : NULL;
					$info['url']=$id.($info['cElement']?'#'.$info['cElement']:'');
					$info['type']='page';
					$info['query'] = !empty($parameters[0])?'&'.implode('&', $parameters):'';
				}
			} else {
				$info['url']=$info['relUrl'];
				$info['type']='file';
			}
		} else {
			unset($info['relScriptPath']);
			unset($info['relUrl']);
		}
	}
	return $info;
}
1564
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 *
 * For each a-tag with an href that has no scheme, the site URL is prepended to the
 * href after removing the first strlen($this->relBackPath) characters of it.
 * A-tags without href content always get the "rtekeep" attribute.
 * Nested a-tags are processed recursively with the same $dontSetRTEKEEP setting.
 *
 * @param	string		$value Content input
 * @param	boolean		$dontSetRTEKEEP If true, then the "rtekeep" attribute will not be set (except for a-tags without href).
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
	$blockSplit = $this->splitIntoBlock('A',$value);
		// foreach replaces the former while(list()=each()) loop, since each() is gone from current PHP.
	foreach ($blockSplit as $k => $v) {
		if ($k%2) {	// block:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);

				// Checking if there is a scheme, and if not, prepend the current url.
			if (isset($attribArray['href']) && strlen($attribArray['href'])) {	// ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
				$uP = parse_url(strtolower($attribArray['href']));
				if (empty($uP['scheme'])) {
					$attribArray['href'] = $this->siteUrl().substr($attribArray['href'],strlen($this->relBackPath));
				}
			} else {
				$attribArray['rtekeep'] = 1;
			}
			if (!$dontSetRTEKEEP)	$attribArray['rtekeep'] = 1;

			$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag='</a>';
				// The flag is passed on, so nested a-tags are treated like the outer one.
			$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]),$dontSetRTEKEEP).$eTag;
		}
	}
	return implode('',$blockSplit);
}
1597 }
1598
1599
// If an XCLASS file is registered for this script in $TYPO3_CONF_VARS for the current TYPO3_MODE, include it (once).
if (defined('TYPO3_MODE')) {
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
		include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
	}
}
1603
1604 ?>