* Added "rtehtmlarea" as a system extension
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 102: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 137: function init($elRef='',$recPid=0)
44 * 149: function setRelPath($path)
45 * 173: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 231: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 397: function TS_images_db($value)
52 * 538: function TS_images_rte($value)
53 * 572: function TS_reglinks($value,$direction)
54 * 606: function TS_links_db($value)
55 * 654: function TS_links_rte($value)
56 * 735: function TS_preserve_db($value)
57 * 759: function TS_preserve_rte($value)
58 * 780: function TS_transform_db($value,$css=FALSE)
59 * 891: function TS_transform_rte($value,$css=0)
60 * 962: function TS_strip_db($value)
61 *
62 * SECTION: Generic RTE transformation, analysis and helper functions
63 * 993: function getURL($url)
64 * 1007: function HTMLcleaner_db($content,$tagList='')
65 * 1028: function getKeepTags($direction='rte',$tagList='')
66 * 1137: function divideIntoLines($value,$count=5,$returnArray=FALSE)
67 * 1241: function setDivTags($value,$dT='p')
68 * 1286: function internalizeFontTags($value)
69 * 1322: function siteUrl()
70 * 1332: function rteImageStorageDir()
71 * 1344: function removeTables($value,$breakChar='<br />')
72 * 1376: function defaultTStagMapping($code,$direction='rte')
73 * 1399: function getWHFromAttribs($attribArray)
74 * 1425: function urlInfoForLinkTags($url)
75 * 1484: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
76 *
77 * TOTAL FUNCTIONS: 27
78 * (This index is automatically created/updated by the extension "extdeveval")
79 *
80 */
81
82 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
83
84
85
86
87
88
89
90
91
92
93
94
95 /**
96 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
97 *
98 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
99 * @package TYPO3
100 * @subpackage t3lib
101 */
102 class t3lib_parsehtml_proc extends t3lib_parsehtml {
103
	// Static:
var $headListTags = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,HR,ADDRESS,DL'; // Comma list of tag names; appended to 'TABLE,BLOCKQUOTE,' when TS_transform_db() splits content into blocks

	// Internal, static:
var $recPid = 0; // Page id (pid) of the record manipulated by the class; set by init() and used by TS_links_rte() as fallback page id
var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"; set by init()
var $relPath=''; // Relative path with trailing slash; set by setRelPath()
var $relBackPath=''; // Relative back-path ("../" per segment of the relative path); set by setRelPath() and prefixed to site-local URLs on their way to the database
var $procOptions = ''; // Processing options; RTE_transform() sets this to the 'proc.' key of the configuration array it receives (from Page TSconfig)

	// Internal, dynamic
var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls: decremented by each TS_transform_db() call, which returns its input untouched once the counter drops below zero
var $rte_p=''; // Parameters from TCA types configuration related to the RTE; set by RTE_transform()
var $getKeepTags_cache=array(); // Data caching for processing function
var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE; RTE_transform() fills it (upper-cased) from procOptions['allowedClasses'] before ts_transform/css_transform towards the database
var $preserveTags = ''; // Upper-cased comma list of tags to preserve; RTE_transform() sets it from procOptions['preserveTags']
120
121
122
123
124
125
126
127
128
129
/**
 * Stores the element reference and the page id of the record that is going to be processed.
 *
 * @param	string		Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 * @param	integer		PID of the record (page id)
 * @return	void
 */
function init($elRef='',$recPid=0) {
	$this->elRef = $elRef;
	$this->recPid = $recPid;
}
141
/**
 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
 * This is used when editing files with the RTE.
 * The input is trimmed and one leading plus one trailing slash are stripped. If nothing usable remains (empty string or "0") both properties are left untouched.
 *
 * @param	string		The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
 * @return	void		There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
 */
function setRelPath($path) {
	$path = trim($path);

		// PCRE instead of ereg_replace() (the POSIX regex functions were removed in PHP 7.0).
		// "\z" anchors at the very end of the string, exactly like "$" did in the POSIX expression.
	$path = preg_replace('#^/#','',$path);
	$path = preg_replace('#/\z#','',$path);

	if ($path) {
			// One "../" per path segment leads back from the file's directory to the site root:
		$this->relBackPath = str_repeat('../',count(explode('/',$path)));
		$this->relPath = $path.'/';
	}
}
163
/**
 * Evaluates whether the environment allows editing a "staticFileEdit" file and resolves which file that is.
 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
 *
 * @param	array		Parameters for the current field as found in types-config. Key "parameters" holds, in this order, the names of: edit-file field, content field, marker field, load-from-file field, status field.
 * @param	array		Current record we are editing.
 * @return	mixed		On success an array with the keys "editFile", "relEditFile", "contentField", "markerField", "loadFromFileField" and "statusField"; otherwise a string with an error message. If $pArr is not an array nothing (NULL) is returned.
 * @see t3lib_TCEmain, t3lib_transferData
 */
function evalWriteFile($pArr,$currentRecord) {

		// No write file configuration at all:
	if (!is_array($pArr)) {
		return;
	}

		// The configured base path must be set, end with a slash and exist below PATH_site:
	$basePath = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'];
	if (!($basePath && substr($basePath,-1)=='/' && @is_dir(PATH_site.$basePath))) {
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

		// The record must name a valid file in the field given as first parameter:
	$fieldNames = $pArr['parameters'];
	$fileField = trim($fieldNames[0]);
	$fileName = $currentRecord[$fileField];
	if (!($fileField && $fileName && t3lib_div::validPathStr($fileName))) {
		return "ERROR: Edit file name could not be found or was bad.";
	}

		// ... and that file must exist:
	$relFile = $basePath.$fileName;
	$absFile = PATH_site.$relFile;
	if (!@is_file($absFile)) {
		return "ERROR: Editfile '".$relFile."' did not exist";
	}

	return array(
		'editFile' => $absFile,
		'relEditFile' => $relFile,
		'contentField' => trim($fieldNames[1]),
		'markerField' => trim($fieldNames[2]),
		'loadFromFileField' => trim($fieldNames[3]),
		'statusField' => trim($fieldNames[4])
	);
}
201
202
203
204
205
206
207
208
209
210
211
212
213
214 /**********************************************
215 *
216 * Main function
217 *
218 **********************************************/
219
/**
 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
 * This is the main function called from tcemain and transfer data classes
 *
 * Processing order: line breaks are unified, the optional "entry" HTML cleaner is applied, each transformation mode is run
 * (in reverse order for direction "rte"), the optional "exit" HTML cleaner is applied and finally all line breaks are turned into \r\n.
 * A transformation registered in $TYPO3_CONF_VARS['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][mode] takes precedence over the built-in handler for that mode.
 *
 * @param	string		Input value
 * @param	array		Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
 * @param	string		Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE. Any other value runs no transformation mode at all.
 * @param	array		Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig. The "proc." key is used as processing options.
 * @return	string		Output value
 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array()) {

		// Init:
	$this->procOptions = $thisConfig['proc.'];
	$this->preserveTags = strtoupper(implode(',',t3lib_div::trimExplode(',',$this->procOptions['preserveTags'])));

		// Get parameters for rte_transformation:
	$p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);

		// Setting modes; "overruleMode" (comma separated) wins over the "mode" parameter (dash separated):
	if (strcmp($this->procOptions['overruleMode'],'')) {
		$modes = array_unique(t3lib_div::trimExplode(',',$this->procOptions['overruleMode']));
	} else {
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	}
	$revmodes = array_flip($modes);

		// Expand the meta mode "ts" in place:
	if (isset($revmodes['ts'])) {
		$modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
	}
		// Expand the meta mode "ts_css" in place:
	if (isset($revmodes['ts_css'])) {
		$modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
	}

		// Flatten the expanded entries and make the list unique (empty entries removed)
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

		// Reverse order if direction is "rte"
	if ($direction=='rte') {
		$modes = array_reverse($modes);
	}

		// Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and are thus totally independent processing options you can set up:
	$entry_HTMLparser = $this->procOptions['entryHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']) : '';
	$exit_HTMLparser = $this->procOptions['exitHTMLparser_'.$direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']) : '';

		// Line breaks of content are unified: \r\n becomes \n (a lone \r is only removed later by ts_transform/css_transform)
	if (!$this->procOptions['disableUnifyLineBreaks']) {
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

		// If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($entry_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

		// Traverse modes:
	foreach($modes as $cmd) {
			// ->DB
		if ($direction=='db') {
				// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_db($value,$this);
			} else { // ... else use defaults:
				switch($cmd) {
					case 'ts_images':
						$value = $this->TS_images_db($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'db');
					break;
					case 'ts_links':
						$value = $this->TS_links_db($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_db($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value); // Has a very disturbing effect, so just remove all '13' - depend on '10'
						$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
						$value = $this->TS_transform_db($value,$cmd=='css_transform');
					break;
					case 'ts_strip':
						$value = $this->TS_strip_db($value);
					break;
					default:
					break;
				}
			}
		}
			// ->RTE
		if ($direction=='rte') {
				// Checking for user defined transformation:
			if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
				$_procObj = &t3lib_div::getUserObj($_classRef);
				$_procObj->pObj = &$this;
					// Tell the user object which mode it is called for, exactly as in the "db" direction (a class registered for several modes can otherwise not tell them apart):
				$_procObj->transformationKey = $cmd;
				$value = $_procObj->transform_rte($value,$this);
			} else { // ... else use defaults:
				switch($cmd) {
					case 'ts_images':
						$value = $this->TS_images_rte($value);
					break;
					case 'ts_reglinks':
						$value = $this->TS_reglinks($value,'rte');
					break;
					case 'ts_links':
						$value = $this->TS_links_rte($value);
					break;
					case 'ts_preserve':
						$value = $this->TS_preserve_rte($value);
					break;
					case 'ts_transform':
					case 'css_transform':
						$value = str_replace(chr(13),'',$value); // Has a very disturbing effect, so just remove all '13' - depend on '10'
						$value = $this->TS_transform_rte($value,$cmd=='css_transform');
					break;
					default:
					break;
				}
			}
		}
	}

		// If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($exit_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

		// Final clean up of linebreaks:
	if (!$this->procOptions['disableUnifyLineBreaks']) {
		$value = str_replace(chr(13).chr(10),chr(10),$value); // Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value); // ... and then change all \n into \r\n
	}

		// Return value:
	return $value;
}
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380 /************************************
381 *
382 * Specific RTE TRANSFORMATION functions
383 *
384 *************************************/
385
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL (prefixed with ->relBackPath).
 * If it turns out that the URL is from another website than the current one, the image is read from that external URL and stored on the local server
 * (only for the extensions gif, png, jpeg and jpg, and unless procOptions "dontFetchExtPictures" is set).
 * Also "magic" images (files prefixed "RTEmagicC_" in the RTE image storage dir) and the procOptions "plainImageMode" are processed here.
 *
 * @param	string		The content from RTE going to Database
 * @return	string		Processed content
 */
function TS_images_db($value) {

		// Split content by <img> tags and traverse the resulting array for processing (odd keys hold the <img> tags):
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v) {
		if ($k%2) { // image found, do processing:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$siteUrl = $this->siteUrl();
			$absRef = trim($attribArray['src']); // It's always an absolute URL coming from the RTE into the Database.

				// External image from another URL? In that case, fetch image (unless disabled feature).
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures']) {
				$externalFile = $this->getUrl($absRef); // Get it
				if ($externalFile) {
					$pU = parse_url($absRef);
					$pI=pathinfo($pU['path']);

						// Only files with a known image extension are stored locally:
					if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($pI['extension']))) {
							// The external image is written twice: as "RTEmagicP_[hash].[ext]" and as "RTEmagicC_[hash].[ext].[ext]".
							// The latter name matches what the magic image branch below expects (prefix + original file name + extension).
						$filename = t3lib_div::shortMD5($absRef).'.'.$pI['extension'];
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						$C_origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];
						if (!@is_file($origFilePath)) {
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
							// From here on the image is treated as a local one:
						$absRef = $siteUrl.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$pI['extension'];

						$attribArray['src']=$absRef;
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}
				// Check image as local file (siteURL equals the one of the image)
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
				$path = rawurldecode(substr($absRef,strlen($siteUrl))); // Rel-path, rawurldecoded for special characters.
				$filepath = t3lib_div::getFileAbsFileName($path); // Abs filepath, locked to relative path of this project.

					// Check file existence (in relative dir to this installation!)
				if ($filepath && @is_file($filepath)) {

						// If "magic image":
					$pathPre=$this->rteImageStorageDir().'RTEmagicC_';
					if (t3lib_div::isFirstPartOfStr($path,$pathPre)) {
							// Find original file: strip the prefix and the last extension, then look for the same name with the "RTEmagicP_" prefix
						$pI=pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath)) {
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp=0;
							$imgObj->tempPath=PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath); // Image dimensions of the current image file
							$curWH = $this->getWHFromAttribs($attribArray); // Image dimensions as set in the image tag
								// Compare dimensions; only if the tag asks for another size than the file has, the file is re-created from the original:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1]) {
								$origImgInfo = $imgObj->getImageDimensions($origFilePath); // Image dimensions of the original image. NOTE(review): value is not used afterwards
								$cW = $curWH[0];
								$cH = $curWH[1]; // NOTE(review): dead assignment, overwritten on the next line
								$cH = 1000; // Make the image based on the width solely...
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$cW.'m',$cH.'m'); // NOTE(review): the "m" suffix presumably means "maximum" to imageMagickConvert() - confirm in t3lib_stdGraphic
								if ($imgI[3]) {
									$fI=pathinfo($imgI[3]); // NOTE(review): value is not used afterwards
									@copy($imgI[3],$filepath); // Override the child file
									unset($attribArray['style']);
									$attribArray['width']=$imgI[0];
									$attribArray['height']=$imgI[1];
									if (!$attribArray['border']) $attribArray['border']=0;
									$params = t3lib_div::implodeAttributes($attribArray,1);
									$imgSplit[$k]='<img '.$params.' />';
								}
							}
						}

					} elseif ($this->procOptions['plainImageMode']) { // If "plain image" has been configured:

							// Image dimensions as set in the image tag
						$curWH = $this->getWHFromAttribs($attribArray);
						$attribArray['width'] = $curWH[0];
						$attribArray['height'] = $curWH[1];

							// Forcing values for style and border:
						unset($attribArray['style']);
						if (!$attribArray['border']) $attribArray['border'] = 0;

							// Finding dimensions of image file:
						$fI = @getimagesize($filepath);

							// Perform corrections to aspect ratio based on configuration:
						switch((string)$this->procOptions['plainImageMode']) {
							case 'lockDimensions': // Always use the dimensions of the file itself
								$attribArray['width']=$fI[0];
								$attribArray['height']=$fI[1];
							break;
							case 'lockRatioWhenSmaller': // Limit the width to the width of the file, then deliberately fall through to 'lockRatio' to calculate the height:
								if ($attribArray['width']>$fI[0]) $attribArray['width'] = $fI[0];
							case 'lockRatio': // Calculate the height from the width using the ratio of the file
								if ($fI[0]>0) {
									$attribArray['height']=round($attribArray['width']*($fI[1]/$fI[0]));
								}
							break;
						}

							// Compile the image tag again:
						$params = t3lib_div::implodeAttributes($attribArray,1);
						$imgSplit[$k]='<img '.$params.' />';
					}
				} else { // Image was not found in a proper position on the server; the tag is deliberately left as it is

						// Commented out; removing the image tag might not be that logical...
					#$imgSplit[$k]='';
				}
			}

				// Convert abs to rel url (the tag is parsed again since it may have been rebuilt above)
			if ($imgSplit[$k]) {
				$attribArray=$this->get_tag_attributes_classic($imgSplit[$k],1);
				$absRef = trim($attribArray['src']);
				if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
					$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
					if (!isset($attribArray['alt'])) $attribArray['alt']=''; // Must have alt-attribute for XHTML compliance.
					$imgSplit[$k]='<img '.t3lib_div::implodeAttributes($attribArray,1,1).' />';
				}
			}
		}
	}
	return implode('',$imgSplit);
}
529
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares <img> tags of database content for the RTE.
 * Unless the src attribute starts with "http" it is made absolute: as many characters as ->relBackPath is long are cut off the beginning and the site URL is prepended. An empty alt attribute is added if none exists.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_images_rte($value) {

		// Odd-indexed elements of the split result are the <img> tags:
	$pieces = $this->splitTags('img',$value);
	foreach($pieces as $index => $imgTag) {
		if (!($index%2)) {
			continue;
		}

		$attributes = $this->get_tag_attributes_classic($imgTag,1);
		$siteUrl = $this->siteUrl();
		$src = trim($attributes['src']);

			// A src attribute already pointing to an external URL is left alone:
		if (strtolower(substr($src,0,4))=='http') {
			continue;
		}

		$attributes['src'] = $siteUrl.substr($attributes['src'],strlen($this->relBackPath));
		if (!isset($attributes['alt'])) {
			$attributes['alt'] = '';
		}
		$pieces[$index] = '<img '.t3lib_div::implodeAttributes($attributes).' />';
	}

		// Put the content together again:
	return implode('',$pieces);
}
563
/**
 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
 * Converting <A>-tags to/from abs/rel
 * For "rte" the work is delegated to TS_AtagToAbs(). For "db" the site URL is cut off href attributes pointing to this site (and ->relBackPath prepended); nested content of each <a> block is processed recursively.
 *
 * @param	string		Content input
 * @param	string		Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
 * @return	string		Content output. For any other direction than "rte" and "db" nothing (NULL) is returned.
 */
function TS_reglinks($value,$direction) {
	switch($direction) {
		case 'rte':
			return $this->TS_AtagToAbs($value,1);
		case 'db':
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
				// foreach instead of while(list()=each()): each() is deprecated since PHP 7.2 and removed in PHP 8.0
			foreach($blockSplit as $k => $v) {
				if ($k%2) { // block:
					$attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v),1);
						// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL) {
						$attribArray['href']=$this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
					$eTag='</a>';
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($blockSplit[$k]),$direction).$eTag;
				}
			}
			return implode('',$blockSplit);
	}
}
597
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Converts <A>-tags into the TYPO3 pseudo tag <LINK [url] [target] [class] "[title]">.
 * Only <a> tags carrying no other attributes than href, target, class and title (plus the "rteerror" marker with its style) are converted; all others stay <a> tags, with "rtekeep" removed and a site-local href made relative. Content inside each link is processed recursively.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_links_rte()
 */
function TS_links_db($value) {

		// Odd-indexed elements of the split result are the <a>...</a> blocks:
	$segments = $this->splitIntoBlock('A',$value);
	foreach($segments as $idx => $segment) {
		if (!($idx%2)) {
			continue;
		}

		$attribs = $this->get_tag_attributes_classic($this->getFirstTag($segment),1);
		$info = $this->urlInfoForLinkTags($attribs['href']);

			// Find the attributes which can not be expressed in a <LINK> tag:
		$extraAttribs = $attribs;
		unset($extraAttribs['href'], $extraAttribs['target'], $extraAttribs['class'], $extraAttribs['title']);
		if ($extraAttribs['rteerror']) { // "rteerror" and the "style" coming with it do not count
			unset($extraAttribs['style'], $extraAttribs['rteerror']);
		}

		if (count($extraAttribs)) {
				// Other attributes are present, so the link is stored as a-tag (without "rtekeep"):
			unset($attribs['rtekeep']);
				// If the url is local, remove url-prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribs['href'],0,strlen($siteURL))==$siteURL) {
				$attribs['href'] = $this->relBackPath.substr($attribs['href'],strlen($siteURL));
			}
			$bTag = '<a '.t3lib_div::implodeAttributes($attribs,1).'>';
			$eTag = '</a>';
		} else {
				// Creating the TYPO3 pseudo-tag "<LINK>"; parameters are positional, so "-" fills a position which is empty but followed by a value:
			$target = $attribs['target'];
			$class = $attribs['class'];
			$title = $attribs['title'];

			$bTag = '<LINK '.$info['url'];
			if ($target) {
				$bTag .= ' '.$target;
			} elseif ($class || $title) {
				$bTag .= ' -';
			}
			if ($class) {
				$bTag .= ' '.$class;
			} elseif ($title) {
				$bTag .= ' -';
			}
			if ($title) {
				$bTag .= ' "'.$title.'"';
			}
			$bTag .= '>';
			$eTag = '</LINK>';
		}

			// Links nested inside the block are processed the same way:
		$segments[$idx] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($segments[$idx])).$eTag;
	}
	return implode('',$segments);
}
646
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting <LINK tags> to <A>-tags
 * The first parameter of the <LINK> tag is resolved to a href roughly like tslib_content->typolink() does it (email address, anchor, file, external URL or page id/alias);
 * the second, third and fourth parameter become target, class and title ("-" meaning "not set" for target and class).
 * If a page id/alias does not resolve to a page record the link is marked with an "rteerror" attribute and a warning style.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_links_db()
 */
function TS_links_rte($value) {
	$value = $this->TS_AtagToAbs($value);

		// Split content by the TYPO3 pseudo tag "<LINK>":
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v) {
		$error = '';
		if ($k%2) { // block:
			$tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v),0,-1)),true);
			$link_param = $tagCode[1];
			$href = '';
			$siteUrl = $this->siteUrl();
				// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
			if(strstr($link_param,'@')) { // mailadr
					// PCRE instead of eregi_replace() (the POSIX regex functions were removed in PHP 7.0):
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#') { // check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar=intval(strpos($link_param, '/'));
				$urlChar=intval(strpos($link_param, '.'));

					// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension'])))) {
					$href = $siteUrl.$link_param;
				} elseif($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar)) { // url (external): If doubleSlash or if a '.' comes before a '/'.
						// Prepend "http://" unless a scheme is given already (PCRE instead of the removed ereg()):
					$scheme = preg_match('#^[a-z]*://#',trim(strtolower($link_param))) ? '' : 'http://';
					$href = $scheme.$link_param;
				} elseif($fileChar) { // file (internal)
					$href = $siteUrl.$link_param;
				} else { // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]); // The part before an optional "#[section]"
					if (!strcmp($idPart,'')) { $idPart=$this->recPid; } // If no id or alias is given, set it to class record pid

						// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1) {
						$idPart = $pairParts[0];
							// Type ? future support for?
					}
						// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart)) {
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}

						// The href is the same whether the page exists or not; a missing page is only flagged as an error:
					$href = $siteUrl.'?id='.$link_param;
					$page = t3lib_BEfunc::getRecord('pages', $idPart);
					if (!is_array($page)) {
						$error = 'No page found: '.$idPart;
					}
				}
			}

				// Setting the A-tag:
			$bTag = '<a href="'.htmlspecialchars($href).'"'.
						($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
						($tagCode[3]&&$tagCode[3]!='-' ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
						($tagCode[4] ? ' title="'.htmlspecialchars($tagCode[4]).'"' : '').
						($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : ''). // Should be OK to add the style; the transformation back to database will remove it...
						'>';
			$eTag = '</a>';
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

		// Return content:
	return implode('',$blockSplit);
}
730
/**
 * Transformation handler: 'ts_preserve' / direction: "db"
 * Turns <span specialtag="..."> wrappers back into the tag stored (rawurlencoded) in the "specialtag" attribute.
 * Does nothing if no tags to preserve are configured in ->preserveTags.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_preserve_rte()
 */
function TS_preserve_db($value) {
	if (!$this->preserveTags) {
		return $value;
	}

		// Preserved tags are represented by span-tags; odd-indexed elements of the split result are the <span> blocks:
	$segments = $this->splitIntoBlock('span',$value);
	foreach($segments as $idx => $segment) {
		if (!($idx%2)) {
			continue;
		}

		$attributes = $this->get_tag_attributes_classic($this->getFirstTag($segment));
		if (!$attributes['specialtag']) {
			continue;	// An ordinary span-tag, leave it alone
		}

			// Put the original start tag and a matching end tag around the inner content:
		$originalTag = rawurldecode($attributes['specialtag']);
		$originalTagName = $this->getFirstTagName($originalTag);
		$segments[$idx] = $originalTag.$this->removeFirstAndLastTag($segments[$idx]).'</'.$originalTagName.'>';
	}
	return implode('',$segments);
}
754
/**
 * Transformation handler: 'ts_preserve' / direction: "rte"
 * Wraps the content of every block made of a tag listed in ->preserveTags in <span specialtag="[rawurlencoded start tag]">, replacing the original start and end tag.
 * Does nothing if no tags to preserve are configured in ->preserveTags.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_preserve_db()
 */
function TS_preserve_rte($value) {
	if (!$this->preserveTags) {
		return $value;
	}

		// Odd-indexed elements of the split result are the blocks of preserved tags:
	$segments = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($segments as $idx => $segment) {
		if (!($idx%2)) {
			continue;
		}

		$encodedTag = rawurlencode($this->getFirstTag($segment));
		$segments[$idx] = '<span specialtag="'.$encodedTag.'">'.$this->removeFirstAndLastTag($segments[$idx]).'</span>';
	}
	return implode('',$segments);
}
772
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split on TABLE, BLOCKQUOTE and the tags in $this->headListTags. Each block is rewritten
 * according to its tag name (see the switch below); the text between blocks is sent through divideIntoLines().
 * Processed parts are terminated with chr(10) unless they are the last part.
 * Blockquotes and removed tables are processed recursively; $this->TS_transform_db_safecounter limits the depth.
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_rte()
 */
function TS_transform_db($value,$css=FALSE) {

		// Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0) {
			// Give the level back before bailing out, otherwise every overflow would permanently reduce the allowed depth:
		$this->TS_transform_db_safecounter++;
		return $value;
	}

		// Split the content from RTE by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.$this->headListTags,$value);

	$cc=0;
	$aC = count($blockSplit);

		// Avoid superfluous linebreaks by transform_db after ending headListTag
	while($aC && !strcmp(trim($blockSplit[$aC-1]),'')) {
		unset($blockSplit[$aC-1]);
		$aC = count($blockSplit);
	}

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		$cc++;
			// No trailing linebreak after the very last part:
		$lastBR = $cc==$aC ? '' : chr(10);

		if ($k%2) {	// Inside block:

				// Init:
			$tag=$this->getFirstTag($v);
			$tagName=strtolower($this->getFirstTagName($v));

				// Process based on the tag:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
					$blockSplit[$k]='<'.$tagName.'>'.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css) {
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
								// Tags allowed inside list items (the same for every item):
							$allowedInList = strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em');
							foreach(array_keys($parts) as $k2) {
								$parts[$k2]=str_replace(array(chr(10),chr(13)),'',$parts[$k2]);	// remove all linesbreaks!
								$parts[$k2]=$this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2]=$this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2]=$this->HTMLcleaner_db($parts[$k2],$allowedInList);
							}
							if ($tagName=='ol') { $params=' type="1"'; } else { $params=''; }
							$blockSplit[$k]='<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]).$lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css) {
						$blockSplit[$k]=$this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k]=str_replace(chr(10),' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css) {
						$attribArray=$this->get_tag_attributes_classic($tag);
							// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
								// The header level becomes the "type" attribute; level 6 is written without the attribute
							$type = intval(substr($tagName,1));
							$blockSplit[$k]='<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
							$blockSplit[$k]='<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
							// Eliminate true linebreaks inside Hx tags
						$blockSplit[$k]=str_replace(chr(10),' ',$this->transformStyledATags($blockSplit[$k])).$lastBR;
					}
				break;
				default:
					$blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]).$lastBR;
				break;
			}
		} else {	// NON-block:
			if (strcmp(trim($blockSplit[$k]),'')) {
				$blockSplit[$k]=$this->divideIntoLines(str_replace(chr(10),' ',$blockSplit[$k])).$lastBR;
				$blockSplit[$k]=$this->transformStyledATags($blockSplit[$k]);
			} else unset($blockSplit[$k]);
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
891
/**
 * Wraps every <a> tag that carries a "style" attribute in a <span> tag.
 * The style attribute is moved from the a-tag to the surrounding span; a-tags without a style attribute are left untouched.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function transformStyledATags($value) {
	$pieces = $this->splitIntoBlock('A',$value);

	foreach($pieces as $idx => $piece) {
			// Even positions are the content between the a-tags:
		if (!($idx%2)) continue;

		$linkAttribs = $this->get_tag_attributes_classic($this->getFirstTag($piece),1);
			// Nothing to do unless "style" is set:
		if (!$linkAttribs['style']) continue;

			// Move the style attribute over to the span:
		$spanAttribs = array('style' => $linkAttribs['style']);
		unset($linkAttribs['style']);

		$openTags = '<span '.t3lib_div::implodeAttributes($spanAttribs,1).'><a '.t3lib_div::implodeAttributes($linkAttribs,1).'>';
		$pieces[$idx] = $openTags.$this->removeFirstAndLastTag($pieces[$idx]).'</a></span>';
	}

	return implode('',$pieces);
}
914
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split on TABLE, BLOCKQUOTE, TYPOLIST, TYPOHEAD and the tags in $this->headListTags.
 * <typolist> becomes <ol>/<ul>, <typohead> becomes <h1>-<h6>, blockquotes are processed recursively
 * and the text between the blocks is wrapped line by line through setDivTags().
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform". Only passed on to the recursive call here.
 * @return	string		Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value,$css=0) {

		// Split the content from Database by the occurence of these blocks:
	$blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags;
	$blockSplit = $this->splitIntoBlock($blockTagList,$value);

		// Patterns for stripping one linebreak (plus spaces) at the start / end of a string.
		// \z is used instead of $ because PCRE's $ would also match before a final newline, which the previously used POSIX pattern did not.
	$leadingBreak = '/^[ ]*\n/';
	$trailingBreak = '/\n[ ]*\z/';

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// Inside one of the blocks:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

				// Based on tagname, we do transformations:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes:
					$blockSplit[$k] = $tag.
										$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
										'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
						$tListContent = preg_replace($leadingBreak,'',$tListContent);
						$tListContent = preg_replace($trailingBreak,'',$tListContent);
							// Each remaining line is one list item:
						$lines = explode(chr(10),$tListContent);
						$typ = $attribArray['type']==1 ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
											'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
											'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
						$typ = t3lib_div::intInRange($attribArray['type'],0,6);
							// No (or zero) type means header level 6:
						if (!$typ)	$typ=6;
						$align = $attribArray['align']?' align="'.$attribArray['align'].'"': '';
						$class = $attribArray['class']?' class="'.$attribArray['class'].'"': '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
											$tC.
											'</h'.$typ.'>';
					}
				break;
			}
				// Removing the linebreak which follows the block (the following element is created as an empty string if it did not exist):
			$following = isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '';
			$blockSplit[$k+1] = preg_replace($leadingBreak,'',$following);
		} else {	// NON-block:
			$nextFTN = $this->getFirstTagName(isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '');
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList($blockTagList,$nextFTN))	{	// Removing linebreak if typolist/typohead
				$blockSplit[$k] = preg_replace($trailingBreak,'',$blockSplit[$k]);
			}
				// If $blockSplit[$k] is blank then unset the line. UNLESS the line happend to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak) {
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}
	return implode(chr(10),$blockSplit);
}
987
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Strips every tag from the content except a fixed list of allowed tags.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_strip_db($value) {
		// Build the "<b><i>..." string expected by strip_tags() from the comma list:
	$allowedTags = '';
	foreach(explode(',','b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote') as $allowedTagName) {
		$allowedTags.= '<'.$allowedTagName.'>';
	}

	return strip_tags($value,$allowedTags);
}
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013 /***************************************************************
1014 *
1015 * Generic RTE transformation, analysis and helper functions
1016 *
1017 **************************************************************/
1018
/**
 * Fetches the content of a file or URL.
 * Plain wrapper around t3lib_div::getURL().
 *
 * @param	string		Filepath/URL to read
 * @return	string		The content from the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url) {
	$content = t3lib_div::getURL($url);
	return $content;
}
1029
/**
 * Cleans content which is on its way into the database.
 * Calls HTMLcleaner() with a configuration derived from getKeepTags('db') and the processing options:
 * unknown tags are removed unless "dontRemoveUnknownTags_db" is set, and the htmlspecialchars mode is -1 unless "dontUndoHSC_db" is set.
 *
 * @param	string		Content to clean up
 * @param	string		Comma list of tags to specifically allow. Default comes from getKeepTags and is ""
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='') {
	$keepTags = $tagList ? $this->getKeepTags('db',$tagList) : $this->getKeepTags('db');

		// Default: remove unknown tags.
	$keepUnknown = 0;
	if ($this->procOptions['dontRemoveUnknownTags_db']) {
		$keepUnknown = 1;
	}

		// Default: re-convert literals to characters (that is &lt; to <)
	$hscMode = -1;
	if ($this->procOptions['dontUndoHSC_db']) {
		$hscMode = 0;
	}

		// Create additional configuration in order to honor the setting RTE.default.proc.HTMLparser_db.xhtml_cleaning=1
		// (the entry/exit parser configurations are checked for the same flag)
	$extraConf = array();
	foreach(array('HTMLparser_db.','entryHTMLparser_db.','exitHTMLparser_db.') as $parserKey) {
		if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
			$extraConf['xhtml'] = 1;
			break;
		}
	}

	return $this->HTMLcleaner($content,$keepTags,$keepUnknown,$hscMode,$extraConf);
}
1057
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the function will cache the configuration in $this->getKeepTags_cache for next time processing goes on.
 * Without "tagList" the allowed tags are a built-in default list plus the "allowTags" processing option, minus the tags in "denyTags".
 * Both options are compared in lowercase.
 *
 * @param	string		The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param	string		Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='') {

		// Return the cached configuration if there is one and no explicit tag list was given:
	if (!$tagList && isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction])) {
		return $this->getKeepTags_cache[$direction];
	}

		// Setting up allowed tags:
	if (strcmp($tagList,'')) {	// If the $tagList input var is set, this will take precedence
		$keepTags = array_flip(t3lib_div::trimExplode(',',$tagList,1));
	} else {	// Default is to get allowed/denied tags from internal array of processing options:
			// Construct default list of tags to keep:
		$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
		$keepTags = array_flip(t3lib_div::trimExplode(',',$typoScript_list.','.strtolower($this->procOptions['allowTags']),1));

			// For tags to deny, remove them from $keepTags array.
			// The keys of $keepTags are lowercase, so the deny list has to be lowercased as well in order to match:
		$denyTags = t3lib_div::trimExplode(',',strtolower($this->procOptions['denyTags']),1);
		foreach($denyTags as $dKe) {
			unset($keepTags[$dKe]);
		}
	}

		// Based on the direction of content, set further options:
	switch ($direction) {

			// GOING from database to Rich Text Editor:
		case 'rte':
				// Transform bold/italics tags to strong/em
			if (isset($keepTags['b']))	{$keepTags['b']=array('remap'=>'STRONG');}
			if (isset($keepTags['i']))	{$keepTags['i']=array('remap'=>'EM');}

				// Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
			list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);
		break;

			// GOING from RTE to database:
		case 'db':
				// Transform strong/em back to bold/italics:
			if (isset($keepTags['strong']))	{ $keepTags['strong']=array('remap'=>'b'); }
			if (isset($keepTags['em']))		{ $keepTags['em']=array('remap'=>'i'); }

				// Setting up span tags if they are allowed:
			if (isset($keepTags['span'])) {
				$classes=array_merge(array(''),$this->allowedClasses);
				$keepTags['span']=array(
					'allowedAttribs' => 'class,style,xml:lang',
					'fixAttrib' => Array(
						'class' => Array (
							'list' => $classes,
							'removeIfFalse' => 1
						)
					),
					'rmTagIfNoAttrib' => 1
				);
					// Without configured "allowedClasses" the class value is not restricted to a list:
				if (!$this->procOptions['allowedClasses'])	unset($keepTags['span']['fixAttrib']['class']['list']);
			}

				// Setting up font tags if they are allowed:
			if (isset($keepTags['font'])) {
				$colors=array_merge(array(''),t3lib_div::trimExplode(',',$this->procOptions['allowedFontColors'],1));
				$keepTags['font']=array(
					'allowedAttribs'=>'face,color,size',
					'fixAttrib' => Array(
						'face' => Array (
							'removeIfFalse' => 1
						),
						'color' => Array (
							'removeIfFalse' => 1,
							'list'=>$colors
						),
						'size' => Array (
							'removeIfFalse' => 1,
						)
					),
					'rmTagIfNoAttrib' => 1
				);
					// Without configured "allowedFontColors" the color value is not restricted to a list:
				if (!$this->procOptions['allowedFontColors'])	unset($keepTags['font']['fixAttrib']['color']['list']);
			}

				// Setting further options, getting them from the processing options:
			$TSc = $this->procOptions['HTMLparser_db.'];
			if (!$TSc['globalNesting'])	$TSc['globalNesting']='b,i,u,a,center,font,sub,sup,strong,em,strike,span';
			if (!$TSc['noAttrib'])	$TSc['noAttrib']='b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';

				// Transforming the array from TypoScript to regular array:
			list($keepTags) = $this->HTMLparserConfig($TSc,$keepTags);
		break;
	}

		// A configuration built from an explicit tag list is returned directly and never cached:
	if ($tagList) {
		return $keepTags;
	}

		// Caching (internally, in object memory) the result:
	$this->getKeepTags_cache[$direction] = $keepTags;
	return $this->getKeepTags_cache[$direction];
}
1163
/**
 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by chr(10).
 * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * If the value contains no div/p sections, or the recursion brake has reached zero, the value is returned unchanged as a string - also when $returnArray is set.
 *
 * @param	string		Value to process.
 * @param	integer		Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	mixed		Processed input value; string, or array of parts if $returnArray was set and sections were found.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {

		// Internalize font tags (move them from OUTSIDE p/div to inside it that is the case):
	if ($this->procOptions['internalizeFontTags']) {$value = $this->internalizeFontTags($value);}

		// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?

	if ($this->procOptions['keepPDIVattribs']) {
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

		// Returns plainly the value if there was no div/p sections in it
	if (count($divSplit)<=1 || $count<=0) {
		return $value;
	}

		// Traverse the splitted sections:
	foreach($divSplit as $k => $v) {
		if ($k%2) {	// Inside
			$v=$this->removeFirstAndLastTag($v);

				// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($v,$count-1,1);
			if (!is_array($subLines)) {	// NO subsection was found, so we process it as a TRUE line. (If an array came back there was sub-nesting of p/div, which is written directly as the new content of THIS section.)
				$subLines = array($subLines);
				if (!$this->procOptions['dontConvBRtoParagraph']) {	// process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
						// The character class accepts "/" as well as "\" before ">", exactly like the POSIX pattern '<br[[:space:]]*[\/]?>' used before.
					$subLines = preg_split('#<br[[:space:]]*[\\\\/]?>#i',$v);
				}

					// Get first tag, attributes etc. of the section. This is the same for all sublines, so it is done once:
				$fTag = $this->getFirstTag($divSplit[$k]);
				$tagName = strtolower($this->getFirstTagName($divSplit[$k]));
				$attribs = $this->get_tag_attributes($fTag);
				$alignValue = isset($attribs[0]['align']) ? $attribs[0]['align'] : '';
				$classValue = isset($attribs[0]['class']) ? $attribs[0]['class'] : '';

					// Keep attributes (lowercase)
				$newAttribs = array();
				foreach($keepAttribListArr as $keepA) {
					if (isset($attribs[0][$keepA])) { $newAttribs[$keepA] = $attribs[0][$keepA]; }
				}

					// ALIGN attribute:
				if (!$this->procOptions['skipAlign'] && strcmp(trim($alignValue),'') && strtolower($alignValue)!='left') {	// Set to value, but not 'left'
					$newAttribs['align'] = strtolower($alignValue);
				}

					// CLASS attribute:
				if (!$this->procOptions['skipClass'] && strcmp(trim($classValue),'')) {	// Set to whatever value
					if (!count($this->allowedClasses) || in_array(strtoupper($classValue),$this->allowedClasses)) {
						$newAttribs['class'] = $classValue;
					}
				}

					// The lines are only wrapped in a tag if there are attributes to carry and remapParagraphTag is not "1":
				$wrapLines = count($newAttribs) && strcmp($remapParagraphTag,'1');
				$openTag = '';
				$closeTag = '';
				if ($wrapLines) {
					if ($remapParagraphTag=='P')	$tagName='p';
					if ($remapParagraphTag=='DIV')	$tagName='div';
					$openTag = '<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>';
					$closeTag = '</'.$tagName.'>';
				}

					// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
				foreach(array_keys($subLines) as $sk) {

						// Clear up the subline for DB.
					$subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);

						// Remove any line break char (10 or 13)
					$subLines[$sk] = str_replace(array(chr(10),chr(13)),'',$subLines[$sk]);

					if ($wrapLines) {
						$subLines[$sk] = $openTag.$subLines[$sk].$closeTag;
					}
				}
			}
				// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

				// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank:
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;')		$divSplit[$k]='';
		} else {	// outside div:
				// Remove positions which are outside div/p tags and without content
			$divSplit[$k]=trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],''))	unset($divSplit[$k]);	// Remove part if it's empty
		}
	}

		// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1270
/**
 * Turns every line (separated by chr(10)) into a <div></div>/<p></p>-section, unless the line already starts and ends with a div or p tag.
 * Blank lines become a section with &nbsp; as content; all other lines are passed through HTMLcleaner() first.
 * For processing of content going FROM database TO RTE.
 *
 * @param	string		Value to convert
 * @param	string		Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p') {

		// Configuration for the HTMLcleaner function, which processes each line on its way to the RTE:
	$rteKeepTags = $this->getKeepTags('rte');
	$unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';	// Default: protect unknown tags.
	$hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;	// Default: htmlspecialchars mode 1
	$restoreNbsp = !$this->procOptions['dontConvAmpInNBSP_rte']?1:0;

		// Work on the content line by line:
	$lines = explode(chr(10),$value);
	foreach(array_keys($lines) as $lineNo) {
		$line = $lines[$lineNo];

		if (strcmp(trim($line),'')) {	// Clean the line content:
			$line = $this->HTMLcleaner($line,$rteKeepTags,$unknownTagMode,$hscMode);
			if ($restoreNbsp)	$line = str_replace('&amp;nbsp;','&nbsp;',$line);
		} else {	// A blank line is represented by &nbsp;
			$line = '&nbsp;';
		}

			// Find out whether the line is wrapped in div or p tags already:
		$probe = strtolower(trim($line));
		$isDivWrapped = substr($probe,0,4)=='<div' && substr($probe,-6)=='</div>';
		$isPWrapped = substr($probe,0,2)=='<p' && substr($probe,-4)=='</p>';

			// Only set the wrapping tag if there is not already div or p tags:
		if (!$isDivWrapped && !$isPWrapped) {
			$line = '<'.$dT.'>'.$line.'</'.$dT.'>';
		}

		$lines[$lineNo] = $line;
	}

		// Implode result:
	return implode(chr(10),$lines);
}
1313
/**
 * Moves font tags from the outside of <p>/<div> sections to the inside of them.
 * The value is split in font-tag chunks. If a chunk contains p/div sections, the content INSIDE each section is wrapped in the font tag,
 * loose content between the sections is wrapped as well (if it is not blank) and the outer font tag is dropped.
 * Font chunks without any p/div section are left as they are.
 * Used by divideIntoLines() if the processing option 'internalizeFontTags' is set, eg. for content pasted from star-office where font tags are normally on the OUTSIDE of the sections.
 *
 * @param	string		Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value) {

		// Splitting into font tag blocks:
	$fontParts = $this->splitIntoBlock('font',$value);

	foreach($fontParts as $fontIdx => $fontBlock) {
			// Even positions are outside the font tags:
		if (!($fontIdx%2)) continue;

			// The opening font tag which will be moved inwards:
		$openFont = $this->getFirstTag($fontBlock);

		$sections = $this->splitIntoBlock('div,p',$this->removeFirstAndLastTag($fontBlock),1);
			// Nothing to do if there are no div/p sections inside the font tag:
		if (count($sections)<=1) continue;

		foreach($sections as $secIdx => $section) {
			if ($secIdx%2) {	// Inside a div/p section: put the font tag just inside the section tag
				$openSection = $this->getFirstTag($section);
				$sectionTagName = $this->getFirstTagName($section);
				$sectionContent = $this->removeFirstAndLastTag($section);
				$sections[$secIdx] = $openSection.$openFont.$sectionContent.'</font>'.'</'.$sectionTagName.'>';
			} elseif (trim(strip_tags($section))) {	// Content between sections keeps the font tag around it
				$sections[$secIdx] = $openFont.$section.'</font>';
			}
		}

		$fontParts[$fontIdx] = implode('',$sections);
	}

	return implode('',$fontParts);
}
1354
/**
 * Returns the site URL.
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl() {
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	return $siteUrl;
}
1364
/**
 * Returns the storage folder of RTE image files.
 * This is $this->rte_p['imgpath'] if that is set, otherwise $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string
 */
function rteImageStorageDir() {
		// A path configured for this RTE takes precedence:
	if ($this->rte_p['imgpath']) {
		return $this->rte_p['imgpath'];
	}

	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1374
/**
 * Removes all tables from the incoming code.
 * Each table is replaced by the content of its <td> cells (row by row), where every cell is followed by the break character.
 * Finally the table replacements and the content around them are joined with the break character as well, so the content is at least preserved in some way.
 *
 * @param	string		Input value
 * @param	string		Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />') {

		// Splitting value into table blocks:
	$pieces = $this->splitIntoBlock('table',$value);

	foreach($pieces as $pieceIdx => $piece) {
			// Even positions are the content outside the tables, which is kept as it is:
		if (!($pieceIdx%2)) continue;

			// Collect the cell contents of the table:
		$flattened = '';
		$rows = $this->splitIntoBlock('tr',$piece);
		foreach($rows as $rowIdx => $row) {
			if (!($rowIdx%2)) continue;

			$cells = $this->getAllParts($this->splitIntoBlock('td',$row),1,0);
			foreach($cells as $cellContent) {
				$flattened.= $cellContent.$breakChar;
			}
		}
		$pieces[$pieceIdx] = $flattened;
	}

		// Implode it all again:
	return implode($breakChar,$pieces);
}
1407
/**
 * Default tag mapping for TS
 * Direction "db" maps strong/em to b/i, direction "rte" maps b/i to strong/em. Any other direction leaves the code unchanged.
 *
 * @param	string		Input code to process
 * @param	string		Direction: To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte') {
		// Tag map per direction:
	$tagMaps = array(
		'db' => array(
			'strong' => 'b',
			'em' => 'i'
		),
		'rte' => array(
			'b' => 'strong',
			'i' => 'em'
		)
	);

	foreach($tagMaps as $mapDirection => $tagMap) {
		if ($direction==$mapDirection) {
			$code = $this->mapTags($code,$tagMap);
		}
	}

	return $code;
}
1430
/**
 * Finds width and height from attrib-array
 * If the width and height is found as pixel values in the style-attribute, use that! Otherwise the "width" / "height" attributes are used.
 * Only the properties "width" and "height" themselves are read from the style, not eg. "border-width" or "max-height".
 *
 * @param	array		Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray) {
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style) {
			// "<property> : <digits> px"; the lookbehind makes sure the property name is not the tail of a longer name:
		$prefix = '/(?<![-a-z])';
		$suffix = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';

			// Width
		if (preg_match($prefix.'width'.$suffix,$style,$reg)) {
			$w = intval($reg[1]);
		}
			// Height (evaluated on its own match only, never on what was found for the width)
		if (preg_match($prefix.'height'.$suffix,$style,$reg)) {
			$h = intval($reg[1]);
		}
	}

		// Fall back to the plain attributes:
	if (!$w && isset($attribArray['width'])) {
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height'])) {
		$h = $attribArray['height'];
	}

	return array(intval($w),intval($h));
}
1457
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * Keys of the returned array:
 * - "type": "email", "ext", "anchor", "page" or "file"
 * - "url": The address (email), the unchanged URL (ext), or the URL relative to the site (anchor, file), or "[id]#[fragment]" (page)
 * - "pageid" / "cElement": Only for type "page": the value after "id=" in the query string and the fragment
 * - "relScriptPath" / "relUrl": Only if the URL was found to point to this site: the parts of the site URL / the URL which follow their common prefix
 *
 * @param	string		URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url) {
	$info = array();
	$url = trim($url);

	if (substr(strtolower($url),0,7)=='mailto:') {
		$info['url']=trim(substr($url,7));
		$info['type']='email';
		return $info;
	}

	$curURL = $this->siteUrl(); 	// 100502, removed this: 'http://'.t3lib_div::getThisUrl(); Reason: The url returned had typo3/ in the end - should be only the site's url as far as I see...

		// Find the length of the common prefix of the URL and the site URL (never reading beyond the end of either string):
	$maxPrefix = min(strlen($url),strlen($curURL));
	for($a=0;$a<$maxPrefix;$a++) {
		if ($url[$a]!=$curURL[$a]) {
			break;
		}
	}

	$info['relScriptPath']=substr($curURL,$a);
	$info['relUrl']=substr($url,$a);
	$info['url']=$url;
	$info['type']='ext';

	$siteUrl_parts = parse_url($url);
	$curUrl_parts = parse_url($curURL);

		// parse_url() leaves out components which are not in the URL (and returns false for seriously malformed URLs):
	$urlHost = (is_array($siteUrl_parts) && isset($siteUrl_parts['host'])) ? $siteUrl_parts['host'] : NULL;
	$curHost = (is_array($curUrl_parts) && isset($curUrl_parts['host'])) ? $curUrl_parts['host'] : NULL;

	if ($urlHost==$curHost 	// Hosts should match
		&& (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir))) {	// If the script path seems to match or is empty (FE-EDIT)

			// New processing order 100502
		$uP = parse_url($info['relUrl']);
		$relPath = (is_array($uP) && isset($uP['path'])) ? $uP['path'] : '';
		$relQuery = (is_array($uP) && isset($uP['query'])) ? $uP['query'] : '';
		$relFragment = (is_array($uP) && isset($uP['fragment'])) ? $uP['fragment'] : '';
		$urlFragment = (is_array($siteUrl_parts) && isset($siteUrl_parts['fragment'])) ? $siteUrl_parts['fragment'] : '';

		if (!strcmp('#'.$urlFragment,$info['relUrl'])) {
				// Nothing but a fragment is left: link to an anchor
			$info['url']=$info['relUrl'];
			$info['type']='anchor';
		} elseif (!trim($relPath) || !strcmp($relPath,'index.php')) {
				// Everything after the first "id=" in the query string is taken as the page id:
			$pp = explode('id=',$relQuery);
			$id = isset($pp[1]) ? trim($pp[1]) : '';
			if ($id) {
				$info['pageid']=$id;
				$info['cElement']=$relFragment;
				$info['url']=$id.($info['cElement']?'#'.$info['cElement']:'');
				$info['type']='page';
			}
		} else {
			$info['url']=$info['relUrl'];
			$info['type']='file';
		}
	} else {
		unset($info['relScriptPath']);
		unset($info['relUrl']);
	}

	return $info;
}
1515
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 * A href without a scheme gets the site URL prepended after the first strlen($this->relBackPath) characters have been cut off.
 * The content of the a-tags is processed recursively with the same $dontSetRTEKEEP setting.
 *
 * @param	string		Content input
 * @param	boolean		If true, then the "rtekeep" attribute will not be set - except for a-tags without href content, which always get it.
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
	$blockSplit = $this->splitIntoBlock('A',$value);

	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// block:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);

				// Checking if there is a scheme, and if not, prepend the current url.
			if (isset($attribArray['href']) && strlen($attribArray['href'])) {	// ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
				$uP = parse_url(strtolower($attribArray['href']));
				if (empty($uP['scheme'])) {
					$attribArray['href'] = $this->siteUrl().substr($attribArray['href'],strlen($this->relBackPath));
				}
			} else {
				$attribArray['rtekeep'] = 1;
			}
			if (!$dontSetRTEKEEP)	$attribArray['rtekeep'] = 1;

			$bTag='<a '.t3lib_div::implodeAttributes($attribArray,1).'>';
			$eTag='</a>';
				// The flag is passed on, so a-tags inside this one are treated the same way:
			$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]),$dontSetRTEKEEP).$eTag;
		}
	}

	return implode('',$blockSplit);
}
1548 }
1549
1550
1551 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
1552 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
1553 }
1554 ?>