Changed parsehtml_proc so that it preserves <a> tags which are anchor points.
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2004 Kasper Skaarhoj (kasper@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasper@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 102: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 137: function init($elRef='',$recPid=0)
44 * 149: function setRelPath($path)
45 * 173: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 231: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 378: function TS_images_db($value)
52 * 517: function TS_images_rte($value)
53 * 551: function TS_reglinks($value,$direction)
54 * 585: function TS_links_db($value)
55 * 629: function TS_links_rte($value)
56 * 704: function TS_preserve_db($value)
57 * 728: function TS_preserve_rte($value)
58 * 749: function TS_transform_db($value,$css=FALSE)
59 * 860: function TS_transform_rte($value,$css=0)
60 * 931: function TS_strip_db($value)
61 *
62 * SECTION: Generic RTE transformation, analysis and helper functions
63 * 962: function getURL($url)
64 * 976: function HTMLcleaner_db($content,$tagList='')
65 * 997: function getKeepTags($direction='rte',$tagList='')
66 * 1106: function divideIntoLines($value,$count=5,$returnArray=FALSE)
67 * 1210: function setDivTags($value,$dT='p')
68 * 1255: function internalizeFontTags($value)
69 * 1291: function siteUrl()
70 * 1301: function rteImageStorageDir()
71 * 1313: function removeTables($value,$breakChar='<br />')
72 * 1345: function defaultTStagMapping($code,$direction='rte')
73 * 1368: function getWHFromAttribs($attribArray)
74 * 1394: function urlInfoForLinkTags($url)
75 * 1453: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
76 *
77 * TOTAL FUNCTIONS: 27
78 * (This index is automatically created/updated by the extension "extdeveval")
79 *
80 */
81
82 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
83
84
85
86
87
88
89
90
91
92
93
94
95 /**
96 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
97 *
98 * @author Kasper Skaarhoj <kasper@typo3.com>
99 * @package TYPO3
100 * @subpackage t3lib
101 */
102 class t3lib_parsehtml_proc extends t3lib_parsehtml {
103
104 // Static:
105 var $headListTags = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6'; // List of tags for header, pre and list containers
106
107 // Internal, static:
108 var $recPid = 0; // Set this to the pid of the record manipulated by the class.
109 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
110 var $relPath=''; // Relative path
111 var $relBackPath=''; // Relative back-path
112 var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
113
114 // Internal, dynamic
115 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls.
116 var $rte_p=''; // Parameters from TCA types configuration related to the RTE
117 var $getKeepTags_cache=array(); // Data caching for processing function
118 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE
119 var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
120
121
122
123
124
125
126
127
128
129
/**
 * Stores the element reference and the page id of the record this object is going to process.
 *
 * @param	string		Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 * @param	integer		PID of the record (page id)
 * @return	void
 */
function init($elRef='',$recPid=0)	{
	$this->elRef = $elRef;
	$this->recPid = $recPid;
}
141
/**
 * Sets ->relPath and ->relBackPath so absolute references to links and images can be converted to relative ones.
 * This is used when editing files with the RTE.
 * If the path is empty after trimming, both variables are left untouched.
 *
 * @param	string		The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
 * @return	void		There is no output, the result is set in internal variables. With the above example of "fileadmin/static" as input ->relPath becomes "fileadmin/static/" and ->relBackPath becomes "../../"
 */
function setRelPath($path)	{
	$path = trim($path);

		// Strip at most one leading and one trailing slash.
		// PCRE is used because the ereg_* functions are no longer available in current PHP versions.
	$path = preg_replace('#^/#','',$path);
	$path = preg_replace('#/$#D','',$path);

	if ($path)	{
			// One "../" per path segment leads back to the starting point of the relative path:
		$this->relBackPath = str_repeat('../',count(explode('/',$path)));
		$this->relPath = $path.'/';
	}
}
163
/**
 * Evaluates the environment for editing a "staticFileEdit" file.
 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
 *
 * @param	array		Parameters for the current field as found in types-config. The 'parameters' key is read: [0] is the name of the record field holding the file name, [1]-[4] are returned as content, marker, load-from-file and status field names.
 * @param	array		Current record we are editing.
 * @return	mixed		On success an array with various information is returned, otherwise a string with an error message. Nothing is returned if the first argument is not an array.
 * @see t3lib_TCEmain, t3lib_transferData
 */
function evalWriteFile($pArr,$currentRecord)	{

		// Without field parameters there is nothing to evaluate:
	if (!is_array($pArr))	return;

		// The configured base path must be set, end with a slash and exist below PATH_site:
	$basePath = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'];
	if (!$basePath || substr($basePath,-1)!='/' || !@is_dir(PATH_site.$basePath))	{
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

		// The record must name a file with a valid path string:
	$params = $pArr['parameters'];
	$fileField = trim($params[0]);
	$fileName = $currentRecord[$fileField];
	if (!$fileField || !$fileName || !t3lib_div::validPathStr($fileName))	{
		return "ERROR: Edit file name could not be found or was bad.";
	}

		// The file itself must exist:
	$relFile = $basePath.$fileName;
	$absFile = PATH_site.$relFile;
	if (!@is_file($absFile))	{
		return "ERROR: Editfile '".$relFile."' did not exist";
	}

	return array(
		'editFile' => $absFile,
		'relEditFile' => $relFile,
		'contentField' => trim($params[1]),
		'markerField' => trim($params[2]),
		'loadFromFileField' => trim($params[3]),
		'statusField' => trim($params[4])
	);
}
201
202
203
204
205
206
207
208
209
210
211
212
213
214 /**********************************************
215 *
216 * Main function
217 *
218 **********************************************/
219
/**
 * Transforms a value for the RTE based on $specConf in the direction given by $direction (rte/db).
 * This is the main function called from the tcemain and transfer data classes.
 *
 * @param	string		Input value
 * @param	array		Special configuration for a field; this comes from the types-configuration of the field in the TCA. Only the 'rte_transform' parameters are read here.
 * @param	string		Direction of the transformation. Two keywords are allowed; "db" or "rte". "db" means content coming from the Rich Text Editor is cleaned up on its way into the database. "rte" means content coming from the database is transformed to fit the RTE. Any other value runs no transformation handler at all.
 * @param	array		Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig. The 'proc.' key is used as processing options.
 * @return	string		Output value
 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())	{

		// Init: processing options and the upper-cased list of tags to preserve
	$this->procOptions = $thisConfig['proc.'];
	$preserveList = t3lib_div::trimExplode(',',$this->procOptions['preserveTags']);
	$this->preserveTags = strtoupper(implode(',',$preserveList));

		// Get parameters for rte_transformation:
	$this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);
	$p = $this->rte_p;

		// Setting modes; a non-empty "overruleMode" (comma list) wins over the "mode" parameter (dash list):
	if (!strcmp($this->procOptions['overruleMode'],''))	{
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	} else {
		$modes = array_unique(t3lib_div::trimExplode(',',$this->procOptions['overruleMode']));
	}
	$modePositions = array_flip($modes);

		// The shortcut modes "ts" and "ts_css" are expanded in place into their list of handlers:
	if (isset($modePositions['ts']))	{
		$modes[$modePositions['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
	}
	if (isset($modePositions['ts_css']))	{
		$modes[$modePositions['ts_css']] = 'css_transform,ts_images,ts_links';
	}
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

		// Towards the RTE the handlers run in the opposite order:
	if ($direction=='rte')	{
		$modes = array_reverse($modes);
	}

		// Getting additional HTML cleaner configuration. These are applied before and after the main transformation and are thus totally independent processing options:
	$entryParserConf = '';
	if ($this->procOptions['entryHTMLparser_'.$direction])	{
		$entryParserConf = $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']);
	}
	$exitParserConf = '';
	if ($this->procOptions['exitHTMLparser_'.$direction])	{
		$exitParserConf = $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']);
	}

		// Line breaks of content are unified into char-10 only (turning char 13 + char 10 into char 10)
	$unifyLineBreaks = !$this->procOptions['disableUnifyLineBreaks'];
	if ($unifyLineBreaks)	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

		// If an entry-cleaner was configured, pass the value through the HTMLcleaner with that:
	if (is_array($entryParserConf))	{
		$value = $this->HTMLcleaner($value,$entryParserConf[0],$entryParserConf[1],$entryParserConf[2],$entryParserConf[3]);
	}

		// Traverse modes:
	foreach($modes as $cmd)	{
		if ($direction=='db')	{	// ->DB
			switch($cmd)	{
				case 'ts_images':
					$value = $this->TS_images_db($value);
				break;
				case 'ts_reglinks':
					$value = $this->TS_reglinks($value,'db');
				break;
				case 'ts_links':
					$value = $this->TS_links_db($value);
				break;
				case 'ts_preserve':
					$value = $this->TS_preserve_db($value);
				break;
				case 'ts_transform':
				case 'css_transform':
					$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
					$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
					$value = $this->TS_transform_db($value,$cmd=='css_transform');
				break;
				case 'ts_strip':
					$value = $this->TS_strip_db($value);
				break;
			}
		} elseif ($direction=='rte')	{	// ->RTE
			switch($cmd)	{
				case 'ts_images':
					$value = $this->TS_images_rte($value);
				break;
				case 'ts_reglinks':
					$value = $this->TS_reglinks($value,'rte');
				break;
				case 'ts_links':
					$value = $this->TS_links_rte($value);
				break;
				case 'ts_preserve':
					$value = $this->TS_preserve_rte($value);
				break;
				case 'ts_transform':
				case 'css_transform':
					$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
					$value = $this->TS_transform_rte($value,$cmd=='css_transform');
				break;
			}
		}
	}

		// If an exit-cleaner was configured, pass the value through the HTMLcleaner with that:
	if (is_array($exitParserConf))	{
		$value = $this->HTMLcleaner($value,$exitParserConf[0],$exitParserConf[1],$exitParserConf[2],$exitParserConf[3]);
	}

		// Final clean up of linebreaks:
	if ($unifyLineBreaks)	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);	// Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value);	// ... and then change all \n into \r\n
	}

	return $value;
}
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361 /************************************
362 *
363 * Specific RTE TRANSFORMATION functions
364 *
365 *************************************/
366
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE have an absolute URL in the src attribute. This URL is converted to a relative URL.
 * If the URL points to another website than the current one, the image is read from that external URL and stored on the local server (unless "dontFetchExtPictures" is set in the processing options).
 * Also "magic" images are processed here: if the dimensions in the tag differ from those of the file, the file is regenerated from its original.
 *
 * @param	string		The content from RTE going to Database
 * @return	string		Processed content
 */
function TS_images_db($value)	{

		// Split content by <img> tags and traverse the resulting array for processing:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if ($k%2)	{	// image found, do processing:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$siteUrl = $this->siteUrl();
			$absRef = trim($attribArray['src']);		// It's always an absolute URL coming from the RTE into the Database.

				// External image from another URL? In that case, fetch image (unless disabled feature).
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures'])	{
				$externalFile = $this->getUrl($absRef);	// Get it
				if ($externalFile)	{
						// A URL may lack a path and a path may lack an extension; treat both as empty instead of reading an undefined index:
					$pU = parse_url($absRef);
					$pI = pathinfo(isset($pU['path']) ? $pU['path'] : '');
					$extension = isset($pI['extension']) ? $pI['extension'] : '';

					if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($extension)))	{
							// The "P" file is the stored original, the "C" file (which carries the extension twice) is the one referenced from the content:
						$filename = t3lib_div::shortMD5($absRef).'.'.$extension;
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						$C_origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$extension;
						if (!@is_file($origFilePath))	{
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
						$absRef = $siteUrl.$this->rteImageStorageDir().'RTEmagicC_'.$filename.'.'.$extension;

						$attribArray['src'] = $absRef;
						$params = t3lib_div::implodeParams($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}

				// Check image as local file (siteURL equals the one of the image)
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
				$path = rawurldecode(substr($absRef,strlen($siteUrl)));	// Rel-path, rawurldecoded for special characters.
				$filepath = t3lib_div::getFileAbsFileName($path);		// Abs filepath, locked to relative path of this project.

					// Check file existence (in relative dir to this installation!)
				if ($filepath && @is_file($filepath))	{

						// If "magic image":
					$pathPre = $this->rteImageStorageDir().'RTEmagicC_';
					if (t3lib_div::isFirstPartOfStr($path,$pathPre))	{
							// Find original file: the name of the "C" file without its last extension
						$pI = pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath))	{
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp = 0;
							$imgObj->tempPath = PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
							$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag
								// Compare dimensions:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1])	{
									// NOTE(review): the result of this call is not used below; the call is kept in case it has side effects - confirm and remove.
								$origImgInfo = $imgObj->getImageDimensions($origFilePath);
								$cW = $curWH[0];
								$cH = 1000;		// Make the image based on the width solely...
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$cW.'m',$cH.'m');
								if ($imgI[3])	{
									@copy($imgI[3],$filepath);	// Override the child file
									unset($attribArray['style']);
									$attribArray['width'] = $imgI[0];
									$attribArray['height'] = $imgI[1];
									if (!$attribArray['border'])	$attribArray['border'] = 0;
									$params = t3lib_div::implodeParams($attribArray,1);
									$imgSplit[$k] = '<img '.$params.' />';
								}
							}
						}

					} elseif ($this->procOptions['plainImageMode'])	{	// If "plain image" has been configured:

							// Image dimensions as set in the image tag
						$curWH = $this->getWHFromAttribs($attribArray);
						$attribArray['width'] = $curWH[0];
						$attribArray['height'] = $curWH[1];

							// Forcing values for style and border:
						unset($attribArray['style']);
						if (!$attribArray['border'])	$attribArray['border'] = 0;

							// Finding dimensions of image file:
						$fI = @getimagesize($filepath);

							// Perform corrections to aspect ratio based on configuration:
						switch((string)$this->procOptions['plainImageMode'])	{
							case 'lockDimensions':
								$attribArray['width'] = $fI[0];
								$attribArray['height'] = $fI[1];
							break;
							case 'lockRatioWhenSmaller':	// If the ratio has to be smaller, then first set the width...:
								if ($attribArray['width']>$fI[0])	$attribArray['width'] = $fI[0];
								// Intentionally no break: continues into 'lockRatio' to set the height from the width.
							case 'lockRatio':
								if ($fI[0]>0)	{
									$attribArray['height'] = round($attribArray['width']*($fI[1]/$fI[0]));
								}
							break;
						}

							// Compile the image tag again:
						$params = t3lib_div::implodeParams($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				} else {	// Image was not found in a proper position on the server!

						// Commented out; removing the image tag might not be that logical...
					#$imgSplit[$k]='';
				}
			}

				// Convert abs to rel url
			if ($imgSplit[$k])	{
				$attribArray = $this->get_tag_attributes_classic($imgSplit[$k],1);
				$absRef = trim($attribArray['src']);
				if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
					$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
					if (!isset($attribArray['alt']))	$attribArray['alt'] = '';		// Must have alt-attribute for XHTML compliance.
					$imgSplit[$k] = '<img '.t3lib_div::implodeParams($attribArray,1,1).' />';
				}
			}
		}
	}
	return implode('',$imgSplit);
}
510
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Processing images from database content going into the RTE.
 * The src attribute of every image that does not already start with "http" is prefixed with the site URL (after cutting off the length of ->relBackPath) and an empty alt attribute is added if none is set.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_images_rte($value)	{

		// Split content by <img> tags; the tags themselves end up on the odd keys:
	$parts = $this->splitTags('img',$value);
	foreach($parts as $idx => $imgTag)	{
		if (!($idx%2))	continue;	// Content between images is left as it is

			// Init
		$attribs = $this->get_tag_attributes_classic($imgTag,1);
		$siteUrl = $this->siteUrl();
		$src = trim($attribs['src']);

			// A src attribute already pointing to an external URL is not touched:
		if (strtolower(substr($src,0,4))=='http')	continue;

		$attribs['src'] = $siteUrl.substr($attribs['src'],strlen($this->relBackPath));
		if (!isset($attribs['alt']))	$attribs['alt'] = '';
		$parts[$idx] = '<img '.t3lib_div::implodeParams($attribs).' />';
	}

		// Return processed content:
	return implode('',$parts);
}
544
/**
 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
 * Converting <A>-tags to/from abs/rel
 *
 * @param	string		Content input
 * @param	string		Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database). For any other value nothing is returned.
 * @return	string		Content output
 */
function TS_reglinks($value,$direction)	{
	switch($direction)	{
		case 'rte':
			return $this->TS_AtagToAbs($value,1);
		case 'db':
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
				// foreach instead of each(), which is no longer available in current PHP versions
			foreach($blockSplit as $k => $v)	{
				if ($k%2)	{	// block:
					$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
						// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
						$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
					$eTag = '</a>';
						// The content inside the link is processed recursively:
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($blockSplit[$k]),$direction).$eTag;
				}
			}
			return implode('',$blockSplit);
	}
}
578
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Converting <A>-tags to <LINK tags>
 * Only links whose attributes are limited to href, target and class (plus the "rteerror" marker with its style) become <LINK>-tags; all other links are kept as <a>-tags with a local site URL prefix replaced by ->relBackPath.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_links_rte()
 */
function TS_links_db($value)	{

		// Split content into <a> tag blocks and process:
	$blocks = $this->splitIntoBlock('A',$value);
	foreach($blocks as $idx => $block)	{
		if (!($idx%2))	continue;	// Not an A-tag block

		$attribs = $this->get_tag_attributes_classic($this->getFirstTag($block),1);
		$info = $this->urlInfoForLinkTags($attribs['href']);

			// Find out whether there are attributes other than href, target and class:
		$extraAttribs = $attribs;
		unset($extraAttribs['href'], $extraAttribs['target'], $extraAttribs['class']);
		if ($extraAttribs['rteerror'])	{	// "rteerror" and the "style" attribute do not count if "rteerror" is set!
			unset($extraAttribs['style'], $extraAttribs['rteerror']);
		}

		if (count($extraAttribs))	{	// Other attributes found; the link is stored as a-tag.
				// Unsetting 'rtekeep' attribute if that had been set.
			unset($attribs['rtekeep']);
				// If the url is local, remove url-prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribs['href'],0,strlen($siteURL))==$siteURL)	{
				$attribs['href'] = $this->relBackPath.substr($attribs['href'],strlen($siteURL));
			}
			$openTag = '<a '.t3lib_div::implodeParams($attribs,1).'>';
			$closeTag = '</a>';
		} else {	// Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes):
			$openTag = '<LINK '.$info['url'];
			if ($attribs['target'])	{
				$openTag.= ' '.$attribs['target'];
			} elseif ($attribs['class'])	{
				$openTag.= ' -';	// Placeholder for the missing target so the class stays in its position
			}
			if ($attribs['class'])	{
				$openTag.= ' '.$attribs['class'];
			}
			$openTag.= '>';
			$closeTag = '</LINK>';
		}

			// The content inside the link is processed recursively:
		$blocks[$idx] = $openTag.$this->TS_links_db($this->removeFirstAndLastTag($blocks[$idx])).$closeTag;
	}
	return implode('',$blocks);
}
626
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting <LINK tags> to <A>-tags
 * A link pointing to a page that cannot be found is marked with an "rteerror" attribute and a highlighting style.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_links_db()
 */
function TS_links_rte($value)	{
	$value = $this->TS_AtagToAbs($value);

		// Split content by the TYPO3 pseudo tag "<LINK>":
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v)	{
		if ($k%2)	{	// block:
			$tagCode = t3lib_div::trimExplode(' ',trim(substr($this->getFirstTag($v),0,-1)),1);
			$link_param = $tagCode[1];
			$href = '';
			$error = '';	// Must be reset for every link, otherwise an error found for one link also marks all links after it
			$siteUrl = $this->siteUrl();
				// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
			if (strstr($link_param,'@'))	{		// mailadr
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#')	{	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar = intval(strpos($link_param, '/'));
				$urlChar = intval(strpos($link_param, '.'));

					// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension']))))	{
					$href = $siteUrl.$link_param;
				} elseif ($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar))	{	// url (external): If doubleSlash or if a '.' comes before a '/'.
						// Prepend "http://" unless the parameter already starts with a scheme:
					$scheme = preg_match('#^[a-z]*://#',trim(strtolower($link_param))) ? '' : 'http://';
					$href = $scheme.$link_param;
				} elseif ($fileChar)	{	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);		// Link-data del
					if (!strcmp($idPart,''))	{ $idPart = $this->recPid; }		// If no id or alias is given, set it to class record pid

						// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1)	{
						$idPart = $pairParts[0];
						// Type ? future support for?
					}
						// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart))	{
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}
						// The href is set whether the page exists or not; a missing page only adds the error marker:
					$page = t3lib_BEfunc::getRecord('pages', $idPart);
					$href = $siteUrl.'?id='.$link_param;
					if (!is_array($page))	{
						$error = 'No page found: '.$idPart;
					}
				}
			}

				// Setting the A-tag:
			$bTag = '<a href="'.htmlspecialchars($href).'"'.
						($tagCode[2]&&$tagCode[2]!='-' ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '').
						($tagCode[3] ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '').
						($error ? ' rteerror="'.htmlspecialchars($error).'" style="background-color: yellow; border:2px red solid; color: black;"' : '').	// Should be OK to add the style; the transformation back to database will remove it...
						'>';
			$eTag = '</a>';
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

		// Return content:
	return implode('',$blockSplit);
}
708
/**
 * Transformation handler: 'ts_preserve' / direction: "db"
 * Turns <span>-tags carrying a "specialtag" attribute back into the tag stored (rawurlencoded) in that attribute.
 * Does nothing if no tags to preserve are configured.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_preserve_rte()
 */
function TS_preserve_db($value)	{
	if (!$this->preserveTags)	return $value;

		// Splitting into blocks for processing (span-tags are used for special tags)
	$chunks = $this->splitIntoBlock('span',$value);
	foreach($chunks as $idx => $chunk)	{
		if (!($idx%2))	continue;	// Not a span block

		$attribs = $this->get_tag_attributes_classic($this->getFirstTag($chunk));
		if (!$attribs['specialtag'])	continue;	// An ordinary span is left as it is

			// Wrap the inner content in the decoded tag again:
		$origTag = rawurldecode($attribs['specialtag']);
		$origTagName = $this->getFirstTagName($origTag);
		$inner = $this->removeFirstAndLastTag($chunks[$idx]);
		$chunks[$idx] = $origTag.$inner.'</'.$origTagName.'>';
	}
	return implode('',$chunks);
}
732
/**
 * Transformation handler: 'ts_preserve' / direction: "rte"
 * Wraps the content of each tag listed in ->preserveTags in a <span>-tag which stores the original opening tag rawurlencoded in its "specialtag" attribute.
 * Does nothing if no tags to preserve are configured.
 *
 * @param	string		Content input
 * @return	string		Content output
 * @see TS_preserve_db()
 */
function TS_preserve_rte($value)	{
	if (!$this->preserveTags)	return $value;

	$chunks = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($chunks as $idx => $chunk)	{
		if ($idx%2)	{	// block:
			$encodedTag = rawurlencode($this->getFirstTag($chunk));
			$inner = $this->removeFirstAndLastTag($chunks[$idx]);
			$chunks[$idx] = '<span specialtag="'.$encodedTag.'">'.$inner.'</span>';
		}
	}
	return implode('',$chunks);
}
750
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 * The content is split by table, blockquote, header, pre and list blocks. Each block is processed according to its tag; the content between blocks is divided into lines.
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_rte()
 */
function TS_transform_db($value,$css=FALSE)	{

		// Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0)	{
			// Give the level back before leaving, otherwise every brake hit reduces the depth available to later calls
		$this->TS_transform_db_safecounter++;
		return $value;
	}

		// Split the content from RTE by the occurrence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.$this->headListTags,$value);

	$cc = 0;
	$aC = count($blockSplit);

		// Traverse the blocks
	foreach($blockSplit as $k => $v)	{
		$cc++;
		$lastBR = $cc==$aC ? '' : chr(10);	// Every part except the last one is followed by a line break

		if ($k%2)	{	// Inside block:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));

				// Process based on the tag:
			switch($tagName)	{
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
					$blockSplit[$k] = '<'.$tagName.'>'.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css)	{
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist'])	{
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
								// foreach/str_replace instead of each()/ereg_replace(), which are no longer available in current PHP versions
							foreach(array_keys($parts) as $k2)	{
								$parts[$k2] = str_replace(array(chr(10),chr(13)),'',$parts[$k2]);	// remove all linebreaks!
								$parts[$k2] = $this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2] = $this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
							}
							if ($tagName=='ol')	{ $params = ' type="1"'; } else { $params = ''; }
							$blockSplit[$k] = '<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k].= $lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css)	{
						$blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k] = str_replace(chr(10),'',$blockSplit[$k]).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css)	{
						$attribArray = $this->get_tag_attributes_classic($tag);
							// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead'])	{
								// Headers become <typohead>-tags; the type attribute is left out for h6:
							$type = intval(substr($tagName,1));
							$blockSplit[$k] = '<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
							$blockSplit[$k] = '<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
						$blockSplit[$k].= $lastBR;
					}
				break;
				default:
					$blockSplit[$k].= $lastBR;
				break;
			}
		} else {	// NON-block:
			if (strcmp(trim($blockSplit[$k]),''))	{
				$blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]).$lastBR;
			} else unset($blockSplit[$k]);	// Parts holding only whitespace are dropped
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
861
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split on TABLE, BLOCKQUOTE, TYPOLIST, TYPOHEAD and the tags listed in $this->headListTags.
 * Blockquotes are processed recursively, <typolist> is turned into <ol>/<ul>, <typohead> into <h1>-<h6>,
 * and the content between the blocks is passed through setDivTags().
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value,$css=0) {

		// Split the content from Database by the occurrence of these blocks:
	$blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags;
	$blockSplit = $this->splitIntoBlock($blockTagList,$value);

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {

			// The element following the current one. It does not exist for the last element, so fall back to an empty string instead of reading an undefined index.
		$nextPart = isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '';

		if ($k%2) {	// Inside one of the blocks:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

				// Based on tagname, we do transformations:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes, but process their content recursively:
					$blockSplit[$k] = $tag.
										$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
										'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
							// Strip one leading and one trailing line (which may contain spaces only):
						$tListContent = preg_replace('/^[ ]*\n/','',$tListContent);
						$tListContent = preg_replace('/\n[ ]*\z/','',$tListContent);
						$lines = explode(chr(10),$tListContent);
						$typ = (isset($attribArray['type']) && $attribArray['type']==1) ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
											'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
											'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
						$typ = t3lib_div::intInRange(isset($attribArray['type']) ? $attribArray['type'] : 0,0,6);
						if (!$typ)	$typ = 6;	// A missing/zero type means H6
						$align = !empty($attribArray['align']) ? ' align="'.$attribArray['align'].'"' : '';
						$class = !empty($attribArray['class']) ? ' class="'.$attribArray['class'].'"' : '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
											$tC.
											'</h'.$typ.'>';
					}
				break;
			}

				// Removing the linebreak that follows a block:
			$blockSplit[$k+1] = preg_replace('/^[ ]*\n/','',$nextPart);
		} else {	// NON-block:
			$nextFTN = $this->getFirstTagName($nextPart);
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList($blockTagList,$nextFTN)) {	// Removing the trailing linebreak if a block follows
				$blockSplit[$k] = preg_replace('/\n[ ]*\z/','',$blockSplit[$k]);
			}

				// If $blockSplit[$k] is blank then unset the line. UNLESS the line happened to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak) {
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}

	return implode(chr(10),$blockSplit);
}
934
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Strips every tag from the content except a fixed list of allowed tags.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_strip_db($value) {
		// Turn the comma list into the "<b><i>..." form that strip_tags() expects:
	$allowedTags = '<'.str_replace(',','><','b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote').'>';

	return strip_tags($value,$allowedTags);
}
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960 /***************************************************************
961 *
962 * Generic RTE transformation, analysis and helper functions
963 *
964 **************************************************************/
965
/**
 * Fetches the content of a file or URL by delegating to t3lib_div::getURL()
 *
 * @param	string		Filepath/URL to read
 * @return	string		Whatever t3lib_div::getURL() returns for the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url) {
	$content = t3lib_div::getURL($url);
	return $content;
}
976
/**
 * Cleans content on its way from the RTE into the database.
 * This is a call to HTMLcleaner() with a tag configuration from getKeepTags('db') and two flags derived from the processing options.
 *
 * @param	string		Content to clean up
 * @param	string		Comma list of tags to specifically allow. If empty, the default configuration from getKeepTags() is used.
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='') {
		// Tag configuration: either the (cached) default or one built for the given tag list:
	$keepTags = $tagList ? $this->getKeepTags('db',$tagList) : $this->getKeepTags('db');

		// Flags passed on to HTMLcleaner(); "dontRemoveUnknownTags_db" switches the first to 1, "dontUndoHSC_db" switches the second from -1 to 0:
	$keepUnknown = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
	$hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;

	return $this->HTMLcleaner($content,$keepTags,$keepUnknown,$hscMode);
}
998
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the result is cached in $this->getKeepTags_cache per direction and reused on the next call.
 *
 * @param	string		The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param	string		Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='') {

		// Serve the cached configuration if there is one and no explicit tag list was requested:
	if (!$tagList && isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction])) {
		return $this->getKeepTags_cache[$direction];
	}

		// Setting up allowed tags:
	if (strcmp($tagList,'')) {	// If the $tagList input var is set, this will take precedence
		$keepTags = array_flip(t3lib_div::trimExplode(',',$tagList,1));
	} else {	// Default is to get allowed/denied tags from internal array of processing options:
			// Construct default list of tags to keep:
		$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
		$keepTags = array_flip(t3lib_div::trimExplode(',',$typoScript_list.','.strtolower($this->procOptions['allowTags']),1));

			// For tags to deny, remove them from $keepTags array.
			// The deny list is lowercased like the allow list, otherwise e.g. "B" would never match the key "b":
		$denyTags = t3lib_div::trimExplode(',',strtolower($this->procOptions['denyTags']),1);
		foreach($denyTags as $dKe) {
			unset($keepTags[$dKe]);
		}
	}

		// Based on the direction of content, set further options:
	switch ($direction) {

			// GOING from database to Rich Text Editor:
		case 'rte':
				// Transform bold/italics tags to strong/em
			if (isset($keepTags['b']))	{ $keepTags['b'] = array('remap'=>'STRONG'); }
			if (isset($keepTags['i']))	{ $keepTags['i'] = array('remap'=>'EM'); }

				// Merge with the "HTMLparser_rte." processing options through HTMLparserConfig():
			list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);
		break;

			// GOING from RTE to database:
		case 'db':
				// Transform strong/em back to bold/italics:
			if (isset($keepTags['strong']))	{ $keepTags['strong'] = array('remap'=>'b'); }
			if (isset($keepTags['em']))		{ $keepTags['em'] = array('remap'=>'i'); }

				// Setting up span tags if they are allowed:
			if (isset($keepTags['span'])) {
				$classes = array_merge(array(''),$this->allowedClasses);
				$keepTags['span'] = array(
					'allowedAttribs' => 'class',
					'fixAttrib' => array(
						'class' => array(
							'list' => $classes,
							'removeIfFalse' => 1
						)
					),
					'rmTagIfNoAttrib' => 1
				);
					// Without configured "allowedClasses" there is no list to restrict the class attribute to:
				if (!$this->procOptions['allowedClasses'])	unset($keepTags['span']['fixAttrib']['class']['list']);
			}

				// Setting up font tags if they are allowed:
			if (isset($keepTags['font'])) {
				$colors = array_merge(array(''),t3lib_div::trimExplode(',',$this->procOptions['allowedFontColors'],1));
				$keepTags['font'] = array(
					'allowedAttribs' => 'face,color,size',
					'fixAttrib' => array(
						'face' => array(
							'removeIfFalse' => 1
						),
						'color' => array(
							'removeIfFalse' => 1,
							'list' => $colors
						),
						'size' => array(
							'removeIfFalse' => 1,
						)
					),
					'rmTagIfNoAttrib' => 1
				);
					// Without configured "allowedFontColors" there is no list to restrict the color attribute to:
				if (!$this->procOptions['allowedFontColors'])	unset($keepTags['font']['fixAttrib']['color']['list']);
			}

				// Setting further options, getting them from the processing options (with defaults for nesting and attribute-less tags):
			$TSc = $this->procOptions['HTMLparser_db.'];
			if (!$TSc['globalNesting'])	$TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
			if (!$TSc['noAttrib'])		$TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';

				// Merge with the processing options through HTMLparserConfig():
			list($keepTags) = $this->HTMLparserConfig($TSc,$keepTags);
		break;
	}

		// A configuration built for an explicit tag list is returned directly and never cached:
	if ($tagList) {
		return $keepTags;
	}

		// Caching (internally, in object memory) the default result:
	$this->getKeepTags_cache[$direction] = $keepTags;
	return $keepTags;
}
1104
/**
 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by chr(10).
 * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * Note: If no div/p section is found (or the recursion brake hits zero) the input value is returned as a plain string, also when $returnArray is set.
 *
 * @param	string		Value to process.
 * @param	integer		Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	string		Processed input value.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {

		// Internalize font tags (move them from OUTSIDE p/div to inside it, if that is the case):
	if ($this->procOptions['internalizeFontTags']) {
		$value = $this->internalizeFontTags($value);
	}

		// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?

	if ($this->procOptions['keepPDIVattribs']) {
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

		// Returns plainly the value if there was no div/p sections in it
	if (count($divSplit)<=1 || $count<=0) {
		return $value;
	}

		// Traverse the split sections:
	foreach($divSplit as $k => $v) {
		if ($k%2) {	// Inside
			$innerContent = $this->removeFirstAndLastTag($v);

				// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($innerContent,$count-1,1);

				// If an array came back there was sub-nesting of p/div and it is used directly as the new content of THIS section (this would be considered 'an error').
				// If NO subsection was found, we process the section as a TRUE line:
			if (!is_array($subLines)) {
				if (!$this->procOptions['dontConvBRtoParagraph']) {
						// Process break-tags: each <br>-separated piece is treated as a line of its own.
						// The bracket expression admits a backslash as well as a slash, as the POSIX pattern it replaces did.
					$subLines = preg_split('/<br[[:space:]]*[\\\\\/]?>/i',$innerContent);
				} else {
					$subLines = array($subLines);
				}

					// The wrapping tag and its attributes are the same for all sub-lines of this section, so analyse them once.
					// $divSplit[$k] still holds the unprocessed section including its p/div tag here.
				$fTag = $this->getFirstTag($divSplit[$k]);
				$tagName = strtolower($this->getFirstTagName($divSplit[$k]));
				$attribs = $this->get_tag_attributes($fTag);

					// Keep attributes (lowercase)
				$newAttribs = array();
				foreach($keepAttribListArr as $keepA) {
					if (isset($attribs[0][$keepA]))	{ $newAttribs[$keepA] = $attribs[0][$keepA]; }
				}

					// ALIGN attribute: Set to value, but not 'left'
				$alignValue = isset($attribs[0]['align']) ? $attribs[0]['align'] : '';
				if (!$this->procOptions['skipAlign'] && strcmp(trim($alignValue),'') && strtolower($alignValue)!='left') {
					$newAttribs['align'] = strtolower($alignValue);
				}

					// CLASS attribute: Set to whatever value, if it is allowed (compared in uppercase against $this->allowedClasses)
				$classValue = isset($attribs[0]['class']) ? $attribs[0]['class'] : '';
				if (!$this->procOptions['skipClass'] && strcmp(trim($classValue),'')) {
					if (!count($this->allowedClasses) || in_array(strtoupper($classValue),$this->allowedClasses)) {
						$newAttribs['class'] = $classValue;
					}
				}

					// The line is only wrapped in a tag again if there are attributes to carry AND "remapParagraphTag" is not "1":
				$wrapLine = count($newAttribs) && strcmp($remapParagraphTag,'1');
				$openTag = '';
				$closeTag = '';
				if ($wrapLine) {
					if ($remapParagraphTag=='P')	$tagName = 'p';
					if ($remapParagraphTag=='DIV')	$tagName = 'div';
					$openTag = '<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>';
					$closeTag = '</'.$tagName.'>';
				}

					// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
				foreach($subLines as $sk => $line) {
						// Clear up the subline for DB.
					$line = $this->HTMLcleaner_db($line);

						// Remove any line break char (10 or 13)
					$line = str_replace(array(chr(10),chr(13)),'',$line);

					$subLines[$sk] = $wrapLine ? $openTag.$line.$closeTag : $line;
				}
			}

				// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

				// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank:
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;')	$divSplit[$k] = '';
		} else {	// outside div:
				// Strip all tags except the ones allowed outside, and remove the part if nothing is left:
			$divSplit[$k] = trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],''))	unset($divSplit[$k]);
		}
	}

		// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1211
/**
 * Wraps every line of the input in <p></p> or <div></div> (unless the line already is a complete p/div section)
 * For processing of content going FROM database TO RTE.
 *
 * @param	string		Value to convert
 * @param	string		Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p') {

		// Configuration for the HTMLcleaner() call that each non-blank line goes through on its way to the RTE:
	$keepTags = $this->getKeepTags('rte');
	$unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';	// Default: 'protect'
	$hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;	// Default: 1
	$restoreNbsp = $this->procOptions['dontConvAmpInNBSP_rte'] ? 0 : 1;	// Default: turn "&amp;nbsp;" back into "&nbsp;"

		// Work through the content line by line, based on chr(10):
	$lines = explode(chr(10),$value);
	foreach($lines as $idx => $line) {

		if (strcmp(trim($line),'')) {	// Line has content: clean it
			$line = $this->HTMLcleaner($line,$keepTags,$unknownTagMode,$hscMode);
			if ($restoreNbsp)	$line = str_replace('&amp;nbsp;','&nbsp;',$line);
		} else {	// Blank line: becomes a non-breaking space
			$line = '&nbsp;';
		}

			// Only wrap the line if it is not already a complete <div>...</div> or <p>...</p> section:
		$probe = strtolower(trim($line));
		$isDivSection = substr($probe,0,4)=='<div' && substr($probe,-6)=='</div>';
		$isPSection = substr($probe,0,2)=='<p' && substr($probe,-4)=='</p>';
		if (!$isDivSection && !$isPSection) {
			$line = '<'.$dT.'>'.$line.'</'.$dT.'>';
		}

		$lines[$idx] = $line;
	}

		// Put the lines back together:
	return implode(chr(10),$lines);
}
1254
/**
 * Moves font-tags from the OUTSIDE of <p>/<div> sections to the inside of them.
 * The value is split into font-tag chunks. If a chunk contains p/div sections, the font-tag is wrapped around the content INSIDE each of those sections
 * (and around non-empty content between them) and the outer font-tag is dropped.
 * This seems to be a good choice for pre-processing content if it has been pasted into the RTE from eg. star-office, where the font-tags are normally on the outside of the sections.
 * Used by eg. divideIntoLines() if the processing option 'internalizeFontTags' is set.
 *
 * @param	string		Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value) {

		// Splitting into font tag blocks:
	$fontSplit = $this->splitIntoBlock('font',$value);

	foreach($fontSplit as $k => $v) {
		if (!($k%2))	continue;	// Outside a font-tag: left untouched

			// Find the opening font-tag and split its content into p/div sections:
		$fTag = $this->getFirstTag($v);
		$sections = $this->splitIntoBlock('div,p',$this->removeFirstAndLastTag($v),1);

			// Nothing to do unless there were div/p sections inside the font-tag:
		if (count($sections)<=1)	continue;

		foreach($sections as $k2 => $v2) {
			if ($k2%2) {	// Inside a p/div section: put the font-tag inside the section tag
				$sectionTag = $this->getFirstTag($v2);
				$sectionTagName = $this->getFirstTagName($v2);
				$sectionContent = $this->removeFirstAndLastTag($v2);
				$sections[$k2] = $sectionTag.$fTag.$sectionContent.'</font>'.'</'.$sectionTagName.'>';
			} elseif (trim(strip_tags($v2))) {	// Between sections: only wrap if there is any text content
				$sections[$k2] = $fTag.$v2.'</font>';
			}
		}
		$fontSplit[$k] = implode('',$sections);
	}

	return implode('',$fontSplit);
}
1295
/**
 * Returns the site URL as reported by t3lib_div::getIndpEnv('TYPO3_SITE_URL').
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl() {
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	return $siteUrl;
}
1305
/**
 * Return the storage folder of RTE image files.
 * $this->rte_p['imgpath'] is used if it is set, otherwise $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string
 */
function rteImageStorageDir() {
	if ($this->rte_p['imgpath']) {
		return $this->rte_p['imgpath'];
	}
	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1315
/**
 * Remove all tables from incoming code
 * The function tries to do this in a more or less respectful way: the content of each <td> cell is resolved and each cell is followed by the break character. Thus at least the content is preserved in some way.
 *
 * @param	string		Input value
 * @param	string		Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />') {

		// Splitting value into table blocks:
	$tableSplit = $this->splitIntoBlock('table',$value);

	foreach($tableSplit as $k => $tableHtml) {
		if (!($k%2))	continue;	// Content outside tables is kept as it is

			// Collect the content of all cells in all rows of this table:
		$flattened = '';
		$rowSplit = $this->splitIntoBlock('tr',$tableHtml);
		foreach($rowSplit as $rowKey => $rowHtml) {
			if (!($rowKey%2))	continue;

			$cells = $this->getAllParts($this->splitIntoBlock('td',$rowHtml),1,0);
			foreach($cells as $cellContent) {
				$flattened .= $cellContent.$breakChar;
			}
		}
		$tableSplit[$k] = $flattened;
	}

		// Implode it all again, with the break character between the parts:
	return implode($breakChar,$tableSplit);
}
1348
/**
 * Default tag mapping for TS: strong/em are mapped to b/i on the way to the database and b/i to strong/em on the way to the RTE.
 *
 * @param	string		Input code to process
 * @param	string		Direction: To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte') {
		// Tag maps per direction, applied through mapTags() when the direction matches:
	$tagMaps = array(
		'db' => array(
			'strong' => 'b',
			'em' => 'i'
		),
		'rte' => array(
			'b' => 'strong',
			'i' => 'em'
		)
	);

	foreach($tagMaps as $mapDirection => $tagMap) {
		if ($direction==$mapDirection) {
			$code = $this->mapTags($code,$tagMap);
		}
	}

	return $code;
}
1371
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute ("width:xxx px" / "height:xxx px"), use that!
 * Otherwise the "width" / "height" attributes are used.
 *
 * @param	array		Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray) {
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style) {
			// Value part of a "property: 123px" declaration.
		$valuePattern = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';
			// The lookbehind makes sure that we match the property itself and not the tail of e.g. "border-width" or "line-height".
		$propertyPrefix = '/(?<![a-z-])';

			// Width
			// Each match uses its own result array, so a failed height match can never pick up the width value (and vice versa).
		if (preg_match($propertyPrefix.'width'.$valuePattern,$style,$widthMatch)) {
			$w = intval($widthMatch[1]);
		}
			// Height
		if (preg_match($propertyPrefix.'height'.$valuePattern,$style,$heightMatch)) {
			$h = intval($heightMatch[1]);
		}
	}

		// Fall back to the plain attributes where the style gave nothing:
	if (!$w && isset($attribArray['width'])) {
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height'])) {
		$h = $attribArray['height'];
	}

	return array(intval($w),intval($h));
}
1398
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * The returned array always has the keys "url" and "type" ("email", "ext", "anchor", "page" or "file").
 * For links that are found to point into the current site, "relScriptPath" and "relUrl" are set as well, and for page links also "pageid" and "cElement".
 *
 * @param	string		URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url) {
	$info = array();
	$url = trim($url);

		// Email links:
	if (substr(strtolower($url),0,7)=='mailto:') {
		$info['url'] = trim(substr($url,7));
		$info['type'] = 'email';
		return $info;
	}

		// Find the length of the common prefix of the URL and the site URL.
		// The loop is limited to the shorter of the two strings so no offset beyond the end of a string is read.
	$curURL = $this->siteUrl();
	$maxCommon = min(strlen($url),strlen($curURL));
	for($a=0;$a<$maxCommon;$a++) {
		if ($url[$a]!=$curURL[$a]) {
			break;
		}
	}

		// Until proven otherwise the link is regarded as external:
	$info['relScriptPath'] = substr($curURL,$a);
	$info['relUrl'] = substr($url,$a);
	$info['url'] = $url;
	$info['type'] = 'ext';

		// parse_url() returns FALSE for seriously malformed URLs and leaves out components that are not present, so every part is read with a fallback:
	$linkParts = parse_url($url);
	$siteParts = parse_url($curURL);
	$linkHost = (is_array($linkParts) && isset($linkParts['host'])) ? $linkParts['host'] : '';
	$siteHost = (is_array($siteParts) && isset($siteParts['host'])) ? $siteParts['host'] : '';
	$linkFragment = (is_array($linkParts) && isset($linkParts['fragment'])) ? $linkParts['fragment'] : '';

		// The script path must be empty (FE-EDIT) or start with TYPO3_mainDir:
	$scriptPathMatches = !$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir);

	if ($linkHost==$siteHost && $scriptPathMatches) {	// Hosts should match

			// New processing order 100502
		$uP = parse_url($info['relUrl']);
		$relPath = (is_array($uP) && isset($uP['path'])) ? $uP['path'] : '';
		$relQuery = (is_array($uP) && isset($uP['query'])) ? $uP['query'] : '';
		$relFragment = (is_array($uP) && isset($uP['fragment'])) ? $uP['fragment'] : '';

		if (!strcmp('#'.$linkFragment,$info['relUrl'])) {	// The relative URL is nothing but a fragment: anchor
			$info['url'] = $info['relUrl'];
			$info['type'] = 'anchor';
		} elseif (!trim($relPath) || !strcmp($relPath,'index.php')) {	// No path or index.php: page link, if an id is found in the query
			$pp = explode('id=',$relQuery);
			$id = isset($pp[1]) ? trim($pp[1]) : '';
			if ($id) {
				$info['pageid'] = $id;
				$info['cElement'] = $relFragment;
				$info['url'] = $id.($info['cElement']?'#'.$info['cElement']:'');
				$info['type'] = 'page';
			}
		} else {	// Anything else inside the site is regarded as a file
			$info['url'] = $info['relUrl'];
			$info['type'] = 'file';
		}
	} else {
		unset($info['relScriptPath']);
		unset($info['relUrl']);
	}

	return $info;
}
1456
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 * <a>-tags without a href value (which could be anchor points) keep their href untouched and always get the "rtekeep" attribute.
 *
 * @param	string		Content input
 * @param	boolean		If true, then the "rtekeep" attribute will not be set (except for <a>-tags without href, see above).
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
	$blockSplit = $this->splitIntoBlock('A',$value);

	foreach($blockSplit as $k => $v) {
		if (!($k%2))	continue;	// Only the <a>-blocks are processed

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);

			// Checking if there is a scheme, and if not, prepend the current url.
		if (isset($attribArray['href']) && strlen($attribArray['href'])) {	// ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
			$uP = parse_url(strtolower($attribArray['href']));
				// parse_url() returns FALSE for seriously malformed URLs; that is treated like a missing scheme:
			if (!is_array($uP) || empty($uP['scheme'])) {
				$attribArray['href'] = $this->siteUrl().substr($attribArray['href'],strlen($this->relBackPath));
			}
		} else {
			$attribArray['rtekeep'] = 1;
		}
		if (!$dontSetRTEKEEP)	$attribArray['rtekeep'] = 1;

		$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
		$eTag = '</a>';
			// NOTE(review): the recursive call does not pass $dontSetRTEKEEP on, so nested <a>-tags always get "rtekeep" - confirm whether that is intended.
		$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
	}

	return implode('',$blockSplit);
}
1489 }
1490
1491
// Include the file registered under ['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php'] for the current TYPO3_MODE, if there is one:
if (defined('TYPO3_MODE')) {
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
		include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
	}
}
1495 ?>