Updated copyright notices to show "2004"
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2004 Kasper Skaarhoj (kasper@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasper@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 102: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 137: function init($elRef='',$recPid=0)
44 * 149: function setRelPath($path)
45 * 173: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 231: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 378: function TS_images_db($value)
52 * 479: function TS_images_rte($value)
53 * 513: function TS_reglinks($value,$direction)
54 * 547: function TS_links_db($value)
55 * 591: function TS_links_rte($value)
56 * 666: function TS_preserve_db($value)
57 * 690: function TS_preserve_rte($value)
58 * 711: function TS_transform_db($value,$css=FALSE)
59 * 822: function TS_transform_rte($value,$css=0)
60 * 893: function TS_strip_db($value)
61 *
62 * SECTION: Generic RTE transformation, analysis and helper functions
63 * 924: function getURL($url)
64 * 938: function HTMLcleaner_db($content,$tagList='')
65 * 959: function getKeepTags($direction='rte',$tagList='')
66 * 1068: function divideIntoLines($value,$count=5,$returnArray=FALSE)
67 * 1172: function setDivTags($value,$dT='p')
68 * 1217: function internalizeFontTags($value)
69 * 1253: function siteUrl()
70 * 1263: function rteImageStorageDir()
71 * 1275: function removeTables($value,$breakChar='<br />')
72 * 1307: function defaultTStagMapping($code,$direction='rte')
73 * 1330: function getWHFromAttribs($attribArray)
74 * 1356: function urlInfoForLinkTags($url)
75 * 1415: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
76 *
77 * TOTAL FUNCTIONS: 27
78 * (This index is automatically created/updated by the extension "extdeveval")
79 *
80 */
81
82 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
83
84
85
86
87
88
89
90
91
92
93
94
95 /**
96 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
97 *
98 * @author Kasper Skaarhoj <kasper@typo3.com>
99 * @package TYPO3
100 * @subpackage t3lib
101 */
102 class t3lib_parsehtml_proc extends t3lib_parsehtml {
103
104 // Static:
105 var $headListTags = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6'; // List of tags for header, pre and list containers
106
107 // Internal, static:
108 var $recPid = 0; // Set this to the pid of the record manipulated by the class.
109 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
110 var $relPath=''; // Relative path
111 var $relBackPath=''; // Relative back-path
112 var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
113
114 // Internal, dynamic
115 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls.
116 var $rte_p=''; // Parameters from TCA types configuration related to the RTE
117 var $getKeepTags_cache=array(); // Data caching for processing function
118 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE
119 var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
120
121
122
123
124
125
126
127
128
129
130 /**
131 * Initialize, setting element reference and record PID
132 *
133 * @param string Element reference, eg "tt_content:bodytext"
134 * @param integer PID of the record (page id)
135 * @return void
136 */
function init($elRef='',$recPid=0)	{
		// Remember which element ("[table]:[field]") and which page id the following transformations relate to.
	$this->elRef = $elRef;
	$this->recPid = $recPid;
}
141
142 /**
143 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
144 * This is used when editing files with the RTE
145 *
146 * @param string The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
147 * @return void There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
148 */
function setRelPath($path)	{
		// Normalize the input: strip surrounding whitespace, then at most ONE leading and ONE trailing slash.
		// preg_replace() is used because the POSIX ereg_* functions were deprecated in PHP 5.3 and removed in PHP 7.0.
		// The "D" modifier makes "$" match only at the very end of the string, as it did with ereg_replace().
	$path = trim($path);
	$path = preg_replace('#^/#','',$path);
	$path = preg_replace('#/$#D','',$path);

		// An empty path (or the string "0", which PHP evaluates as false) leaves ->relPath and ->relBackPath untouched.
	if ($path)	{
			// One "../" per path segment, eg. "fileadmin/static" yields "../../"
		$segmentCount = count(explode('/',$path));
		$this->relBackPath = str_repeat('../',$segmentCount);

			// ->relPath always carries a trailing slash, eg. "fileadmin/static/"
		$this->relPath = $path.'/';
	}
}
163
164 /**
165 * Evaluate the environment for editing a staticFileEdit file.
166 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
167 *
168 * @param array Parameters for the current field as found in types-config
169 * @param array Current record we are editing.
170 * @return mixed On success an array with various information is returned, otherwise a string with an error message
171 * @see t3lib_TCEmain, t3lib_transferData
172 */
function evalWriteFile($pArr,$currentRecord)	{

		// Without a parameter array there is nothing to evaluate. Nothing (NULL) is returned in that case.
	if (!is_array($pArr))	{
		return;
	}

		// The configured edit path must be set, must end with a slash and must exist as a directory below PATH_site:
	$editPath = isset($GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath']) ? $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'] : '';
	if (!$editPath || substr($editPath,-1)!='/' || !@is_dir(PATH_site.$editPath))	{
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

		// Positional parameters: [0] = field holding the file name, [1] = content field, [2] = marker field, [3] = load-from-file field, [4] = status field.
		// Missing entries are read as empty strings so that short parameter lists do not raise notices.
	$SW_p = isset($pArr['parameters']) ? $pArr['parameters'] : array();
	$SW_editFileField = isset($SW_p[0]) ? trim($SW_p[0]) : '';
	$SW_editFile = ($SW_editFileField && isset($currentRecord[$SW_editFileField])) ? $currentRecord[$SW_editFileField] : '';

		// The file name must be present in the record and pass the path validation:
	if (!$SW_editFileField || !$SW_editFile || !t3lib_div::validPathStr($SW_editFile))	{
		return "ERROR: Edit file name could not be found or was bad.";
	}

		// The file itself must exist:
	$SW_relpath = $editPath.$SW_editFile;
	$SW_editFile = PATH_site.$SW_relpath;
	if (!@is_file($SW_editFile))	{
		return "ERROR: Editfile '".$SW_relpath."' did not exist";
	}

		// Success: Return the collected information.
	return array(
		'editFile' => $SW_editFile,
		'relEditFile' => $SW_relpath,
		'contentField' => isset($SW_p[1]) ? trim($SW_p[1]) : '',
		'markerField' => isset($SW_p[2]) ? trim($SW_p[2]) : '',
		'loadFromFileField' => isset($SW_p[3]) ? trim($SW_p[3]) : '',
		'statusField' => isset($SW_p[4]) ? trim($SW_p[4]) : ''
	);
}
201
202
203
204
205
206
207
208
209
210
211
212
213
214 /**********************************************
215 *
216 * Main function
217 *
218 **********************************************/
219
220 /**
221 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
222 * This is the main function called from tcemain and transfer data classes
223 *
224 * @param string Input value
225 * @param array Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
226 * @param string Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
227 * @param array Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
228 * @return string Output value
229 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
230 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())	{

		// Init: Processing options and the (uppercased) list of tags to preserve
	$this->procOptions = $thisConfig['proc.'];
	$preserveList = t3lib_div::trimExplode(',',$this->procOptions['preserveTags']);
	$this->preserveTags = strtoupper(implode(',',$preserveList));

		// Get parameters for rte_transformation:
	$this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);
	$p = $this->rte_p;

		// Setting modes: "overruleMode" (comma list) takes precedence over the "mode" parameter (dash list)
	$overruleMode = $this->procOptions['overruleMode'];
	if (strcmp($overruleMode,''))	{
		$modes = array_unique(t3lib_div::trimExplode(',',$overruleMode));
	} else {
		$modes = array_unique(t3lib_div::trimExplode('-',$p['mode']));
	}
	$revmodes = array_flip($modes);

		// Expand the shortcut modes "ts" and "ts_css" into the transformations they stand for:
	$shortcutModes = array(
		'ts' => 'ts_transform,ts_preserve,ts_images,ts_links',
		'ts_css' => 'css_transform,ts_images,ts_links'
	);
	foreach($shortcutModes as $shortcut => $expansion)	{
		if (isset($revmodes[$shortcut]))	{
			$modes[$revmodes[$shortcut]] = $expansion;
		}
	}
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

		// Towards the RTE the transformations are applied in reverse order:
	if ($direction=='rte')	{
		$modes = array_reverse($modes);
	}

		// Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and are thus totally independent processing options you can set up:
	$entry_HTMLparser = '';
	if ($this->procOptions['entryHTMLparser_'.$direction])	{
		$entry_HTMLparser = $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']);
	}
	$exit_HTMLparser = '';
	if ($this->procOptions['exitHTMLparser_'.$direction])	{
		$exit_HTMLparser = $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']);
	}

		// Line breaks of content are unified: char 13 + char 10 sequences become char 10 only
	if (!$this->procOptions['disableUnifyLineBreaks'])	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

		// If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($entry_HTMLparser))	{
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

		// Traverse modes (the mode "dummy" and unknown modes do nothing):
	foreach($modes as $cmd)	{
		$isTransform = ($cmd=='ts_transform' || $cmd=='css_transform');

			// ->DB
		if ($direction=='db')	{
			if ($cmd=='ts_images')	{
				$value = $this->TS_images_db($value);
			} elseif ($cmd=='ts_reglinks')	{
				$value = $this->TS_reglinks($value,'db');
			} elseif ($cmd=='ts_links')	{
				$value = $this->TS_links_db($value);
			} elseif ($cmd=='ts_preserve')	{
				$value = $this->TS_preserve_db($value);
			} elseif ($isTransform)	{
				$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
				$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
				$value = $this->TS_transform_db($value,$cmd=='css_transform');
			} elseif ($cmd=='ts_strip')	{
				$value = $this->TS_strip_db($value);
			}
		}

			// ->RTE
		if ($direction=='rte')	{
			if ($cmd=='ts_images')	{
				$value = $this->TS_images_rte($value);
			} elseif ($cmd=='ts_reglinks')	{
				$value = $this->TS_reglinks($value,'rte');
			} elseif ($cmd=='ts_links')	{
				$value = $this->TS_links_rte($value);
			} elseif ($cmd=='ts_preserve')	{
				$value = $this->TS_preserve_rte($value);
			} elseif ($isTransform)	{
				$value = str_replace(chr(13),'',$value);	// Has a very disturbing effect, so just remove all '13' - depend on '10'
				$value = $this->TS_transform_rte($value,$cmd=='css_transform');
			}
		}
	}

		// If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
	if (is_array($exit_HTMLparser))	{
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

		// Final clean up of linebreaks:
	if (!$this->procOptions['disableUnifyLineBreaks'])	{
		$value = str_replace(chr(13).chr(10),chr(10),$value);	// Make sure no \r\n sequences have entered in the meantime...
		$value = str_replace(chr(10),chr(13).chr(10),$value);	// ... and then change all \n into \r\n
	}

		// Return value:
	return $value;
}
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361 /************************************
362 *
363 * Specific RTE TRANSFORMATION functions
364 *
365 *************************************/
366
367 /**
368 * Transformation handler: 'ts_images' / direction: "db"
369 * Processing images inserted in the RTE.
370 * This is used when content goes from the RTE to the database.
371 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
372 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
373 * Also "magic" images are processed here.
374 *
375 * @param string The content from RTE going to Database
376 * @return string Processed content
377 */
function TS_images_db($value)	{

		// Split content by <img> tags and traverse the resulting array for processing (odd keys hold the tags):
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if ($k%2)	{	// image found, do processing:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$siteUrl = $this->siteUrl();
			$absRef = isset($attribArray['src']) ? trim($attribArray['src']) : '';

				// External image from another URL? Then fetch it and keep a local copy (unless "dontFetchExtPictures" is set)
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures'])	{
				$externalFile = $this->getURL($absRef);	// Get it
				if ($externalFile)	{
					$pU = parse_url($absRef);
					$pI = pathinfo(isset($pU['path']) ? $pU['path'] : '');
					$extension = isset($pI['extension']) ? $pI['extension'] : '';

						// Only files with a known image extension are stored locally:
					if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($extension)))	{
						$storageDir = $this->rteImageStorageDir();
						$filename = t3lib_div::shortMD5($absRef).'.'.$extension;
							// "RTEmagicP_" is the original ("parent"), "RTEmagicC_" the copy referenced from the content.
							// The copy carries the extension twice; the local-file check below relies on that when it derives the original's name.
						$origFilePath = PATH_site.$storageDir.'RTEmagicP_'.$filename;
						$C_origFilePath = PATH_site.$storageDir.'RTEmagicC_'.$filename.'.'.$extension;
						if (!@is_file($origFilePath))	{
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
						$absRef = $siteUrl.$storageDir.'RTEmagicC_'.$filename.'.'.$extension;

						$attribArray['src'] = $absRef;
						$params = t3lib_div::implodeParams($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}

				// Check image as local file
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
				$path = substr($absRef,strlen($siteUrl));
				$pathPre = $this->rteImageStorageDir().'RTEmagicC_';

				if (t3lib_div::isFirstPartOfStr($path,$pathPre))	{
					$filepath = PATH_site.$path;
					if (@is_file($filepath))	{
							// Find original file: Strip the prefix and the last extension from the copy's name
						$pI = pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath))	{
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp = 0;
							$imgObj->tempPath = PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
							$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag

								// Compare dimensions; if the tag asks for another size than the file has, re-create the copy from the original:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1])	{
									// Only the width from the tag is used; the height limit is fixed at 1000 so the image is made based on the width solely.
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$curWH[0].'m','1000m');
								if ($imgI[3])	{
									@copy($imgI[3],$filepath);	// Override the child file
									unset($attribArray['style']);
									$attribArray['width'] = $imgI[0];
									$attribArray['height'] = $imgI[1];
									if (empty($attribArray['border']))	$attribArray['border'] = 0;
									$params = t3lib_div::implodeParams($attribArray,1);
									$imgSplit[$k] = '<img '.$params.' />';
								}
							}
						}
					}
				}
			}

				// Convert abs to rel url (re-reading the attributes since the tag may have been rewritten above)
			$attribArray = $this->get_tag_attributes_classic($imgSplit[$k],1);
			$absRef = isset($attribArray['src']) ? trim($attribArray['src']) : '';
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl))	{
				$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
				if (!isset($attribArray['alt']))	$attribArray['alt'] = '';	// Must have alt-attribute for XHTML compliance.
				$imgSplit[$k] = '<img '.t3lib_div::implodeParams($attribArray,1,1).' />';
			}
		}
	}
	return implode('',$imgSplit);
}
468
469 /**
470 * Transformation handler: 'ts_images' / direction: "rte"
471 * Processing images from database content going into the RTE.
472 * Processing includes converting the src attribute to an absolute URL.
473 *
474 * @param string Content input
475 * @return string Content output
476 */
function TS_images_rte($value)	{

		// Split content by <img> tags; the tags themselves are found on the odd keys:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v)	{
		if (!($k%2))	continue;	// Not an image tag, leave untouched

			// Init
		$attribArray = $this->get_tag_attributes_classic($v,1);
		$siteUrl = $this->siteUrl();
		$absRef = trim($attribArray['src']);
		$pointsToExternalUrl = (strtolower(substr($absRef,0,4))=='http');

			// Unless the src attribute is already pointing to an external URL, prefix the site URL (cutting off as many characters as ->relBackPath is long):
		if (!$pointsToExternalUrl)	{
			$attribArray['src'] = $siteUrl.substr($attribArray['src'],strlen($this->relBackPath));
			if (!isset($attribArray['alt']))	{
				$attribArray['alt'] = '';
			}
			$imgSplit[$k] = '<img '.t3lib_div::implodeParams($attribArray).' />';
		}
	}

		// return processed content:
	return implode('',$imgSplit);
}
502
503 /**
504 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
505 * Converting <A>-tags to/from abs/rel
506 *
507 * @param string Content input
508 * @param string Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
509 * @return string Content output
510 */
function TS_reglinks($value,$direction)	{
		// Nothing is returned for any other direction than "rte" or "db".
	switch($direction)	{
		case 'rte':
				// Database -> RTE: Make the href of <a>-tags absolute.
			return $this->TS_AtagToAbs($value,1);
		case 'db':
				// RTE -> database: Strip the site URL from local links, making them relative.
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
				// foreach() replaces the former while(list()=each()) construct; each() was removed in PHP 8.0.
			foreach($blockSplit as $k => $v)	{
				if ($k%2)	{	// block:
					$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
						// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
						$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
					$eTag = '</a>';
						// The content between the tags is processed recursively:
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($blockSplit[$k]),$direction).$eTag;
				}
			}
			return implode('',$blockSplit);
	}
}
536
537 /**
538 * Transformation handler: 'ts_links' / direction: "db"
539 * Converting <A>-tags to <LINK tags>
540 *
541 * @param string Content input
542 * @return string Content output
543 * @see TS_links_rte()
544 */
function TS_links_db($value)	{

		// Split content into <a> tag blocks; the blocks are found on the odd keys:
	$blockSplit = $this->splitIntoBlock('A',$value);
	foreach($blockSplit as $k => $v)	{
		if (!($k%2))	continue;	// Content outside of A-tags is left untouched

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
		$info = $this->urlInfoForLinkTags($attribArray['href']);

			// Find out if the tag carries other attributes than href, target and class:
		$otherAttributes = $attribArray;
		unset($otherAttributes['href'], $otherAttributes['target'], $otherAttributes['class']);

		if (count($otherAttributes)==0)	{
				// Only href, target and class: The link can be stored as the TYPO3 pseudo-tag "<LINK [url] [target] [class]>".
				// If a class but no target is set, "-" fills the position of the target.
			$targetPart = '';
			if ($attribArray['target'])	{
				$targetPart = ' '.$attribArray['target'];
			} elseif ($attribArray['class'])	{
				$targetPart = ' -';
			}
			$classPart = $attribArray['class'] ? ' '.$attribArray['class'] : '';

			$bTag = '<LINK '.$info['url'].$targetPart.$classPart.'>';
			$eTag = '</LINK>';
		} else {
				// ... otherwise store the link as a-tag, without the 'rtekeep' attribute if that had been set.
			unset($attribArray['rtekeep']);

				// If the url is local, remove url-prefix
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL)	{
				$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
			}

			$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
			$eTag = '</a>';
		}

			// The content between the tags is processed recursively:
		$blockSplit[$k] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
	}
	return implode('',$blockSplit);
}
580
581 /**
582 * Transformation handler: 'ts_links' / direction: "rte"
583 * Converting <LINK tags> to <A>-tags
584 *
585 * @param string Content input
586 * @return string Content output
587 * @see TS_links_db()
588 */
function TS_links_rte($value)	{
		// Converts the TYPO3 pseudo tag "<LINK [parameter] [target] [class]>" into regular <a>-tags for the RTE.
	$value = $this->TS_AtagToAbs($value);

		// Split content by the TYPO3 pseudo tag "<LINK>" (blocks are found on the odd keys):
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v)	{
		if ($k%2)	{	// block:
				// $tagCode: [0] = tag name, [1] = link parameter, [2] = target (or "-"), [3] = class
			$tagCode = t3lib_div::trimExplode(' ',trim(substr($this->getFirstTag($v),0,-1)),1);
			$link_param = isset($tagCode[1]) ? $tagCode[1] : '';
			$href = '';
			$siteUrl = $this->siteUrl();

				// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
			if (strstr($link_param,'@'))	{		// mailadr
					// preg_replace() replaces eregi_replace(), which was removed in PHP 7.0
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#')	{	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar = intval(strpos($link_param, '/'));
				$urlChar = intval(strpos($link_param, '.'));

					// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				$rootFileExt = isset($rFD_fI['extension']) ? strtolower($rFD_fI['extension']) : '';

				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',$rootFileExt)))	{
					$href = $siteUrl.$link_param;
				} elseif ($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar))	{	// url (external): If doubleSlash or if a '.' comes before a '/'.
						// "http://" is prefixed unless a scheme is already there (preg_match() replaces ereg(), which was removed in PHP 7.0)
					$scheme = preg_match('#^[a-z]*://#',trim(strtolower($link_param))) ? '' : 'http://';
					$href = $scheme.$link_param;
				} elseif ($fileChar)	{	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);	// The part in front of a "#"
					if (!strcmp($idPart,''))	{	// If no id or alias is given, set it to class record pid
						$idPart = $this->recPid;
					}

						// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair (only the id is used)
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1)	{
						$idPart = $pairParts[0];
					}

						// Checking if the id-parameter is an alias. If so, it is looked up in the "pages" table (yielding 0 if not found).
					if (!t3lib_div::testInt($idPart))	{
						$aliasRows = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = (is_array($aliasRows) && isset($aliasRows[0]['uid'])) ? intval($aliasRows[0]['uid']) : 0;
					}

						// Page must exist, otherwise the href stays blank:
					$page = t3lib_BEfunc::getRecord('pages',$idPart);
					$href = is_array($page) ? $siteUrl.'?id='.$link_param : '';
				}
			}

				// Setting the A-tag ("-" as target means "no target"):
			$targetAttr = (!empty($tagCode[2]) && $tagCode[2]!='-') ? ' target="'.htmlspecialchars($tagCode[2]).'"' : '';
			$classAttr = !empty($tagCode[3]) ? ' class="'.htmlspecialchars($tagCode[3]).'"' : '';
			$bTag = '<a href="'.htmlspecialchars($href).'"'.$targetAttr.$classAttr.'>';
			$eTag = '</a>';
				// The content between the tags is processed recursively:
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])).$eTag;
		}
	}

		// Return content:
	return implode('',$blockSplit);
}
657
658 /**
659 * Preserve special tags
660 *
661 * @param string Content input
662 * @return string Content output
663 */
function TS_preserve_db($value)	{
		// Nothing to do unless tags to preserve have been configured:
	if (!$this->preserveTags)	{
		return $value;
	}

		// Splitting into blocks for processing (span-tags are used for special tags); blocks are found on the odd keys
	$blockSplit = $this->splitIntoBlock('span',$value);
	foreach($blockSplit as $k => $v)	{
		if (!($k%2))	continue;

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));
		if ($attribArray['specialtag'])	{
				// The "specialtag" attribute holds the rawurlencoded original start tag; it takes the place of the span-tag again:
			$theTag = rawurldecode($attribArray['specialtag']);
			$closingTag = '</'.$this->getFirstTagName($theTag).'>';
			$blockSplit[$k] = $theTag.$this->removeFirstAndLastTag($blockSplit[$k]).$closingTag;
		}
	}
	return implode('',$blockSplit);
}
681
682 /**
683 * Preserve special tags
684 *
685 * @param string Content input
686 * @return string Content output
687 */
function TS_preserve_rte($value)	{
		// Nothing to do unless tags to preserve have been configured:
	if (!$this->preserveTags)	{
		return $value;
	}

		// Each block of a preserved tag (odd keys) is wrapped in a span-tag which keeps the rawurlencoded original start tag in its "specialtag" attribute:
	$blockSplit = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($blockSplit as $k => $v)	{
		if (!($k%2))	continue;

		$encodedTag = rawurlencode($this->getFirstTag($v));
		$innerContent = $this->removeFirstAndLastTag($blockSplit[$k]);
		$blockSplit[$k] = '<span specialtag="'.$encodedTag.'">'.$innerContent.'</span>';
	}
	return implode('',$blockSplit);
}
699
700 /**
701 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
702 * Cleaning (->db) for standard content elements (ts)
703 *
704 * @param string Content input
705 * @param boolean If true, the transformation was "css_transform", otherwise "ts_transform"
706 * @return string Content output
707 * @see TS_transform_rte()
708 */
function TS_transform_db($value,$css=FALSE)	{

		// Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
		// The counter is decremented on entry and incremented again on regular exit, so it tracks the recursion depth.
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0)	return $value;

		// Split the content from RTE by the occurrence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.$this->headListTags,$value);

	$cc = 0;
	$aC = count($blockSplit);

		// Traverse the blocks
	foreach($blockSplit as $k => $v)	{
		$cc++;
		$lastBR = $cc==$aC ? '' : chr(10);	// Every element but the last is followed by a line break

		if ($k%2)	{	// Inside block:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));

				// Process based on the tag:
			switch($tagName)	{
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
					$blockSplit[$k] = '<'.$tagName.'>'.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css)	{
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist'])	{
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
								// foreach() replaces the former while(list()=each()) construct; each() was removed in PHP 8.0.
							foreach(array_keys($parts) as $k2)	{
									// Remove all linebreaks! (str_replace() replaces ereg_replace(), which was removed in PHP 7.0)
								$parts[$k2] = str_replace(array(chr(10),chr(13)),'',$parts[$k2]);
								$parts[$k2] = $this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2] = $this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
							}
								// Ordered lists are marked with type="1"; each list item becomes one line.
							if ($tagName=='ol')	{ $params = ' type="1"'; } else { $params = ''; }
							$blockSplit[$k] = '<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k] .= $lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css)	{
						$blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k] = str_replace(chr(10),'',$blockSplit[$k]).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css)	{
						$attribArray = $this->get_tag_attributes_classic($tag);
							// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead'])	{
								// Headers become <typohead>-tags; the type attribute is left out for level 6.
							$type = intval(substr($tagName,1));
							$blockSplit[$k] = '<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
								// Headers are kept, but only with their align and class attributes:
							$blockSplit[$k] = '<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
						$blockSplit[$k] .= $lastBR;
					}
				break;
				default:
					$blockSplit[$k] .= $lastBR;
				break;
			}
		} else {	// NON-block:
				// Blank content between blocks is dropped, everything else is divided into lines:
			if (strcmp(trim($blockSplit[$k]),''))	{
				$blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]).$lastBR;
			} else unset($blockSplit[$k]);
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
810
811 /**
812 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
813 * Set (->rte) for standard content elements (ts)
814 *
815 * @param string Content input
816 * @param boolean If true, the transformation was "css_transform", otherwise "ts_transform"
817 * @return string Content output
818 * @see TS_transform_db()
819 */
function TS_transform_rte($value,$css=0)	{
		// Note on the regular expressions below: preg_replace() replaces ereg_replace(), which was removed in PHP 7.0.
		// The "D" modifier makes "$" match only at the very end of the string, which is what ereg_replace() did.

		// Split the content from Database by the occurrence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags,$value);

		// Traverse the blocks
	foreach($blockSplit as $k => $v)	{
		if ($k%2)	{	// Inside one of the blocks:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

				// Based on tagname, we do transformations:
			switch($tagName)	{
				case 'blockquote':	// Keep blockquotes, transforming the inside recursively:
					$blockSplit[$k] = $tag.
									$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
									'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist'])	{
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
							// Strip the first and the last line break (with any spaces around them); every remaining line becomes a list item:
						$tListContent = preg_replace('/^[ ]*'.chr(10).'/','',$tListContent);
						$tListContent = preg_replace('/'.chr(10).'[ ]*$/D','',$tListContent);
						$lines = explode(chr(10),$tListContent);
						$typ = (isset($attribArray['type']) && $attribArray['type']==1) ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
										'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
										'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead'])	{
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
							// A missing or zero type means header level 6:
						$typ = t3lib_div::intInRange(isset($attribArray['type']) ? $attribArray['type'] : 0,0,6);
						if (!$typ)	$typ = 6;
						$align = !empty($attribArray['align']) ? ' align="'.$attribArray['align'].'"' : '';
						$class = !empty($attribArray['class']) ? ' class="'.$attribArray['class'].'"' : '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
										$tC.
										'</h'.$typ.'>';
					}
				break;
			}
				// Removing the line break which follows the block. If there is no following element, an empty one is created (as before).
			$blockSplit[$k+1] = preg_replace('/^[ ]*'.chr(10).'/','',isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '');
		} else {	// NON-block:
			$nextFTN = $this->getFirstTagName(isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '');
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList('TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags,$nextFTN))	{	// Removing the line break in front of a following block
				$blockSplit[$k] = preg_replace('/'.chr(10).'[ ]*$/D','',$blockSplit[$k]);
			}
				// If $blockSplit[$k] is blank then unset the line. UNLESS the line happened to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak)	{
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}
	return implode(chr(10),$blockSplit);
}
883
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Strips every tag from the content except a fixed list of allowed tags.
 * Only the tags are removed; the text between them is left in place (behaviour of PHP's strip_tags()).
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_strip_db($value) {
		// Tag names which survive the stripping:
	$allowedTagNames = array(
		'b','i','u','a','img','br','div','center','pre','font',
		'hr','sub','sup','p','strong','em','li','ul','ol','blockquote'
	);

		// strip_tags() expects the allowed tags as one string: "<b><i><u>..."
	$allowedTagString = '';
	foreach($allowedTagNames as $allowedTagName) {
		$allowedTagString .= '<'.$allowedTagName.'>';
	}

	return strip_tags($value,$allowedTagString);
}
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909 /***************************************************************
910 *
911 * Generic RTE transformation, analysis and helper functions
912 *
913 **************************************************************/
914
/**
 * Fetches the content of a file or URL.
 * This is a plain pass-through to t3lib_div::getURL().
 *
 * @param	string		Filepath/URL to read
 * @return	string		Whatever t3lib_div::getURL() returned for the resource
 * @see t3lib_div::getURL()
 */
function getURL($url) {
	$resourceContent = t3lib_div::getURL($url);
	return $resourceContent;
}
925
/**
 * Cleans content on its way into the database.
 * Calls HTMLcleaner() with the tag configuration from getKeepTags('db') and two settings taken from the processing options:
 * - procOptions[dontRemoveUnknownTags_db]: if set, unknown tags are kept (default: they are removed).
 * - procOptions[dontUndoHSC_db]: if set, no entity conversion is done (default: -1, i.e. literals like &lt; are converted back to characters).
 *
 * @param	string		Content to clean up
 * @param	string		Comma list of tags to specifically allow. If empty, the default configuration of getKeepTags() is used.
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='') {
		// An explicit tag list is only handed on when it is non-empty:
	$tagConfiguration = $tagList ? $this->getKeepTags('db',$tagList) : $this->getKeepTags('db');

	$keepUnknownTags = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
	$hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;

	return $this->HTMLcleaner($content,$tagConfiguration,$keepUnknownTags,$hscMode);
}
947
/**
 * Creates the tag configuration array for the HTMLcleaner function, depending on whether content goes TO or FROM the Rich Text Editor ($direction).
 * Unless $tagList is given, the result is cached per direction in $this->getKeepTags_cache and reused on later calls.
 * With $tagList given, the configuration is always built from scratch and never cached.
 *
 * The default tag list is a built-in list plus procOptions[allowTags] minus procOptions[denyTags]. Both options are matched case-insensitively.
 *
 * @param	string		The direction of the content being processed: "db" (content going into the database FROM the RTE) or "rte" (content going into the form). Any other value skips the direction specific setup.
 * @param	string		Comma list of tags to keep (overriding the default list and the allow/deny options)
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='') {
		// One single test for "a tag list was given", so that cache handling and list building always agree (also for a value like "0").
	$hasTagList = strcmp((string)$tagList,'') ? TRUE : FALSE;

	if ($hasTagList || !isset($this->getKeepTags_cache[$direction]) || !is_array($this->getKeepTags_cache[$direction])) {

			// Setting up allowed tags:
		if ($hasTagList) {	// If the $tagList input var is set, this will take precedence
			$keepTags = array_flip(t3lib_div::trimExplode(',',$tagList,1));
		} else {	// Default is to get allowed/denied tags from the internal array of processing options:
				// Construct default list of tags to keep:
			$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
			$keepTags = array_flip(t3lib_div::trimExplode(',',$typoScript_list.','.strtolower($this->procOptions['allowTags']),1));

				// For tags to deny, remove them from the $keepTags array.
				// The keys of $keepTags are lowercase, so the deny list has to be lowercased as well to match.
			$denyTags = t3lib_div::trimExplode(',',strtolower($this->procOptions['denyTags']),1);
			foreach($denyTags as $dKe) {
				unset($keepTags[$dKe]);
			}
		}

			// Based on the direction of content, set further options:
		switch ($direction) {

				// GOING from database to Rich Text Editor:
			case 'rte':
					// Transform bold/italics tags to strong/em
				if (isset($keepTags['b'])) { $keepTags['b'] = array('remap'=>'STRONG'); }
				if (isset($keepTags['i'])) { $keepTags['i'] = array('remap'=>'EM'); }

					// Let HTMLparserConfig() merge in procOptions[HTMLparser_rte.] (TypoScript style array with .'s); only the first element of its result is used.
				list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);
			break;

				// GOING from RTE to database:
			case 'db':
					// Transform strong/em back to bold/italics:
				if (isset($keepTags['strong'])) { $keepTags['strong'] = array('remap'=>'b'); }
				if (isset($keepTags['em'])) { $keepTags['em'] = array('remap'=>'i'); }

					// Setting up span tags if they are allowed:
				if (isset($keepTags['span'])) {
					$classes = array_merge(array(''),is_array($this->allowedClasses) ? $this->allowedClasses : array());
					$keepTags['span'] = array(
						'allowedAttribs' => 'class',
						'fixAttrib' => array(
							'class' => array(
								'list' => $classes,
								'removeIfFalse' => 1
							)
						),
						'rmTagIfNoAttrib' => 1
					);
						// Without configured classes there is no list to check against:
					if (!$this->procOptions['allowedClasses']) unset($keepTags['span']['fixAttrib']['class']['list']);
				}

					// Setting up font tags if they are allowed:
				if (isset($keepTags['font'])) {
					$colors = array_merge(array(''),t3lib_div::trimExplode(',',$this->procOptions['allowedFontColors'],1));
					$keepTags['font'] = array(
						'allowedAttribs' => 'face,color,size',
						'fixAttrib' => array(
							'face' => array(
								'removeIfFalse' => 1
							),
							'color' => array(
								'removeIfFalse' => 1,
								'list' => $colors
							),
							'size' => array(
								'removeIfFalse' => 1,
							)
						),
						'rmTagIfNoAttrib' => 1
					);
						// Without configured colors there is no list to check against:
					if (!$this->procOptions['allowedFontColors']) unset($keepTags['font']['fixAttrib']['color']['list']);
				}

					// Setting further options, getting them from the processing options (with defaults):
				$TSc = (isset($this->procOptions['HTMLparser_db.']) && is_array($this->procOptions['HTMLparser_db.'])) ? $this->procOptions['HTMLparser_db.'] : array();
				if (empty($TSc['globalNesting'])) $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
				if (empty($TSc['noAttrib'])) $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';

					// Let HTMLparserConfig() merge the TypoScript style array into the tag configuration:
				list($keepTags) = $this->HTMLparserConfig($TSc,$keepTags);
			break;
		}

			// Caching (internally, in object memory) the result unless tagList is set:
		if ($hasTagList) {
			return $keepTags;
		}
		$this->getKeepTags_cache[$direction] = $keepTags;
	}

		// Return result:
	return $this->getKeepTags_cache[$direction];
}
1053
/**
 * Resolves $value into lines based on <div></div>-sections, <p>-sections and <br />-tags. The lines are joined by chr(10).
 * The point is to turn the HTML returned from the RTE into ordinary lines so it is 'human-readable'.
 * The function ->setDivTags does the opposite.
 * This function processes content going into the database.
 *
 * If the value contains no div/p sections, or the recursion brake has reached zero, the value is returned as a plain string
 * regardless of $returnArray. The recursion relies on this to tell "no nesting found" (string) from "nesting found" (array).
 *
 * @param	string		Value to process.
 * @param	integer		Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	mixed		Processed input value: a string, or an array of lines if $returnArray is true and div/p sections were found.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE) {

		// Internalize font tags (move them from OUTSIDE p/div to inside, if that is the case):
	if ($this->procOptions['internalizeFontTags']) {
		$value = $this->internalizeFontTags($value);
	}

		// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags.

	if ($this->procOptions['keepPDIVattribs']) {
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

		// Returns plainly the value if there were no div/p sections in it
	if (count($divSplit)<=1 || $count<=0) {
		return $value;
	}

		// Traverse the split sections. Odd keys are div/p sections, even keys are the content outside of them.
	foreach($divSplit as $k => $v) {
		if ($k%2) {	// Inside
			$v = $this->removeFirstAndLastTag($v);

				// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($v,$count-1,1);
			if (!is_array($subLines)) {	// An array means nested p/div sections were found (considered 'an error'); they are used as they are. Otherwise we process the content as a TRUE line:
				$subLines = array($subLines);
				if (!$this->procOptions['dontConvBRtoParagraph']) {
						// Break-tags are treated as if each separated a line of content.
						// Matches <br>, <br/> and <br /> case-insensitively (a backslash instead of the slash is tolerated, as the former POSIX pattern did).
					$subLines = preg_split('#<br[[:space:]]*[\\\\/]?>#i',$v);
				}

					// First tag and attributes of the section. They are the same for all sub-lines, so they are resolved once:
				$fTag = $this->getFirstTag($divSplit[$k]);
				$tagName = strtolower($this->getFirstTagName($divSplit[$k]));
				$attribs = $this->get_tag_attributes($fTag);
				$tagAttribs = (is_array($attribs) && isset($attribs[0]) && is_array($attribs[0])) ? $attribs[0] : array();

					// Keep attributes (lowercase)
				$newAttribs = array();
				foreach($keepAttribListArr as $keepA) {
					if (isset($tagAttribs[$keepA])) { $newAttribs[$keepA] = $tagAttribs[$keepA]; }
				}

					// ALIGN attribute: set to value, but not 'left'
				$alignValue = isset($tagAttribs['align']) ? $tagAttribs['align'] : '';
				if (!$this->procOptions['skipAlign'] && strcmp(trim($alignValue),'') && strtolower($alignValue)!='left') {
					$newAttribs['align'] = strtolower($alignValue);
				}

					// CLASS attribute: set to whatever value, provided it is allowed
				$classValue = isset($tagAttribs['class']) ? $tagAttribs['class'] : '';
				if (!$this->procOptions['skipClass'] && strcmp(trim($classValue),'')) {
					if (!count($this->allowedClasses) || in_array(strtoupper($classValue),$this->allowedClasses)) {
						$newAttribs['class'] = $classValue;
					}
				}

					// The line is wrapped in a tag again only if there are attributes to carry AND procOptions[remapParagraphTag] is not "1".
					// The tag name is the original one unless remapParagraphTag is "P" or "DIV".
				$wrapLines = count($newAttribs) && strcmp($remapParagraphTag,'1');
				$startTag = '';
				$endTag = '';
				if ($wrapLines) {
					if ($remapParagraphTag=='P') $tagName = 'p';
					if ($remapParagraphTag=='DIV') $tagName = 'div';
					$startTag = '<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>';
					$endTag = '</'.$tagName.'>';
				}

					// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
				foreach(array_keys($subLines) as $sk) {
						// Clear up the subline for DB.
					$subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);

						// Remove any line break char (10 or 13)
					$subLines[$sk] = str_replace(array(chr(10),chr(13)),'',$subLines[$sk]);

					if ($wrapLines) {
						$subLines[$sk] = $startTag.$subLines[$sk].$endTag;
					}
				}
			}
				// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

				// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank:
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;') $divSplit[$k] = '';
		} else {	// outside div:
				// Strip all tags but the ones allowed outside p/div, and drop the part if nothing is left
			$divSplit[$k] = trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],'')) unset($divSplit[$k]);
		}
	}

		// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1160
/**
 * Converts all lines into <div></div>/<p></p>-sections (unless the line is wrapped in such a section already)
 * For processing of content going FROM database TO RTE.
 * Each line is first run through HTMLcleaner() with the configuration from getKeepTags('rte'); blank lines become "&nbsp;".
 *
 * @param	string		Value to convert
 * @param	string		Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p') {

		// Configuration for the HTMLcleaner function, which processes each line on its way to the RTE:
	$cleanerTagConf = $this->getKeepTags('rte');
		// Default: unknown tags are handled in 'protect' mode. procOptions[dontProtectUnknownTags_rte] switches that off (0).
	$unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
		// Default: 1 is passed as the HSC setting. procOptions[dontHSC_rte] switches that off (0).
	$hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
		// Default: "&amp;nbsp;" is turned back into "&nbsp;" after cleaning, unless procOptions[dontConvAmpInNBSP_rte] is set.
	$restoreNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'] ? 1 : 0;

		// Divide the content into lines, based on chr(10):
	$lines = explode(chr(10),$value);
	foreach(array_keys($lines) as $lineKey) {

			// Processing of line content:
		if (strcmp(trim($lines[$lineKey]),'')) {	// Line has content: clean it
			$lines[$lineKey] = $this->HTMLcleaner($lines[$lineKey],$cleanerTagConf,$unknownTagMode,$hscMode);
			if ($restoreNbsp) {
				$lines[$lineKey] = str_replace('&amp;nbsp;','&nbsp;',$lines[$lineKey]);
			}
		} else {	// Blank line
			$lines[$lineKey] = '&nbsp;';
		}

			// Wrap the line in <$dT> unless it starts and ends with a div tag, or starts and ends with a p tag:
		$probe = strtolower(trim($lines[$lineKey]));
		$isDivLine = (substr($probe,0,4)=='<div' && substr($probe,-6)=='</div>');
		$isParagraphLine = (substr($probe,0,2)=='<p' && substr($probe,-4)=='</p>');
		if (!$isDivLine && !$isParagraphLine) {
			$lines[$lineKey] = '<'.$dT.'>'.$lines[$lineKey].'</'.$dT.'>';
		}
	}

		// Implode result:
	return implode(chr(10),$lines);
}
1203
/**
 * Splits $value into font-tag chunks.
 * If a font-tag contains <P>/<DIV> sections, the font-tag is wrapped AROUND the content INSIDE each P/DIV section and the outer font-tag is dropped.
 * Content between those sections keeps the font-tag only if it holds any text after stripping tags.
 * Font-tags without P/DIV sections inside are left untouched.
 * Useful for pre-processing content pasted into the RTE from sources which put font-tags on the OUTSIDE of the sections (eg. star-office).
 * Used by divideIntoLines() if the processing option 'internalizeFontTags' is set.
 *
 * @param	string		Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value) {

		// Splitting into font tag blocks. Odd keys are the font sections.
	$fontChunks = $this->splitIntoBlock('font',$value);

	foreach($fontChunks as $chunkKey => $chunk) {
		if (!($chunkKey%2)) {
			continue;	// Outside a font tag: nothing to do
		}

			// The opening font tag of this section:
		$fontTag = $this->getFirstTag($chunk);

		$innerParts = $this->splitIntoBlock('div,p',$this->removeFirstAndLastTag($chunk),1);
		if (count($innerParts)<=1) {
			continue;	// No div/p sections inside the font tag: leave the section as it is
		}

			// Traverse the div/p sections found inside the font tag:
		foreach($innerParts as $partKey => $part) {
			if ($partKey%2) {	// Inside a div/p section: move the font tag inside of it
				$sectionStartTag = $this->getFirstTag($part);
				$sectionTagName = $this->getFirstTagName($part);
				$sectionContent = $this->removeFirstAndLastTag($part);
				$innerParts[$partKey] = $sectionStartTag.$fontTag.$sectionContent.'</font>'.'</'.$sectionTagName.'>';
			} elseif (trim(strip_tags($part))) {	// Between sections: re-apply the font tag if there is text
				$innerParts[$partKey] = $fontTag.$part.'</font>';
			}
		}
		$fontChunks[$chunkKey] = implode('',$innerParts);
	}

	return implode('',$fontChunks);
}
1244
/**
 * Returns the site URL as reported by the environment helper.
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl() {
	$currentSiteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	return $currentSiteUrl;
}
1254
/**
 * Return the storage folder of RTE image files.
 * If $this->rte_p['imgpath'] holds a value, that is used; otherwise the default from $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string		Folder path
 */
function rteImageStorageDir() {
	if ($this->rte_p['imgpath']) {
		return $this->rte_p['imgpath'];
	}
	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1264
/**
 * Remove all tables from incoming code.
 * Tries to do this in a more or less respectful way: each table is replaced by the content of its <td> cells, every cell followed by the break character.
 * Thus at least the cell content is preserved in some way. Finally all sections (table and non-table) are joined with the break character as well.
 *
 * @param	string		Input value
 * @param	string		Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />') {

		// Splitting value into table blocks. Odd keys are the table sections.
	$sections = $this->splitIntoBlock('table',$value);

	foreach($sections as $sectionKey => $section) {
		if (!($sectionKey%2)) {
			continue;	// Not a table: keep as it is
		}

			// Collect the cell contents of all rows of this table:
		$flatContent = '';
		$rows = $this->splitIntoBlock('tr',$section);
		foreach($rows as $rowKey => $row) {
			if (!($rowKey%2)) {
				continue;	// Content between rows is dropped
			}
			$cells = $this->getAllParts($this->splitIntoBlock('td',$row),1,0);
			foreach($cells as $cellContent) {
				$flatContent .= $cellContent.$breakChar;
			}
		}
		$sections[$sectionKey] = $flatContent;
	}

		// Implode it all again:
	return implode($breakChar,$sections);
}
1297
/**
 * Default tag mapping for TS.
 * Direction "db" maps strong => b and em => i, direction "rte" maps them the other way around.
 * Any other direction leaves the code untouched.
 *
 * @param	string		Input code to process
 * @param	string		Direction: To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte') {
		// The mapping towards the database; the mapping towards the RTE is the exact reverse.
	$dbTagMap = array(
		'strong' => 'b',
		'em' => 'i'
	);
	$rteTagMap = array_flip($dbTagMap);

	if ($direction=='db') {
		$code = $this->mapTags($code,$dbTagMap);
	}
	if ($direction=='rte') {
		$code = $this->mapTags($code,$rteTagMap);
	}

	return $code;
}
1320
/**
 * Finds width and height from attrib-array
 * If the width and height are found in the style-attribute as pixel values ("width:xxx px" / "height:xxx px"), use that!
 * Otherwise the "width" / "height" attributes are used. Each dimension is resolved on its own.
 * Only the plain "width" and "height" style properties count; "border-width", "line-height" and the like are ignored.
 *
 * @param	array		Array of attributes from tag in which to search. The keys "style", "width" and "height" are read; all of them are optional.
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray) {
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style) {
			// The lookbehind rejects matches inside longer property names (eg. "line-height").
		$propertyStart = '/(?<![a-z0-9_-])';
		$pixelValue = '[[:space:]]*:[[:space:]]*([0-9]+)[[:space:]]*px/i';

			// Width. The match array is only read after a successful match, so a value can never leak from one dimension into the other.
		if (preg_match($propertyStart.'width'.$pixelValue,$style,$reg)) {
			$w = intval($reg[1]);
		}
			// Height
		if (preg_match($propertyStart.'height'.$pixelValue,$style,$reg)) {
			$h = intval($reg[1]);
		}
	}

		// Fall back to the plain attributes for whatever the style did not deliver:
	if (!$w && isset($attribArray['width'])) {
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height'])) {
		$h = $attribArray['height'];
	}

	return array(intval($w),intval($h));
}
1347
/**
 * Parse <A>-tag href and return status of email, external, anchor, file or page
 *
 * The returned array always has the keys "url" and "type". "type" is one of:
 * - "email": the URL started with "mailto:"; "url" is the address without that prefix.
 * - "ext": default for everything that is not recognized as local to the site.
 * - "anchor": the part of the URL after the site URL is only a "#fragment"; "url" is that part.
 * - "page": the local URL has an empty path or "index.php" and an "id=" value in the query; "pageid", "cElement" and "url" are set from it.
 * - "file": any other local URL; "url" is the part after the site URL.
 * For local URLs the keys "relScriptPath" and "relUrl" are kept as well.
 *
 * @param	string		URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url) {
	$info = array();
	$url = trim($url);
	if (substr(strtolower($url),0,7)=='mailto:') {
		$info['url'] = trim(substr($url,7));
		$info['type'] = 'email';
	} else {
		$curURL = $this->siteUrl();

			// Find the length of the common prefix of the URL and the site URL (never reading beyond the shorter of the two):
		$compareLength = min(strlen($url),strlen($curURL));
		for($a=0;$a<$compareLength;$a++) {
			if ($url[$a]!==$curURL[$a]) {
				break;
			}
		}

		$info['relScriptPath'] = substr($curURL,$a);
		$info['relUrl'] = substr($url,$a);
		$info['url'] = $url;
		$info['type'] = 'ext';

			// parse_url() returns false for seriously malformed URLs; treat that like "no parts found".
		$siteUrl_parts = parse_url($url);
		if (!is_array($siteUrl_parts)) $siteUrl_parts = array();
		$curUrl_parts = parse_url($curURL);
		if (!is_array($curUrl_parts)) $curUrl_parts = array();

		$urlHost = isset($siteUrl_parts['host']) ? (string)$siteUrl_parts['host'] : '';
		$curHost = isset($curUrl_parts['host']) ? (string)$curUrl_parts['host'] : '';

		if ($urlHost===$curHost		// Hosts should match (two missing hosts count as a match, too)
			&& (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir))) {	// If the script path seems to match or is empty (FE-EDIT)

			$uP = parse_url($info['relUrl']);
			if (!is_array($uP)) $uP = array();
			$relPath = isset($uP['path']) ? $uP['path'] : '';
			$urlFragment = isset($siteUrl_parts['fragment']) ? $siteUrl_parts['fragment'] : '';

			if (!strcmp('#'.$urlFragment,$info['relUrl'])) {
				$info['url'] = $info['relUrl'];
				$info['type'] = 'anchor';
			} elseif (!trim($relPath) || !strcmp($relPath,'index.php')) {
					// The id is everything that follows the first "id=" in the query string.
					// NOTE(review): that includes any further parameters ("id=5&type=1" gives "5&type=1") and also matches inside names like "pid=" - left as it is, confirm with the callers before tightening.
				$pp = explode('id=',isset($uP['query']) ? $uP['query'] : '');
				$id = isset($pp[1]) ? trim($pp[1]) : '';
				if ($id) {
					$info['pageid'] = $id;
					$info['cElement'] = isset($uP['fragment']) ? $uP['fragment'] : NULL;
					$info['url'] = $id.($info['cElement']?'#'.$info['cElement']:'');
					$info['type'] = 'page';
				}
			} else {
				$info['url'] = $info['relUrl'];
				$info['type'] = 'file';
			}
		} else {
			unset($info['relScriptPath']);
			unset($info['relUrl']);
		}
	}
	return $info;
}
1405
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 * For every <a> section whose href has no scheme, $this->relBackPath is cut from the start of the href and the site URL is prepended.
 * The tag content is processed recursively with the same settings.
 *
 * @param	string		Content input
 * @param	boolean		If true, then the "rtekeep" attribute will not be set.
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE) {
	$blockSplit = $this->splitIntoBlock('A',$value);
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// block:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);

				// Checking if there is a scheme, and if not, prepend the current url.
				// parse_url() returns false for seriously malformed URLs, which is treated like "no scheme".
				// NOTE(review): an <a> tag without any href (eg. a named anchor) has no scheme either and therefore gets a href pointing to the site URL - kept as it was, confirm whether that is wanted.
			$href = isset($attribArray['href']) ? $attribArray['href'] : '';
			$uP = parse_url(strtolower($href));
			if (!is_array($uP) || empty($uP['scheme'])) {
				$attribArray['href'] = $this->siteUrl().substr($href,strlen($this->relBackPath));
			}
			if (!$dontSetRTEKEEP) $attribArray['rtekeep'] = 1;

			$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
			$eTag = '</a>';
				// The flag is handed on, so nested tags are treated like the outer ones:
			$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]),$dontSetRTEKEEP).$eTag;
		}
	}
	return implode('',$blockSplit);
}
1433 }
1434
1435
1436 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
1437 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
1438 }
1439 ?>