Various changes
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_parsehtml_proc.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2003 Kasper Skaarhoj (kasper@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Functions for parsing HTML, specially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 December/2003 by Kasper Skaarhoj
32 * XHTML compatible.
33 *
34 * @author Kasper Skaarhoj <kasper@typo3.com>
35 * @internal
36 */
37 /**
38 * [CLASS/FUNCTION INDEX of SCRIPT]
39 *
40 *
41 *
42 * 102: class t3lib_parsehtml_proc extends t3lib_parsehtml
43 * 137: function init($elRef='',$recPid=0)
44 * 149: function setRelPath($path)
45 * 173: function evalWriteFile($pArr,$currentRecord)
46 *
47 * SECTION: Main function
48 * 231: function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
49 *
50 * SECTION: Specific RTE TRANSFORMATION functions
51 * 378: function TS_images_db($value)
52 * 479: function TS_images_rte($value)
53 * 513: function TS_reglinks($value,$direction)
54 * 547: function TS_links_db($value)
55 * 591: function TS_links_rte($value)
56 * 666: function TS_preserve_db($value)
57 * 690: function TS_preserve_rte($value)
58 * 711: function TS_transform_db($value,$css=FALSE)
59 * 822: function TS_transform_rte($value,$css=0)
60 * 893: function TS_strip_db($value)
61 *
62 * SECTION: Generic RTE transformation, analysis and helper functions
63 * 924: function getURL($url)
64 * 938: function HTMLcleaner_db($content,$tagList='')
65 * 959: function getKeepTags($direction='rte',$tagList='')
66 * 1068: function divideIntoLines($value,$count=5,$returnArray=FALSE)
67 * 1172: function setDivTags($value,$dT='p')
68 * 1217: function internalizeFontTags($value)
69 * 1253: function siteUrl()
70 * 1263: function rteImageStorageDir()
71 * 1275: function removeTables($value,$breakChar='<br />')
72 * 1307: function defaultTStagMapping($code,$direction='rte')
73 * 1330: function getWHFromAttribs($attribArray)
74 * 1356: function urlInfoForLinkTags($url)
75 * 1415: function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
76 *
77 * TOTAL FUNCTIONS: 27
78 * (This index is automatically created/updated by the extension "extdeveval")
79 *
80 */
81
82 require_once (PATH_t3lib.'class.t3lib_parsehtml.php');
83
84
85
86
87
88
89
90
91
92
93
94
95 /**
96 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
97 *
98 * @author Kasper Skaarhoj <kasper@typo3.com>
99 * @package TYPO3
100 * @subpackage t3lib
101 */
102 class t3lib_parsehtml_proc extends t3lib_parsehtml {
103
104 // Static:
105 var $headListTags = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6'; // Comma list of tags for header, pre and list containers. Appended to the block-tag list by which TS_transform_db() / TS_transform_rte() split the content.
106
107 // Internal, static:
108 var $recPid = 0; // Set this to the pid of the record manipulated by the class (done by init()). TS_links_rte() falls back to it when a <LINK> tag carries no page id.
109 var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext" (set by init())
110 var $relPath=''; // Relative path with trailing slash (set by setRelPath())
111 var $relBackPath=''; // Relative back-path, one "../" per segment of the relative path (set by setRelPath())
112 var $procOptions = ''; // Set by RTE_transform() to the 'proc.' part of the RTE configuration it receives (TSconfig options coming from Page TSconfig)
113
114 // Internal, dynamic
115 var $TS_transform_db_safecounter=100; // Run-away brake for recursive calls: decremented on entry of TS_transform_db(), which returns its input untouched once the counter drops below zero.
116 var $rte_p=''; // Parameters from TCA types configuration related to the RTE (set by RTE_transform())
117 var $getKeepTags_cache=array(); // Data caching for processing function
118 var $allowedClasses=array(); // Storage of the allowed CSS class names in the RTE; RTE_transform() fills it (direction "db") with the uppercased names from procOptions['allowedClasses']
119 var $preserveTags = ''; // Uppercased comma list of tags to preserve, set by RTE_transform() from procOptions['preserveTags'] (Page TSconfig configuration)
120
121
122
123
124
125
126
127
128
129
130 /**
131 * Initialize, setting element reference and record PID
132 *
133 * @param string Element reference, eg "tt_content:bodytext"
134 * @param integer PID of the record (page id)
135 * @return void
136 */
function init($elRef='',$recPid=0) {
		// Remember which element ("[table]:[field]") is being processed ...
	$this->elRef = $elRef;
		// ... and the pid of the record it belongs to.
	$this->recPid = $recPid;
}
141
142 /**
143 * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
144 * This is used when editing files with the RTE
145 *
146 * @param string The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
147 * @return void There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
148 */
function setRelPath($path) {
		// Normalize the input: strip surrounding whitespace plus ONE leading and ONE trailing slash.
		// PCRE is used here because the POSIX ereg_*() functions were removed in PHP 7.0.
		// The "D" modifier makes "$" match at the very end of the string only (like POSIX did).
	$path = trim($path);
	$path = preg_replace('#^/#','',$path);
	$path = preg_replace('#/$#D','',$path);

		// strcmp() rather than a plain truthiness test, so a directory named "0" is accepted as well.
		// For an empty path the previously set values are deliberately left untouched.
	if (strcmp($path,'')) {
			// ->relPath always carries a trailing slash:
		$this->relPath = $path.'/';
			// One "../" per path segment, eg. "fileadmin/static" yields "../../":
		$this->relBackPath = str_repeat('../',count(explode('/',$path)));
	}
}
163
164 /**
165 * Evaluate the environment for editing a staticFileEdit file.
166 * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
167 *
168 * @param array Parameters for the current field as found in types-config
169 * @param array Current record we are editing.
170 * @return mixed On success an array with various information is returned, otherwise a string with an error message
171 * @see t3lib_TCEmain, t3lib_transferData
172 */
function evalWriteFile($pArr,$currentRecord) {

		// No parameter array means there is nothing to evaluate; the function then returns NULL.
	if (!is_array($pArr)) return;

		// The base directory must be configured, end with a slash and exist below PATH_site:
	$basePath = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'];
	if (!$basePath || substr($basePath,-1)!='/' || !@is_dir(PATH_site.$basePath)) {
		return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
	}

		// The first parameter names the record field which holds the file name:
	$params = $pArr['parameters'];
	$fileField = trim($params[0]);
	$fileName = $currentRecord[$fileField];
	if (!$fileField || !$fileName || !t3lib_div::validPathStr($fileName)) {
		return "ERROR: Edit file name could not be found or was bad.";
	}

		// The file itself has to exist:
	$relFile = $basePath.$fileName;
	$absFile = PATH_site.$relFile;
	if (!@is_file($absFile)) {
		return "ERROR: Editfile '".$relFile."' did not exist";
	}

		// Success: return file paths and the names of the remaining fields from the parameter list
	return array(
		'editFile' => $absFile,
		'relEditFile' => $relFile,
		'contentField' => trim($params[1]),
		'markerField' => trim($params[2]),
		'loadFromFileField' => trim($params[3]),
		'statusField' => trim($params[4])
	);
}
201
202
203
204
205
206
207
208
209
210
211
212
213
214 /**********************************************
215 *
216 * Main function
217 *
218 **********************************************/
219
220 /**
221 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
222 * This is the main function called from tcemain and transfer data classes
223 *
224 * @param string Input value
225 * @param array Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
226 * @param string Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
227 * @param array Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
228 * @return string Output value
229 * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
230 */
function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array()) {

		// Init: keep the "proc." options and the uppercased, trimmed list of tags to preserve on the object
	$this->procOptions = $thisConfig['proc.'];
	$preserveList = t3lib_div::trimExplode(',',$this->procOptions['preserveTags']);
	$this->preserveTags = strtoupper(implode(',',$preserveList));

		// Parameters for "rte_transform" from the special configuration of the field:
	$this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);
	$p = $this->rte_p;

		// Mode list: a non-empty "overruleMode" (comma separated) wins over the "mode" parameter (dash separated)
	$overruleMode = $this->procOptions['overruleMode'];
	$modes = strcmp($overruleMode,'')
		? array_unique(t3lib_div::trimExplode(',',$overruleMode))
		: array_unique(t3lib_div::trimExplode('-',$p['mode']));

		// The meta modes "ts" and "ts_css" are expanded in place into the transformations they stand for:
	$metaModes = array(
		'ts' => 'ts_transform,ts_preserve,ts_images,ts_links',
		'ts_css' => 'css_transform,ts_images,ts_links'
	);
	$revmodes = array_flip($modes);
	foreach($metaModes as $metaMode => $expansion) {
		if (isset($revmodes[$metaMode])) {
			$modes[$revmodes[$metaMode]] = $expansion;
		}
	}
	$modes = array_unique(t3lib_div::trimExplode(',',implode(',',$modes),1));

		// Towards the RTE the transformations are applied in the opposite order:
	if ($direction=='rte') {
		$modes = array_reverse($modes);
	}

		// Optional HTML cleaner configurations which run before (entry) and after (exit) the main transformations:
	$entry_HTMLparser = '';
	if ($this->procOptions['entryHTMLparser_'.$direction]) {
		$entry_HTMLparser = $this->HTMLparserConfig($this->procOptions['entryHTMLparser_'.$direction.'.']);
	}
	$exit_HTMLparser = '';
	if ($this->procOptions['exitHTMLparser_'.$direction]) {
		$exit_HTMLparser = $this->HTMLparserConfig($this->procOptions['exitHTMLparser_'.$direction.'.']);
	}

		// Unless disabled, \r\n sequences are reduced to \n before processing:
	$unifyLineBreaks = !$this->procOptions['disableUnifyLineBreaks'];
	if ($unifyLineBreaks) {
		$value = str_replace(chr(13).chr(10),chr(10),$value);
	}

		// Entry cleaner, if configured:
	if (is_array($entry_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$entry_HTMLparser[0],$entry_HTMLparser[1],$entry_HTMLparser[2],$entry_HTMLparser[3]);
	}

		// Apply each mode in turn. Unknown modes (and "dummy") do nothing.
	foreach($modes as $cmd) {
		if ($direction=='db') {		// ->DB
			if ($cmd=='ts_images') {
				$value = $this->TS_images_db($value);
			} elseif ($cmd=='ts_reglinks') {
				$value = $this->TS_reglinks($value,'db');
			} elseif ($cmd=='ts_links') {
				$value = $this->TS_links_db($value);
			} elseif ($cmd=='ts_preserve') {
				$value = $this->TS_preserve_db($value);
			} elseif ($cmd=='ts_transform' || $cmd=='css_transform') {
					// All char-13 are removed here; the transformation depends on char-10 only
				$value = str_replace(chr(13),'',$value);
				$this->allowedClasses = t3lib_div::trimExplode(',',strtoupper($this->procOptions['allowedClasses']),1);
				$value = $this->TS_transform_db($value,$cmd=='css_transform');
			} elseif ($cmd=='ts_strip') {
				$value = $this->TS_strip_db($value);
			}
		} elseif ($direction=='rte') {		// ->RTE
			if ($cmd=='ts_images') {
				$value = $this->TS_images_rte($value);
			} elseif ($cmd=='ts_reglinks') {
				$value = $this->TS_reglinks($value,'rte');
			} elseif ($cmd=='ts_links') {
				$value = $this->TS_links_rte($value);
			} elseif ($cmd=='ts_preserve') {
				$value = $this->TS_preserve_rte($value);
			} elseif ($cmd=='ts_transform' || $cmd=='css_transform') {
					// All char-13 are removed here; the transformation depends on char-10 only
				$value = str_replace(chr(13),'',$value);
				$value = $this->TS_transform_rte($value,$cmd=='css_transform');
			}
		}
	}

		// Exit cleaner, if configured:
	if (is_array($exit_HTMLparser)) {
		$value = $this->HTMLcleaner($value,$exit_HTMLparser[0],$exit_HTMLparser[1],$exit_HTMLparser[2],$exit_HTMLparser[3]);
	}

		// Final clean up of linebreaks: first collapse any \r\n which entered in the meantime, then turn every \n into \r\n
	if ($unifyLineBreaks) {
		$value = str_replace(chr(10),chr(13).chr(10),str_replace(chr(13).chr(10),chr(10),$value));
	}

	return $value;
}
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361 /************************************
362 *
363 * Specific RTE TRANSFORMATION functions
364 *
365 *************************************/
366
367 /**
368 * Transformation handler: 'ts_images' / direction: "db"
369 * Processing images inserted in the RTE.
370 * This is used when content goes from the RTE to the database.
371 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL
372 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
373 * Also "magic" images are processed here.
374 *
375 * @param string The content from RTE going to Database
376 * @return string Processed content
377 */
function TS_images_db($value) {

		// Split content by <img> tags; the odd-numbered elements of the result are the tags themselves:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v) {
		if ($k%2) {	// image found, do processing:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$siteUrl = $this->siteUrl();
			$absRef = trim($attribArray['src']);

				// External image from another URL?
			if (!t3lib_div::isFirstPartOfStr($absRef,$siteUrl) && !$this->procOptions['dontFetchExtPictures']) {
					// The file extension is validated BEFORE the file is fetched, so that no request is made
					// for files which would be discarded anyway. Missing path/extension parts are treated as empty.
				$pU = parse_url($absRef);
				$pI = pathinfo(is_array($pU) && isset($pU['path']) ? $pU['path'] : '');
				$extension = isset($pI['extension']) ? $pI['extension'] : '';

				if (t3lib_div::inList('gif,png,jpeg,jpg',strtolower($extension))) {
					$externalFile = $this->getUrl($absRef);	// Get it
					if ($externalFile) {
							// The stored copies are named by a hash of the URL. Note that $filename already includes
							// the extension and the "RTEmagicC_" (child) file gets the extension appended once more;
							// the lookup of the "RTEmagicP_" (original) file further below relies on exactly that.
						$storageDir = $this->rteImageStorageDir();
						$filename = t3lib_div::shortMD5($absRef).'.'.$extension;
						$origFilePath = PATH_site.$storageDir.'RTEmagicP_'.$filename;
						$C_origFilePath = PATH_site.$storageDir.'RTEmagicC_'.$filename.'.'.$extension;
						if (!@is_file($origFilePath)) {
							t3lib_div::writeFile($origFilePath,$externalFile);
							t3lib_div::writeFile($C_origFilePath,$externalFile);
						}
							// From here on the image is referred to as a local file:
						$absRef = $siteUrl.$storageDir.'RTEmagicC_'.$filename.'.'.$extension;

						$attribArray['src'] = $absRef;
						if (!isset($attribArray['alt'])) $attribArray['alt']='';
						$params = t3lib_div::implodeParams($attribArray,1);
						$imgSplit[$k] = '<img '.$params.' />';
					}
				}
			}

				// Check image as local file
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
				$path = substr($absRef,strlen($siteUrl));
				$pathPre = $this->rteImageStorageDir().'RTEmagicC_';

					// Only "magic" child images are processed:
				if (t3lib_div::isFirstPartOfStr($path,$pathPre)) {
					$filepath = PATH_site.$path;
					if (@is_file($filepath)) {
							// Find original file: strip the child's (extra) extension to get the name of the "RTEmagicP_" file
						$pI = pathinfo(substr($path,strlen($pathPre)));
						$filename = substr($pI['basename'],0,-strlen('.'.$pI['extension']));
						$origFilePath = PATH_site.$this->rteImageStorageDir().'RTEmagicP_'.$filename;
						if (@is_file($origFilePath)) {
							$imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
							$imgObj->init();
							$imgObj->mayScaleUp = 0;
							$imgObj->tempPath = PATH_site.$imgObj->tempPath;

							$curInfo = $imgObj->getImageDimensions($filepath);	// Image dimensions of the current image
							$curWH = $this->getWHFromAttribs($attribArray);	// Image dimensions as set in the image tag
								// If the tag asks for other dimensions than the child file has, the child is re-created from the original:
							if ($curWH[0]!=$curInfo[0] || $curWH[1]!=$curInfo[1]) {
									// NOTE(review): the result of this call is never used. The call is kept since it cannot be
									// told from here whether getImageDimensions() has side effects - confirm and remove.
								$origImgInfo = $imgObj->getImageDimensions($origFilePath);
								$cW = $curWH[0];
								$cH = 1000;	// Make the image based on the width solely...
								$imgI = $imgObj->imageMagickConvert($origFilePath,$pI['extension'],$cW.'m',$cH.'m');
								if ($imgI[3]) {
									@copy($imgI[3],$filepath);	// Override the child file
									unset($attribArray['style']);
									$attribArray['width'] = $imgI[0];
									$attribArray['height'] = $imgI[1];
									if (!$attribArray['border']) $attribArray['border']=0;
									if (!isset($attribArray['alt'])) $attribArray['alt']='';
									$params = t3lib_div::implodeParams($attribArray,1);
									$imgSplit[$k] = '<img '.$params.' />';
								}
							}
						}
					}
				}
			}

				// Convert abs to rel url. The tag is parsed again since it may have been rewritten above.
			$attribArray = $this->get_tag_attributes_classic($imgSplit[$k],1);
			$absRef = trim($attribArray['src']);
			if (t3lib_div::isFirstPartOfStr($absRef,$siteUrl)) {
				$attribArray['src'] = $this->relBackPath.substr($absRef,strlen($siteUrl));
				if (!isset($attribArray['alt'])) $attribArray['alt']='image';	// Must have value, otherwise the attribute is stripped by implodeParams()
				$imgSplit[$k] = '<img '.t3lib_div::implodeParams($attribArray,1).' />';
			}
		}
	}
	return implode('',$imgSplit);
}
470
471 /**
472 * Transformation handler: 'ts_images' / direction: "rte"
473 * Processing images from database content going into the RTE.
474 * Processing includes converting the src attribute to an absolute URL.
475 *
476 * @param string Content input
477 * @return string Content output
478 */
function TS_images_rte($value) {

		// Split content by <img> tags; the odd-numbered elements of the result are the tags themselves:
	$imgSplit = $this->splitTags('img',$value);
	foreach($imgSplit as $k => $v) {
		if ($k%2) {	// image found:

				// Init
			$attribArray = $this->get_tag_attributes_classic($v,1);
			$siteUrl = $this->siteUrl();
			$absRef = trim($attribArray['src']);

				// Unless the src attribute is already pointing to an external URL:
			if (strtolower(substr($absRef,0,4))!='http') {
					// The back-path is only cut off if the (trimmed) reference really starts with it.
					// Blindly removing strlen(relBackPath) characters would mutilate references
					// which were stored without that prefix.
				$backPathLen = strlen($this->relBackPath);
				if ($backPathLen && !strncmp($absRef,$this->relBackPath,$backPathLen)) {
					$absRef = substr($absRef,$backPathLen);
				}
				$attribArray['src'] = $siteUrl.$absRef;
				if (!isset($attribArray['alt'])) $attribArray['alt']='';
				$params = t3lib_div::implodeParams($attribArray);
				$imgSplit[$k] = '<img '.$params.' />';
			}
		}
	}

		// return processed content:
	return implode('',$imgSplit);
}
504
505 /**
506 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
507 * Converting <A>-tags to/from abs/rel
508 *
509 * @param string Content input
510 * @param string Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
511 * @return string Content output
512 */
function TS_reglinks($value,$direction) {
	switch($direction) {
		case 'rte':
				// Database -> RTE: make the hrefs of <a>-tags absolute
			return $this->TS_AtagToAbs($value,1);
		case 'db':
				// RTE -> database: strip the site URL from local links
			$siteURL = $this->siteUrl();
			$blockSplit = $this->splitIntoBlock('A',$value);
				// foreach() instead of each(), which was removed in PHP 8.0 (and is what the sibling functions use)
			foreach($blockSplit as $k => $v) {
				if ($k%2) {	// block:
					$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
						// If the url is local, remove url-prefix
					if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL) {
						$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
					}
					$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
					$eTag = '</a>';
						// The content between the tags is processed recursively:
					$blockSplit[$k] = $bTag.$this->TS_reglinks($this->removeFirstAndLastTag($v),$direction).$eTag;
				}
			}
			return implode('',$blockSplit);
	}

		// Unknown direction: hand the content back untouched instead of returning NULL, which would drop it.
	return $value;
}
538
539 /**
540 * Transformation handler: 'ts_links' / direction: "db"
541 * Converting <A>-tags to <LINK tags>
542 *
543 * @param string Content input
544 * @return string Content output
545 * @see TS_links_rte()
546 */
function TS_links_db($value) {

		// Split content into <a> tag blocks; only the odd-numbered elements are A-tag blocks:
	$blockSplit = $this->splitIntoBlock('A',$value);
	foreach($blockSplit as $k => $v) {
		if (!($k%2)) continue;

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
		$info = $this->urlInfoForLinkTags($attribArray['href']);

			// Find out whether the tag has attributes other than href, target and class:
		$otherAttribs = $attribArray;
		unset($otherAttribs['href'], $otherAttribs['target'], $otherAttribs['class']);

		if (count($otherAttribs)) {
				// Additional attributes exist, so the link is stored as a regular a-tag
				// (without the 'rtekeep' attribute and with local URLs made relative):
			unset($attribArray['rtekeep']);
			$siteURL = $this->siteUrl();
			if ($siteURL && substr($attribArray['href'],0,strlen($siteURL))==$siteURL) {
				$attribArray['href'] = $this->relBackPath.substr($attribArray['href'],strlen($siteURL));
			}
			$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
			$eTag = '</a>';
		} else {
				// Only href, target and class: create the TYPO3 pseudo-tag "<LINK [url] [target] [class]>".
				// A "-" takes the place of the target if there is a class but no target.
			$bTag = '<LINK '.$info['url'];
			if ($attribArray['target']) {
				$bTag.= ' '.$attribArray['target'];
			} elseif ($attribArray['class']) {
				$bTag.= ' -';
			}
			if ($attribArray['class']) {
				$bTag.= ' '.$attribArray['class'];
			}
			$bTag.= '>';
			$eTag = '</LINK>';
		}

			// The content between the tags is processed recursively:
		$blockSplit[$k] = $bTag.$this->TS_links_db($this->removeFirstAndLastTag($v)).$eTag;
	}
	return implode('',$blockSplit);
}
582
583 /**
584 * Transformation handler: 'ts_links' / direction: "rte"
585 * Converting <LINK tags> to <A>-tags
586 *
587 * @param string Content input
588 * @return string Content output
589 * @see TS_links_db()
590 */
function TS_links_rte($value) {
	$value = $this->TS_AtagToAbs($value);

		// Split content by the TYPO3 pseudo tag "<LINK>"; the odd-numbered elements are the blocks:
	$blockSplit = $this->splitIntoBlock('link',$value,1);
	foreach($blockSplit as $k => $v) {
		if ($k%2) {	// block:
				// The tag (minus its last character) is split by spaces: [0] tag name, [1] link parameter, [2] target, [3] class
			$tagCode = t3lib_div::trimExplode(' ',trim(substr($this->getFirstTag($v),0,-1)),1);
			$link_param = $tagCode[1];
			$href = '';
			$siteUrl = $this->siteUrl();
				// Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
				// (PCRE functions are used since the POSIX ereg*() functions were removed in PHP 7.0)
			if (strstr($link_param,'@')) {	// mailadr
				$href = 'mailto:'.preg_replace('/^mailto:/i','',$link_param);
			} elseif (substr($link_param,0,1)=='#') {	// check if anchor
				$href = $siteUrl.$link_param;
			} else {
				$fileChar = intval(strpos($link_param,'/'));
				$urlChar = intval(strpos($link_param,'.'));

					// Detects if a file is found in site-root OR is a simulateStaticDocument.
				list($rootFileDat) = explode('?',$link_param);
				$rFD_fI = pathinfo($rootFileDat);
				if (trim($rootFileDat) && !strstr($link_param,'/') && (@is_file(PATH_site.$rootFileDat) || t3lib_div::inList('php,html,htm',strtolower($rFD_fI['extension'])))) {
					$href = $siteUrl.$link_param;
				} elseif ($urlChar && (strstr($link_param,'//') || !$fileChar || $urlChar<$fileChar)) {	// url (external): If doubleSlash or if a '.' comes before a '/'.
						// "http://" is prepended unless the parameter already has a scheme:
					$scheme = preg_match('#^[a-z]*://#',trim(strtolower($link_param))) ? '' : 'http://';
					$href = $scheme.$link_param;
				} elseif ($fileChar) {	// file (internal)
					$href = $siteUrl.$link_param;
				} else {	// integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
					$link_params_parts = explode('#',$link_param);
					$idPart = trim($link_params_parts[0]);
					if (!strcmp($idPart,'')) { $idPart = $this->recPid; }	// If no id or alias is given, set it to class record pid

						// Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/? pair
					$pairParts = t3lib_div::trimExplode(',',$idPart);
					if (count($pairParts)>1) {
						$idPart = $pairParts[0];
					}
						// Checking if the id-parameter is an alias.
					if (!t3lib_div::testInt($idPart)) {
						list($idPartR) = t3lib_BEfunc::getRecordsByField('pages','alias',$idPart);
						$idPart = intval($idPartR['uid']);
					}
						// Page must exist, otherwise the href stays empty:
					$page = t3lib_BEfunc::getRecord('pages',$idPart);
					$href = is_array($page) ? $siteUrl.'?id='.$link_param : '';
				}
			}

				// Setting the A-tag; a target of "-" is just a placeholder and is not output:
			$bTag = '<a href="'.htmlspecialchars($href).'"'.($tagCode[2]&&$tagCode[2]!='-'?' target="'.htmlspecialchars($tagCode[2]).'"':'').($tagCode[3]?' class="'.htmlspecialchars($tagCode[3]).'"':'').'>';
			$eTag = '</a>';
			$blockSplit[$k] = $bTag.$this->TS_links_rte($this->removeFirstAndLastTag($v)).$eTag;
		}
	}

		// Return content:
	return implode('',$blockSplit);
}
659
660 /**
661 * Preserve special tags
662 *
663 * @param string Content input
664 * @return string Content output
665 */
function TS_preserve_db($value) {
		// Nothing to do if no tags are configured to be preserved:
	if (!$this->preserveTags) return $value;

		// Splitting into blocks for processing (span-tags are used for special tags); odd-numbered elements are the blocks
	$blockSplit = $this->splitIntoBlock('span',$value);
	foreach($blockSplit as $k => $v) {
		if (!($k%2)) continue;

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));
		if (!$attribArray['specialtag']) continue;

			// The "specialtag" attribute holds the rawurlencoded original tag, which is put back around the content:
		$theTag = rawurldecode($attribArray['specialtag']);
		$theTagName = $this->getFirstTagName($theTag);
		$innerContent = $this->removeFirstAndLastTag($v);
		$blockSplit[$k] = $theTag.$innerContent.'</'.$theTagName.'>';
	}
	return implode('',$blockSplit);
}
683
684 /**
685 * Preserve special tags
686 *
687 * @param string Content input
688 * @return string Content output
689 */
function TS_preserve_rte($value) {
		// Nothing to do if no tags are configured to be preserved:
	if (!$this->preserveTags) return $value;

		// Split by the tags to preserve; odd-numbered elements are the blocks
	$blockSplit = $this->splitIntoBlock($this->preserveTags,$value);
	foreach($blockSplit as $k => $v) {
		if ($k%2) {
				// The original tag is stored rawurlencoded in the "specialtag" attribute of a span-tag wrapping the content:
			$encodedTag = rawurlencode($this->getFirstTag($v));
			$innerContent = $this->removeFirstAndLastTag($v);
			$blockSplit[$k] = '<span specialtag="'.$encodedTag.'">'.$innerContent.'</span>';
		}
	}
	return implode('',$blockSplit);
}
701
702 /**
703 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
704 * Cleaning (->db) for standard content elements (ts)
705 *
706 * @param string Content input
707 * @param boolean If true, the transformation was "css_transform", otherwise "ts_transform"
708 * @return string Content output
709 * @see TS_transform_rte()
710 */
function TS_transform_db($value,$css=FALSE) {

		// safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
	$this->TS_transform_db_safecounter--;
	if ($this->TS_transform_db_safecounter<0) {
			// Restore the counter before bailing out, otherwise every bail-out would permanently reduce the allowed depth
		$this->TS_transform_db_safecounter++;
		return $value;
	}

		// Split the content from RTE by the occurence of these blocks:
	$blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,'.$this->headListTags,$value);

	$cc = 0;
	$aC = count($blockSplit);

		// Traverse the blocks
	foreach($blockSplit as $k => $v) {
		$cc++;
			// Every element except the last one is terminated by a line break:
		$lastBR = $cc==$aC ? '' : chr(10);

		if ($k%2) {	// Inside block:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));

				// Process based on the tag:
			switch($tagName) {
				case 'blockquote':	// Keep blockquotes, but clean the inside recursively in the same manner as the main code
					$blockSplit[$k] = '<'.$tagName.'>'.$this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]),$css).'</'.$tagName.'>'.$lastBR;
				break;
				case 'ol':
				case 'ul':	// Transform lists into <typolist>-tags:
					if (!$css) {
						if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
							$parts = $this->getAllParts($this->splitIntoBlock('LI',$this->removeFirstAndLastTag($blockSplit[$k])),1,0);
								// foreach() over the keys instead of each(), which was removed in PHP 8.0
							foreach(array_keys($parts) as $k2) {
									// remove all linesbreaks! (str_replace() instead of ereg_replace(), which was removed in PHP 7.0)
								$parts[$k2] = str_replace(array(chr(10),chr(13)),'',$parts[$k2]);
								$parts[$k2] = $this->defaultTStagMapping($parts[$k2],'db');
								$parts[$k2] = $this->cleanFontTags($parts[$k2],0,0,0);
								$parts[$k2] = $this->HTMLcleaner_db($parts[$k2],strtolower($this->procOptions['allowTagsInTypolists']?$this->procOptions['allowTagsInTypolists']:'br,font,b,i,u,a,img,span,strong,em'));
							}
								// Each list item becomes one line inside the <typolist>; type "1" marks an ordered list
							if ($tagName=='ol') { $params=' type="1"'; } else { $params=''; }
							$blockSplit[$k] = '<typolist'.$params.'>'.chr(10).implode(chr(10),$parts).chr(10).'</typolist>'.$lastBR;
						}
					} else {
						$blockSplit[$k].= $lastBR;
					}
				break;
				case 'table':	// Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
					if (!$this->procOptions['preserveTables'] && !$css) {
						$blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
					} else {
						$blockSplit[$k] = str_replace(chr(10),'',$blockSplit[$k]).$lastBR;
					}
				break;
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					if (!$css) {
						$attribArray = $this->get_tag_attributes_classic($tag);
							// Processing inner content here:
						$innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

						if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
								// Headers become <typohead> tags; the type attribute is left out for h6
							$type = intval(substr($tagName,1));
							$blockSplit[$k] = '<typohead'.
											($type!=6?' type="'.$type.'"':'').
											($attribArray['align']?' align="'.$attribArray['align'].'"':'').
											($attribArray['class']?' class="'.$attribArray['class'].'"':'').
											'>'.
											$innerContent.
											'</typohead>'.
											$lastBR;
						} else {
								// typohead disabled: keep the header tag, but with align and class attributes only
							$blockSplit[$k] = '<'.$tagName.
											($attribArray['align']?' align="'.htmlspecialchars($attribArray['align']).'"':'').
											($attribArray['class']?' class="'.htmlspecialchars($attribArray['class']).'"':'').
											'>'.
											$innerContent.
											'</'.$tagName.'>'.
											$lastBR;
						}
					} else {
						$blockSplit[$k].= $lastBR;
					}
				break;
				default:
					$blockSplit[$k].= $lastBR;
				break;
			}
		} else {	// NON-block:
				// Content outside the blocks is divided into lines; blank content is dropped
			if (strcmp(trim($blockSplit[$k]),'')) {
				$blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]).$lastBR;
			} else unset($blockSplit[$k]);
		}
	}
	$this->TS_transform_db_safecounter++;

	return implode('',$blockSplit);
}
812
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split into block elements (TABLE, BLOCKQUOTE, TYPOLIST, TYPOHEAD
 * plus the tags listed in $this->headListTags) and the text in between them:
 * - blockquote content is processed recursively,
 * - <typolist> blocks are turned into <ol>/<ul> lists (one <li> per line),
 * - <typohead> blocks are turned into <h1>-<h6> tags,
 * - other blocks are passed on unchanged,
 * - text between blocks is wrapped line by line by setDivTags().
 *
 * @param	string		Content input
 * @param	boolean		If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return	string		Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value,$css=0)	{

		// Split the content from Database by the occurence of these blocks:
	$blockTags = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,'.$this->headListTags;
	$blockSplit = $this->splitIntoBlock($blockTags,$value);

		// Traverse the blocks
	foreach($blockSplit as $k => $v)	{

			// The part following the current one. It does not exist for the very last part, so it is read defensively:
		$nextPart = isset($blockSplit[$k+1]) ? $blockSplit[$k+1] : '';

		if ($k%2)	{	// Inside one of the blocks:

				// Init:
			$tag = $this->getFirstTag($v);
			$tagName = strtolower($this->getFirstTagName($v));
			$attribArray = $this->get_tag_attributes_classic($tag);

				// Based on tagname, we do transformations:
			switch($tagName)	{
				case 'blockquote':	// Keep blockquotes, but transform their content recursively:
					$blockSplit[$k] = $tag.
						$this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]),$css).
						'</'.$tagName.'>';
				break;
				case 'typolist':	// Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
					if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist'])	{
						$tListContent = $this->removeFirstAndLastTag($blockSplit[$k]);
							// Strip one leading and one trailing (possibly space-padded) line break.
							// The "D" modifier makes "$" match at the very end only, like the former ereg_replace() call did.
						$tListContent = preg_replace('/^[ ]*\n/','',$tListContent);
						$tListContent = preg_replace('/\n[ ]*$/D','',$tListContent);
						$lines = explode(chr(10),$tListContent);
						$typ = (isset($attribArray['type']) && $attribArray['type']==1) ? 'ol' : 'ul';
						$blockSplit[$k] = '<'.$typ.'>'.chr(10).
							'<li>'.implode('</li>'.chr(10).'<li>',$lines).'</li>'.
							'</'.$typ.'>';
					}
				break;
				case 'typohead':	// Transform typohead into Hx tags.
					if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead'])	{
						$tC = $this->removeFirstAndLastTag($blockSplit[$k]);
							// presumably intInRange() clamps the value to 0-6; a result of zero is mapped to 6 below.
						$typ = t3lib_div::intInRange(isset($attribArray['type']) ? $attribArray['type'] : 0,0,6);
						if (!$typ)	$typ = 6;
						$align = !empty($attribArray['align']) ? ' align="'.$attribArray['align'].'"' : '';
						$class = !empty($attribArray['class']) ? ' class="'.$attribArray['class'].'"' : '';
						$blockSplit[$k] = '<h'.$typ.$align.$class.'>'.
							$tC.
							'</h'.$typ.'>';
					}
				break;
			}

				// Removing a leading linebreak from the part that follows the block:
			$blockSplit[$k+1] = preg_replace('/^[ ]*\n/','',$nextPart);
		} else {	// NON-block:
			$nextFTN = $this->getFirstTagName($nextPart);
			$singleLineBreak = $blockSplit[$k]==chr(10);
			if (t3lib_div::inList($blockTags,$nextFTN))	{	// Removing trailing linebreak if a block element follows
				$blockSplit[$k] = preg_replace('/\n[ ]*$/D','',$blockSplit[$k]);
			}
				// If $blockSplit[$k] is blank then unset the line. UNLESS the line happened to be a single line break.
			if (!strcmp($blockSplit[$k],'') && !$singleLineBreak)	{
				unset($blockSplit[$k]);
			} else {
				$blockSplit[$k] = $this->setDivTags($blockSplit[$k],($this->procOptions['useDIVasParagraphTagForRTE']?'div':'p'));
			}
		}
	}

	return implode(chr(10),$blockSplit);
}
885
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Strips every HTML tag from the content except for a fixed list of allowed tags.
 *
 * @param	string		Content input
 * @return	string		Content output
 */
function TS_strip_db($value)	{
	$allowedTagNames = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote';

		// strip_tags() takes the allowed tags as one string of the form "<b><i><u>..."
	$allowedTags = '<'.str_replace(',','><',$allowedTagNames).'>';

	return strip_tags($value,$allowedTags);
}
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911 /***************************************************************
912 *
913 * Generic RTE transformation, analysis and helper functions
914 *
915 **************************************************************/
916
/**
 * Fetches the content of a file or URL.
 * Plain wrapper around t3lib_div::getURL().
 *
 * @param	string		Filepath/URL to read
 * @return	string		Whatever t3lib_div::getURL() returns for the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url)	{
	$content = t3lib_div::getURL($url);

	return $content;
}
927
/**
 * Cleans content on its way into the database.
 * Calls HTMLcleaner() with a configuration for the "db" direction: the tag configuration comes from getKeepTags('db'),
 * and two processing options control unknown tags and the htmlspecialchars handling.
 *
 * @param	string		Content to clean up
 * @param	string		Comma list of tags to specifically allow. If empty, the default configuration from getKeepTags() is used.
 * @return	string		Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content,$tagList='')	{

		// An explicit tag list is only passed on if one was given:
	$keepTags = $tagList ? $this->getKeepTags('db',$tagList) : $this->getKeepTags('db');

		// Third argument for HTMLcleaner(): 0 unless "dontRemoveUnknownTags_db" is set, then 1.
	$keepUnknown = 0;
	if ($this->procOptions['dontRemoveUnknownTags_db'])	{
		$keepUnknown = 1;
	}

		// Fourth argument for HTMLcleaner(): -1 unless "dontUndoHSC_db" is set, then 0.
	$hscMode = -1;
	if ($this->procOptions['dontUndoHSC_db'])	{
		$hscMode = 0;
	}

	return $this->HTMLcleaner($content,$keepTags,$keepUnknown,$hscMode);
}
949
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the result is cached in $this->getKeepTags_cache per direction and reused on the next call.
 *
 * The default set of tags is a built-in list extended by the processing option "allowTags" and reduced by "denyTags".
 * Both options are matched case-insensitively (they are lower-cased before use).
 *
 * @param	string		The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param	string		Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
 * @return	array		Configuration array
 * @see HTMLcleaner_db()
 */
function getKeepTags($direction='rte',$tagList='')	{
	if (!is_array($this->getKeepTags_cache[$direction]) || $tagList)	{

			// Setting up allowed tags:
		if (strcmp($tagList,''))	{	// If the $tagList input var is set, this will take precedence
			$keepTags = array_flip(t3lib_div::trimExplode(',',$tagList,1));
		} else {	// Default is to get allowed/denied tags from internal array of processing options:
				// Construct default list of tags to keep:
			$typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
			$keepTags = array_flip(t3lib_div::trimExplode(',',$typoScript_list.','.strtolower($this->procOptions['allowTags']),1));

				// For tags to deny, remove them from $keepTags array.
				// The keys of $keepTags are lowercase, so the deny list is lower-cased as well - otherwise eg. "FONT" would never match.
			$denyTags = t3lib_div::trimExplode(',',strtolower($this->procOptions['denyTags']),1);
			foreach($denyTags as $dKe)	{
				unset($keepTags[$dKe]);
			}
		}

			// Based on the direction of content, set further options:
		switch ($direction)	{

				// GOING from database to Rich Text Editor:
			case 'rte':
					// Transform bold/italics tags to strong/em
				if (isset($keepTags['b']))	{$keepTags['b']=array('remap'=>'STRONG');}
				if (isset($keepTags['i']))	{$keepTags['i']=array('remap'=>'EM');}

					// Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
				list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'],$keepTags);
			break;

				// GOING from RTE to database:
			case 'db':
					// Transform strong/em back to bold/italics:
				if (isset($keepTags['strong']))	{ $keepTags['strong']=array('remap'=>'b'); }
				if (isset($keepTags['em']))		{ $keepTags['em']=array('remap'=>'i'); }

					// Setting up span tags if they are allowed:
				if (isset($keepTags['span']))	{
					$classes=array_merge(array(''),$this->allowedClasses);
					$keepTags['span']=array(
						'allowedAttribs'=>'class',
						'fixAttrib' => Array(
							'class' => Array (
								'list' => $classes,
								'removeIfFalse' => 1
							)
						),
						'rmTagIfNoAttrib' => 1
					);
						// Without configured "allowedClasses" the class attribute is not restricted to a list:
					if (!$this->procOptions['allowedClasses'])	unset($keepTags['span']['fixAttrib']['class']['list']);
				}

					// Setting up font tags if they are allowed:
				if (isset($keepTags['font']))	{
					$colors=array_merge(array(''),t3lib_div::trimExplode(',',$this->procOptions['allowedFontColors'],1));
					$keepTags['font']=array(
						'allowedAttribs'=>'face,color,size',
						'fixAttrib' => Array(
							'face' => Array (
								'removeIfFalse' => 1
							),
							'color' => Array (
								'removeIfFalse' => 1,
								'list'=>$colors
							),
							'size' => Array (
								'removeIfFalse' => 1,
							)
						),
						'rmTagIfNoAttrib' => 1
					);
						// Without configured "allowedFontColors" the color attribute is not restricted to a list:
					if (!$this->procOptions['allowedFontColors'])	unset($keepTags['font']['fixAttrib']['color']['list']);
				}

					// Setting further options, getting them from the processing options:
				$TSc = $this->procOptions['HTMLparser_db.'];
				if (!$TSc['globalNesting'])	$TSc['globalNesting']='b,i,u,a,center,font,sub,sup,strong,em,strike,span';
				if (!$TSc['noAttrib'])	$TSc['noAttrib']='b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';

					// Transforming the array from TypoScript to regular array:
				list($keepTags) = $this->HTMLparserConfig($TSc,$keepTags);
			break;
		}

			// Caching (internally, in object memory) the result unless tagList is set:
		if (!$tagList)	{
			$this->getKeepTags_cache[$direction] = $keepTags;
		} else {
			return $keepTags;
		}
	}

		// Return result:
	return $this->getKeepTags_cache[$direction];
}
1055
/**
 * This resolves the $value into parts based on <div></div>-sections and <P>-sections and <BR>-tags. These are returned as lines separated by chr(10).
 * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * If the value contains no div/p sections at all, or the recursion brake has reached zero, the value is returned unchanged as a string - also when $returnArray is set.
 *
 * @param	string		Value to process.
 * @param	integer		Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param	boolean		If true, an array with the lines is returned, otherwise a string of the processed input value.
 * @return	mixed		Processed input value; string, or array of lines if $returnArray was set and div/p sections were found.
 * @see setDivTags()
 */
function divideIntoLines($value,$count=5,$returnArray=FALSE)	{

		// Internalize font tags (move them from OUTSIDE p/div to inside it that is the case):
	if ($this->procOptions['internalizeFontTags'])	{$value = $this->internalizeFontTags($value);}

		// Setting configuration for processing:
	$allowTagsOutside = t3lib_div::trimExplode(',',strtolower($this->procOptions['allowTagsOutside']?$this->procOptions['allowTagsOutside']:'img'),1);
	$remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
	$divSplit = $this->splitIntoBlock('div,p',$value,1);	// Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?

	if ($this->procOptions['keepPDIVattribs'])	{
		$keepAttribListArr = t3lib_div::trimExplode(',',strtolower($this->procOptions['keepPDIVattribs']),1);
	} else {
		$keepAttribListArr = array();
	}

		// Returns plainly the value if there was no div/p sections in it
	if (count($divSplit)<=1 || $count<=0)	{
		return $value;
	}

		// Traverse the splitted sections:
	foreach($divSplit as $k => $v)	{
		if ($k%2)	{	// Inside
			$v = $this->removeFirstAndLastTag($v);

				// Fetching 'sub-lines' - which will explode any further p/div nesting...
			$subLines = $this->divideIntoLines($v,$count-1,1);
			if (is_array($subLines))	{	// So, if there happened to be sub-nesting of p/div, this is written directly as the new content of THIS section. (This would be considered 'an error')
				// Nothing to do.
			} else {	//... but if NO subsection was found, we process it as a TRUE line without erroneous content:
				$subLines = array($subLines);
				if (!$this->procOptions['dontConvBRtoParagraph'])	{	// process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
						// Case-insensitive split on <br>, <br/> and <br /> (PCRE replacement for the former spliti() call):
					$subLines = preg_split('/<br[[:space:]]*\/?>/i',$v);
				}

					// Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
				foreach(array_keys($subLines) as $sk)	{

						// Clear up the subline for DB.
					$subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);

						// Get first tag, attributes etc:
					$fTag = $this->getFirstTag($divSplit[$k]);
					$tagName = $this->getFirstTagName($divSplit[$k]);
					$attribs = $this->get_tag_attributes($fTag);

						// Keep attributes (lowercase)
					$newAttribs = array();
					if (count($keepAttribListArr))	{
						foreach($keepAttribListArr as $keepA)	{
							if (isset($attribs[0][$keepA]))	{ $newAttribs[$keepA] = $attribs[0][$keepA]; }
						}
					}

						// ALIGN attribute:
					if (!$this->procOptions['skipAlign'] && strcmp(trim($attribs[0]['align']),'') && strtolower($attribs[0]['align'])!='left')	{	// Set to value, but not 'left'
						$newAttribs['align'] = strtolower($attribs[0]['align']);
					}

						// CLASS attribute:
					if (!$this->procOptions['skipClass'] && strcmp(trim($attribs[0]['class']),''))	{	// Set to whatever value
							// The class is compared in uppercase; NOTE(review): this assumes $this->allowedClasses holds uppercase names - confirm where it is set up.
						if (!count($this->allowedClasses) || in_array(strtoupper($attribs[0]['class']),$this->allowedClasses))	{
							$newAttribs['class'] = $attribs[0]['class'];
						}
					}

						// Remove any line break char (10 or 13)
					$subLines[$sk] = str_replace(array(chr(10),chr(13)),'',$subLines[$sk]);

						// Wrap the line in its tag again if there are attributes to keep (and remapParagraphTag is not "1"):
					if (count($newAttribs) && strcmp($remapParagraphTag,'1'))	{
						if ($remapParagraphTag=='P')	$tagName='p';
						if ($remapParagraphTag=='DIV')	$tagName='div';
						$subLines[$sk] = '<'.trim($tagName.' '.$this->compileTagAttribs($newAttribs)).'>'.$subLines[$sk].'</'.$tagName.'>';
					}
				}
			}
				// Add the processed line(s)
			$divSplit[$k] = implode(chr(10),$subLines);

				// If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank:
			if (trim(strip_tags($divSplit[$k]))=='&nbsp;')	$divSplit[$k]='';
		} else {	// outside div:
				// Remove positions which are outside div/p tags and without content
			$divSplit[$k] = trim(strip_tags($divSplit[$k],'<'.implode('><',$allowTagsOutside).'>'));
			if (!strcmp($divSplit[$k],''))	unset($divSplit[$k]);	// Remove part if it's empty
		}
	}

		// Return value:
	return $returnArray ? $divSplit : implode(chr(10),$divSplit);
}
1162
/**
 * Converts all lines into <div></div>/<p></p>-sections (unless the line is a div- or p-section already)
 * For processing of content going FROM database TO RTE.
 *
 * Each line (split by chr(10)) is cleaned with HTMLcleaner() using the "rte" tag configuration; blank lines become "&nbsp;".
 *
 * @param	string		Value to convert
 * @param	string		Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return	string		Processed value.
 * @see divideIntoLines()
 */
function setDivTags($value,$dT='p')	{

		// Configuration for HTMLcleaner(), which processes each line on its way to the RTE:
	$tagConfig = $this->getKeepTags('rte');

		// Default is 'protect' for unknown tags; "dontProtectUnknownTags_rte" switches that to 0.
	$unknownTagMode = 'protect';
	if ($this->procOptions['dontProtectUnknownTags_rte'])	{
		$unknownTagMode = 0;
	}

		// Default is 1 for the htmlspecialchars argument; "dontHSC_rte" switches that to 0.
	$hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;

		// Default is to turn "&amp;nbsp;" back into "&nbsp;" after cleaning; "dontConvAmpInNBSP_rte" disables that.
	$restoreNbsp = $this->procOptions['dontConvAmpInNBSP_rte'] ? 0 : 1;

		// Process the content line by line:
	$lines = explode(chr(10),$value);
	foreach(array_keys($lines) as $idx)	{

		if (strcmp(trim($lines[$idx]),''))	{	// Line has content: clean it
			$lines[$idx] = $this->HTMLcleaner($lines[$idx],$tagConfig,$unknownTagMode,$hscMode);
			if ($restoreNbsp)	{
				$lines[$idx] = str_replace('&amp;nbsp;','&nbsp;',$lines[$idx]);
			}
		} else {	// Blank line
			$lines[$idx] = '&nbsp;';
		}

			// A line counts as wrapped if it starts with "<div" and ends with "</div>", or starts with "<p" and ends with "</p>":
		$probe = strtolower(trim($lines[$idx]));
		$isDivSection = substr($probe,0,4)=='<div' && substr($probe,-6)=='</div>';
		$isPSection = substr($probe,0,2)=='<p' && substr($probe,-4)=='</p>';

		if (!$isDivSection && !$isPSection)	{
			$lines[$idx] = '<'.$dT.'>'.$lines[$idx].'</'.$dT.'>';
		}
	}

	return implode(chr(10),$lines);
}
1205
/**
 * Moves <font> tags that surround <p>/<div> sections to the inside of those sections.
 * The value is split into font-tag blocks. If a font block contains p/div sections, the font tag is wrapped around the
 * content INSIDE each p/div section (and around non-empty text between them) and the outer font tag is dropped.
 * Font blocks without p/div sections are left untouched.
 * This is used by divideIntoLines() if the processing option 'internalizeFontTags' is set.
 *
 * @param	string		Input content
 * @return	string		Output content
 * @see divideIntoLines()
 */
function internalizeFontTags($value)	{

	$fontParts = $this->splitIntoBlock('font',$value);

	foreach($fontParts as $idx => $part)	{
			// Only the odd parts are font-tag blocks:
		if (!($idx%2))	{
			continue;
		}

		$fontTag = $this->getFirstTag($part);	// The opening font-tag
		$sections = $this->splitIntoBlock('div,p',$this->removeFirstAndLastTag($part),1);

			// Nothing to do unless there are div/p sections inside the font-tag:
		if (count($sections)<=1)	{
			continue;
		}

		foreach($sections as $subIdx => $section)	{
			if ($subIdx%2)	{	// A div/p section: put the font-tag just inside of it
				$sectionTag = $this->getFirstTag($section);
				$sectionTagName = $this->getFirstTagName($section);
				$sectionContent = $this->removeFirstAndLastTag($section);
				$sections[$subIdx] = $sectionTag.$fontTag.$sectionContent.'</font>'.'</'.$sectionTagName.'>';
			} elseif (trim(strip_tags($section)))	{	// Text between the sections: keep it wrapped in the font-tag
				$sections[$subIdx] = $fontTag.$section.'</font>';
			}
		}

		$fontParts[$idx] = implode('',$sections);
	}

	return implode('',$fontParts);
}
1246
/**
 * Returns the site URL as reported by t3lib_div::getIndpEnv().
 *
 * @return	string		Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
 * @see t3lib_div::getIndpEnv()
 */
function siteUrl()	{
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');

	return $siteUrl;
}
1256
/**
 * Return the storage folder of RTE image files.
 * $this->rte_p['imgpath'] is used if it is set, otherwise $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'].
 *
 * @return	string
 */
function rteImageStorageDir()	{
	if ($this->rte_p['imgpath'])	{
		return $this->rte_p['imgpath'];
	}

	return $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
}
1266
/**
 * Remove all tables from incoming code
 * The function tries to do this in a more or less respectful way: The content of each table cell (<td> as well as <th>) is
 * resolved and followed by the break character, so at least the content is preserved in some way.
 * The table parts and the content around them are finally joined by the break character as well.
 *
 * @param	string		Input value
 * @param	string		Break character to use for linebreaks.
 * @return	string		Output value
 */
function removeTables($value,$breakChar='<br />')	{

		// Splitting value into table blocks:
	$tableSplit = $this->splitIntoBlock('table',$value);

		// Traverse blocks of tables:
	foreach($tableSplit as $k => $v)	{
		if ($k%2)	{	// Inside a table: replace it by the content of its cells
			$tableSplit[$k] = '';
			$rowSplit = $this->splitIntoBlock('tr',$v);
			foreach($rowSplit as $k2 => $v2)	{
				if ($k2%2)	{	// Inside a row
						// Header cells are collected as well, otherwise their content would be lost.
						// NOTE(review): relies on splitIntoBlock() accepting a comma list of tags, as it is used with 'div,p' elsewhere in this class.
					$cellSplit = $this->getAllParts($this->splitIntoBlock('td,th',$v2),1,0);
					foreach($cellSplit as $cellContent)	{
						$tableSplit[$k].= $cellContent.$breakChar;
					}
				}
			}
		}
	}

		// Implode it all again:
	return implode($breakChar,$tableSplit);
}
1299
/**
 * Default tag mapping for TS
 * Maps strong/em to b/i for direction "db" and b/i to strong/em for direction "rte", using mapTags().
 * For any other direction the code is returned unchanged.
 *
 * @param	string		Input code to process
 * @param	string		Direction: To database (db) or from database to RTE (rte)
 * @return	string		Processed value
 */
function defaultTStagMapping($code,$direction='rte')	{
	$mappingForDb = array(
		'strong' => 'b',
		'em' => 'i'
	);
	$mappingForRte = array(
		'b' => 'strong',
		'i' => 'em'
	);

	if ($direction=='db')	{
		$code = $this->mapTags($code,$mappingForDb);
	}
	if ($direction=='rte')	{
		$code = $this->mapTags($code,$mappingForRte);
	}

	return $code;
}
1322
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute ("width: NNpx" / "height: NNpx"), use that!
 * Otherwise the values of the "width" / "height" attributes are used.
 * Only the plain "width" and "height" style properties are considered, not eg. "max-width" or "line-height".
 *
 * @param	array		Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return	array		Integer w/h in key 0/1. Zero is returned if not found.
 */
function getWHFromAttribs($attribArray)	{
	$w = 0;
	$h = 0;

	$style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
	if ($style)	{
			// The property name must start the style or follow a ";" or whitespace, so longer names ending in "width"/"height" do not match.
		$prefix = '/(?:^|[;[:space:]])';
		$suffix = '[[:space:]]*:[[:space:]]*([0-9]+)[[:space:]]*px/i';

			// Width (each match uses its own result array, so a failed match can not pick up a previous result)
		$widthMatch = array();
		if (preg_match($prefix.'width'.$suffix,$style,$widthMatch))	{
			$w = intval($widthMatch[1]);
		}
			// Height
		$heightMatch = array();
		if (preg_match($prefix.'height'.$suffix,$style,$heightMatch))	{
			$h = intval($heightMatch[1]);
		}
	}

		// Fall back to the plain attributes for whatever the style did not provide:
	if (!$w && isset($attribArray['width']))	{
		$w = $attribArray['width'];
	}
	if (!$h && isset($attribArray['height']))	{
		$h = $attribArray['height'];
	}

	return array(intval($w),intval($h));
}
1349
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * The returned array always has the keys "url" and "type". "type" is one of:
 * - "email": $url started with "mailto:"; "url" is the address without that prefix.
 * - "ext": default; "url" is the (trimmed) input url.
 * - "anchor" / "page" / "file": set if the host of $url equals the host of the site URL and the remaining script path is empty
 *   or starts with TYPO3_mainDir. For these the keys "relScriptPath" and "relUrl" are set as well, and for "page" also "pageid" and "cElement".
 *
 * @param	string		URL to analyse.
 * @return	array		Information in an array about the URL
 */
function urlInfoForLinkTags($url)	{
	$info = array();
	$url = trim($url);
	if (substr(strtolower($url),0,7)=='mailto:')	{
		$info['url'] = trim(substr($url,7));
		$info['type'] = 'email';
	} else {
		$curURL = $this->siteUrl();

			// Find the length of the common prefix of $url and the site URL.
			// The loop is bounded by the shorter of the two strings so no offset beyond the end of a string is read.
		$maxPrefix = min(strlen($url),strlen($curURL));
		for($a=0;$a<$maxPrefix;$a++)	{
			if ($url[$a]!==$curURL[$a])	{
				break;
			}
		}

		$info['relScriptPath'] = substr($curURL,$a);
		$info['relUrl'] = substr($url,$a);
		$info['url'] = $url;
		$info['type'] = 'ext';

			// parse_url() may return FALSE and omits keys for missing components, so the parts are read defensively:
		$urlParts = parse_url($url);
		$curUrlParts = parse_url($curURL);
		$urlHost = (is_array($urlParts) && isset($urlParts['host'])) ? $urlParts['host'] : '';
		$urlFragment = (is_array($urlParts) && isset($urlParts['fragment'])) ? $urlParts['fragment'] : '';
		$curHost = (is_array($curUrlParts) && isset($curUrlParts['host'])) ? $curUrlParts['host'] : '';

		if (!strcmp($urlHost,$curHost) 	// Hosts should match
			&& (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'],0,strlen(TYPO3_mainDir))==TYPO3_mainDir)))	{	// If the script path seems to match or is empty (FE-EDIT)

			$uP = parse_url($info['relUrl']);
			if (!is_array($uP))	{
				$uP = array();
			}
			$relPath = isset($uP['path']) ? $uP['path'] : '';
			$relQuery = isset($uP['query']) ? $uP['query'] : '';

			if (!strcmp('#'.$urlFragment,$info['relUrl']))	{	// The relative url is nothing but the fragment
				$info['url'] = $info['relUrl'];
				$info['type'] = 'anchor';
			} elseif (!trim($relPath) || !strcmp($relPath,'index.php'))	{	// No path or index.php: look for a page id in the query string
				$pp = explode('id=',$relQuery);
				$id = isset($pp[1]) ? trim($pp[1]) : '';
				if ($id)	{
					$info['pageid'] = $id;
					$info['cElement'] = isset($uP['fragment']) ? $uP['fragment'] : NULL;
					$info['url'] = $id.($info['cElement']?'#'.$info['cElement']:'');
					$info['type'] = 'page';
				}
			} else {
				$info['url'] = $info['relUrl'];
				$info['type'] = 'file';
			}
		} else {
			unset($info['relScriptPath']);
			unset($info['relUrl']);
		}
	}

	return $info;
}
1407
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 * If the href of a tag has no scheme, the site URL is prepended to the href after the first strlen($this->relBackPath) characters have been cut off.
 * The content of each <A>-tag is processed recursively with the same settings.
 *
 * @param	string		Content input
 * @param	boolean		If true, then the "rtekeep" attribute will not be set.
 * @return	string		Content output
 */
function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)	{
	$blockSplit = $this->splitIntoBlock('A',$value);

	foreach($blockSplit as $k => $v)	{
		if ($k%2)	{	// block:
			$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v),1);
			$href = isset($attribArray['href']) ? $attribArray['href'] : '';

				// Checking if there is a scheme, and if not, prepend the current url.
			$uP = parse_url(strtolower($href));
			if (!is_array($uP) || empty($uP['scheme']))	{
				$attribArray['href'] = $this->siteUrl().substr($href,strlen($this->relBackPath));
			}
			if (!$dontSetRTEKEEP)	$attribArray['rtekeep']=1;

			$bTag = '<a '.t3lib_div::implodeParams($attribArray,1).'>';
			$eTag = '</a>';

				// The flag is passed on so nested tags are treated like the outer one:
			$blockSplit[$k] = $bTag.$this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]),$dontSetRTEKEEP).$eTag;
		}
	}

	return implode('',$blockSplit);
}
1435 }
1436
1437
1438 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']) {
1439 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
1440 }
1441 ?>