7600c5c24b95c2a606bd78359c0d03e447523f3a
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Log\LogManager;
19 use TYPO3\CMS\Core\Resource;
20 use TYPO3\CMS\Core\Utility\GeneralUtility;
21 use TYPO3\CMS\Core\Utility\MathUtility;
22 use TYPO3\CMS\Core\Utility\StringUtility;
23 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
24
25 /**
26 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
27 */
28 class RteHtmlParser extends HtmlParser
29 {
/**
 * Comma separated list of block level element names. RTE_transform() may overwrite
 * it from the "blockElementList" proc option and TS_transform_db() splits the
 * content by these elements; they are not wrapped into a "p" tag.
 *
 * @var string
 */
public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma separated list of the tags that are allowed by default.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';

/**
 * Pid (page id) of the record that is manipulated by this class; set through init().
 *
 * @var int
 */
public $recPid = 0;

/**
 * Reference to the processed element in the form [table]:[field],
 * eg. "tt_content:bodytext"; set through init().
 *
 * @var string
 */
public $elRef = '';

/**
 * The current Page TSconfig of the RTE, as handed to RTE_transform().
 *
 * @var array
 */
public $tsConfig = array();

/**
 * The "proc." options taken from the Page TSconfig in RTE_transform().
 *
 * @var array
 */
public $procOptions = array();

/**
 * Run-away brake for recursive calls: decremented on every TS_transform_db() call,
 * which stops processing once the counter drops below zero.
 *
 * @var int
 */
public $TS_transform_db_safecounter = 100;

/**
 * Data cache for the processing function.
 *
 * @var array
 */
public $getKeepTags_cache = array();

/**
 * CSS class names that are allowed in the RTE; filled from the "allowedClasses"
 * proc option when the "css_transform" mode runs in "db" direction.
 *
 * @var array
 */
public $allowedClasses = array();

/**
 * Upper-cased, comma separated list of tags to preserve, taken from the
 * "preserveTags" proc option in RTE_transform().
 *
 * @var string
 */
public $preserveTags = '';
97
98 /**
99 * Initialize, setting element reference and record PID
100 *
101 * @param string $elRef Element reference, eg "tt_content:bodytext"
102 * @param int $recPid PID of the record (page id)
103 * @return void
104 */
public function init($elRef = '', $recPid = 0)
{
    // Remember which [table]:[field] is processed and on which page the record lives
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
110
111 /**********************************************
112 *
113 * Main function
114 *
115 **********************************************/
116 /**
117 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
118 * This is the main function called from tcemain and transfer data classes
119 *
120 * @param string $value Input value
121 * @param array $specConf Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
122 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
123 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
124 * @return string Output value
125 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = array())
{
    // Init: expose the RTE configuration and its "proc." section to the transformation handlers
    $this->tsConfig = $thisConfig;
    $this->procOptions = (array)$thisConfig['proc.'];
    $this->preserveTags = strtoupper(implode(',', GeneralUtility::trimExplode(',', $this->procOptions['preserveTags'])));
    // Dynamic configuration of blockElementList
    if ($this->procOptions['blockElementList']) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }
    // Get parameters for rte_transformation:
    $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);
    // Setting modes: a non-empty "overruleMode" proc option (comma separated) takes
    // precedence over the mode from the field configuration (dash separated)
    if ((string)$this->procOptions['overruleMode'] !== '') {
        $modes = array_unique(GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']));
    } else {
        $modes = array_unique(GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode']));
    }
    $revmodes = array_flip($modes);
    // The special mode "ts_css" is replaced in place by the list of modes it stands for
    if (isset($revmodes['ts_css'])) {
        $modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
    }
    // Flatten the list again (the expansion above inserted a comma separated string) and make it unique
    $modes = array_unique(GeneralUtility::trimExplode(',', implode(',', $modes), true));
    // Reverse order if direction is "rte"
    if ($direction === 'rte') {
        $modes = array_reverse($modes);
    }
    // Getting additional HTML cleaner configuration. These are applied either before or after the main
    // transformation is done and are thus totally independent processing options you can set up:
    $entry_HTMLparser = $this->procOptions['entryHTMLparser_' . $direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_' . $direction . '.']) : '';
    $exit_HTMLparser = $this->procOptions['exitHTMLparser_' . $direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_' . $direction . '.']) : '';
    // Line breaks of content is unified into char-10 only (removing char 13)
    if (!$this->procOptions['disableUnifyLineBreaks']) {
        $value = str_replace(CRLF, LF, $value);
    }
    // If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($entry_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $entry_HTMLparser[0], $entry_HTMLparser[1], $entry_HTMLparser[2], $entry_HTMLparser[3]);
    }
    // Traverse modes:
    foreach ($modes as $cmd) {
        // ->DB
        if ($direction === 'db') {
            // Checking for user defined transformation:
            if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'db');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_db($value);
                        break;
                    case 'css_transform':
                        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'], true);
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
        // ->RTE
        if ($direction === 'rte') {
            // Checking for user defined transformation:
            if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                // Hand over the transformation key exactly like the "db" direction does, so a
                // user object registered for several keys knows which one is being processed
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'rte');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_rte($value);
                        break;
                    case 'css_transform':
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }
    // If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($exit_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $exit_HTMLparser[0], $exit_HTMLparser[1], $exit_HTMLparser[2], $exit_HTMLparser[3]);
    }
    // Final clean up of linebreaks:
    if (!$this->procOptions['disableUnifyLineBreaks']) {
        // Make sure no \r\n sequences has entered in the meantime...
        $value = str_replace(CRLF, LF, $value);
        // ... and then change all \n into \r\n
        $value = str_replace(LF, CRLF, $value);
    }
    // Return value:
    return $value;
}
253
254 /************************************
255 *
256 * Specific RTE TRANSFORMATION functions
257 *
258 *************************************/
259 /**
260 * Transformation handler: 'ts_images' / direction: "db"
261 * Processing images inserted in the RTE.
262 * This is used when content goes from the RTE to the database.
263 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
264 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
265 * Also "magic" images are processed here.
266 *
267 * @param string $value The content from RTE going to Database
268 * @return string Processed content
269 */
public function TS_images_db($value)
{
    // Split content by <img> tags; the odd indexes of the result hold the img tags themselves
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = $this->siteUrl();
        // Site URL with the request host removed
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Image found, do processing:
            if ($k % 2) {
                // Get attributes
                list($attribArray) = $this->get_tag_attributes($v, true);
                // It's always an absolute URL coming from the RTE into the Database.
                $absoluteUrl = trim($attribArray['src']);
                // Make path absolute if it is relative and we have a site path which is not '/'.
                // The scheme has to be read with parse_url(): pathinfo() never returns a
                // "scheme" key, so a check based on it is always true.
                $urlParts = parse_url($absoluteUrl);
                $hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
                if ($sitePath && !$hasScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                    $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                    $absoluteUrl = $siteUrl . $absoluteUrl;
                }
                // Image dimensions set in the img tag, if any
                $imgTagDimensions = $this->getWHFromAttribs($attribArray);
                if ($imgTagDimensions[0]) {
                    $attribArray['width'] = $imgTagDimensions[0];
                }
                if ($imgTagDimensions[1]) {
                    $attribArray['height'] = $imgTagDimensions[1];
                }
                $originalImageFile = null;
                if ($attribArray['data-htmlarea-file-uid']) {
                    // An original image file uid is available
                    try {
                        /** @var $originalImageFile Resource\File */
                        $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                    } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                        // Log the fact the file could not be retrieved.
                        $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                        $this->getLogger()->error($message);
                    }
                }
                if ($originalImageFile instanceof Resource\File) {
                    // Public url of local file is relative to the site url, absolute otherwise
                    if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                        // This is a plain image, i.e. reference to the original image
                        if ($this->procOptions['plainImageMode']) {
                            // "plain image mode" is configured
                            // Find the dimensions of the original image
                            $imageInfo = array(
                                $originalImageFile->getProperty('width'),
                                $originalImageFile->getProperty('height')
                            );
                            if (!$imageInfo[0] || !$imageInfo[1]) {
                                // Fall back to reading the dimensions from the file itself
                                $filePath = $originalImageFile->getForLocalProcessing(false);
                                $imageInfo = @getimagesize($filePath);
                            }
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                    } else {
                        // Magic image case: get a processed file with the requested configuration
                        $imageConfiguration = array(
                            'width' => $imgTagDimensions[0],
                            'height' => $imgTagDimensions[1]
                        );
                        $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && !$this->procOptions['dontFetchExtPictures'] && TYPO3_MODE === 'BE') {
                    // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                    // Fetch the external image
                    $externalFile = $this->getUrl($absoluteUrl);
                    if ($externalFile) {
                        $pU = parse_url($absoluteUrl);
                        $pI = pathinfo($pU['path']);
                        $extension = strtolower($pI['extension']);
                        if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                            $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                            // We insert this image into the user default upload folder
                            list($table, $field) = explode(':', $this->elRef);
                            $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = array(
                                'width' => $attribArray['width'],
                                'height' => $attribArray['height']
                            );
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                    // Finally, check image as local file (siteURL equals the one of the image)
                    // Image has no data-htmlarea-file-uid attribute
                    // Relative path, rawurldecoded for special characters.
                    $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                    // Absolute filepath, locked to relative path of this project
                    $filepath = GeneralUtility::getFileAbsFileName($path);
                    // Check file existence (in relative directory to this installation!)
                    if ($filepath && @is_file($filepath)) {
                        // Treat it as a plain image
                        if ($this->procOptions['plainImageMode']) {
                            // If "plain image mode" has been configured
                            // Find the original dimensions of the image
                            $imageInfo = @getimagesize($filepath);
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                        // Let's try to find a file uid for this image
                        try {
                            $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                            if ($fileOrFolderObject instanceof Resource\FileInterface) {
                                $fileIdentifier = $fileOrFolderObject->getIdentifier();
                                $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                                // @todo if the retrieved file is a processed file, get the original file...
                                $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // Nothing to be done if file/folder not found
                        }
                    }
                }
                // Remove width and height from style attribute
                $attribArray['style'] = preg_replace('/((?:^|)\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                // Convert absolute to relative url
                if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
                    $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
            }
        }
    }
    return implode('', $imgSplit);
}
415
416 /**
417 * Transformation handler: 'ts_images' / direction: "rte"
418 * Processing images from database content going into the RTE.
419 * Processing includes converting the src attribute to an absolute URL.
420 *
421 * @param string $value Content input
422 * @return string Content output
423 */
public function TS_images_rte($value)
{
    // Split content by <img> tags; the odd indexes of the result hold the img tags themselves
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = $this->siteUrl();
        // Site URL with the request host removed
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        foreach ($imgSplit as $k => $v) {
            // Image found
            if ($k % 2) {
                // Get the attributes of the img tag
                list($attribArray) = $this->get_tag_attributes($v, true);
                $absoluteUrl = isset($attribArray['src']) ? trim($attribArray['src']) : '';
                // Transform the src attribute into an absolute url, if it not already
                if (strtolower(substr($absoluteUrl, 0, 4)) !== 'http') {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl.
                    // The trimmed URL is used on purpose: surrounding whitespace in the attribute
                    // would defeat the "^" anchor and leave the sub path duplicated.
                    $relativeUrl = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $absoluteUrl);
                    $attribArray['src'] = $siteUrl . $relativeUrl;
                }
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
            }
        }
    }
    // Return processed content:
    return implode('', $imgSplit);
}
454
455 /**
456 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
457 * Converting <A>-tags to/from abs/rel
458 *
459 * @param string $value Content input
460 * @param string $direction Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
461 * @return string Content output
462 */
public function TS_reglinks($value, $direction)
{
    // Database -> RTE: delegate to the helper that handles the A-tags for the RTE
    if ($direction == 'rte') {
        return $this->TS_AtagToAbs($value, 1);
    }
    // Any direction other than "rte" and "db" yields an empty string
    if ($direction != 'db') {
        return '';
    }
    // RTE -> database: strip the site URL prefix from the href of every A-tag
    $sitePrefix = $this->siteUrl();
    $parts = $this->splitIntoBlock('A', $value);
    foreach ($parts as $index => $part) {
        // Even indexes hold the content between the A-tag blocks
        if (!($index % 2)) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($part), true);
        // If the url is local, remove url-prefix
        if ($sitePrefix && substr($attributes['href'], 0, strlen($sitePrefix)) == $sitePrefix) {
            $attributes['href'] = substr($attributes['href'], strlen($sitePrefix));
        }
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, 1) . '>';
        // The content of the block is processed recursively
        $innerContent = $this->TS_reglinks($this->removeFirstAndLastTag($part), $direction);
        $parts[$index] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $parts);
}
491
492 /**
493 * Transformation handler: 'ts_links' / direction: "db"
494 * Converting <A>-tags to <link tags>
495 *
496 * @param string $value Content input
497 * @return string Content output
498 * @see TS_links_rte()
499 */
public function TS_links_db($value)
{
    // Configuration array that is handed by reference to the hook processors
    $conf = array();
    // Split content into <a> tag blocks and process:
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // If an A-tag was found:
        if ($k % 2) {
            list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
            $info = $this->urlInfoForLinkTags($attribArray['href']);
            // Check options: work on a copy from which every attribute that can be
            // expressed in a <link> tag is removed; whatever is left blocks the conversion
            $attribArray_copy = $attribArray;
            unset($attribArray_copy['href']);
            unset($attribArray_copy['target']);
            unset($attribArray_copy['class']);
            unset($attribArray_copy['title']);
            unset($attribArray_copy['data-htmlarea-external']);
            // Unset "rteerror" and "style" attributes if "rteerror" is set!
            if ($attribArray_copy['rteerror']) {
                unset($attribArray_copy['style']);
                unset($attribArray_copy['rteerror']);
            }
            // Remove additional parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'])) {
                $parameters = array(
                    'conf' => &$conf,
                    'aTagParams' => &$attribArray_copy
                );
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $attribArray_copy = $processor->removeParams($parameters, $this);
                }
            }
            // Only if href, target, class and title are the only attributes, we can alter the link!
            if (empty($attribArray_copy)) {
                // Quoting class and title attributes if they contain spaces
                $attribArray['class'] = preg_match('/ /', $attribArray['class']) ? '"' . $attribArray['class'] . '"' : $attribArray['class'];
                $attribArray['title'] = preg_match('/ /', $attribArray['title']) ? '"' . $attribArray['title'] . '"' : $attribArray['title'];
                // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes):
                // If data-htmlarea-external attribute is set, keep the href unchanged
                if ($attribArray['data-htmlarea-external']) {
                    $href = $attribArray['href'];
                } else {
                    $href = $info['url'] . ($info['query'] ? ',0,' . $info['query'] : '');
                }
                // The parts are positional (url target class title); "-" fills a skipped position
                $bTag = '<link ' . $href . ($attribArray['target'] ? ' ' . $attribArray['target'] : ($attribArray['class'] || $attribArray['title'] ? ' -' : '')) . ($attribArray['class'] ? ' ' . $attribArray['class'] : ($attribArray['title'] ? ' -' : '')) . ($attribArray['title'] ? ' ' . $attribArray['title'] : '') . '>';
                $eTag = '</link>';
                // Modify parameters
                if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
                    $parameters = array(
                        'conf' => &$conf,
                        'currentBlock' => $v,
                        'url' => $href,
                        'attributes' => $attribArray
                    );
                    foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                        $processor = GeneralUtility::getUserObj($objRef);
                        $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
                    }
                } else {
                    // The content of the block is processed recursively
                    $blockSplit[$k] = $bTag . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
                }
            } else {
                // ... otherwise store the link as a-tag.
                // Unsetting 'rtekeep' attribute if that had been set.
                unset($attribArray['rtekeep']);
                if (!$attribArray['data-htmlarea-external']) {
                    $siteURL = $this->siteUrl();
                    // If the url is local, remove url-prefix
                    if ($siteURL && substr($attribArray['href'], 0, strlen($siteURL)) == $siteURL) {
                        $attribArray['href'] = substr($attribArray['href'], strlen($siteURL));
                    }
                    // Check for FAL link-handler keyword.
                    // The result is padded because an href without ":" yields a single element only.
                    list($linkHandlerKeyword, $linkHandlerValue) = array_pad(explode(':', $attribArray['href'], 2), 2, '');
                    if ($linkHandlerKeyword === '?file') {
                        try {
                            $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject(rawurldecode($linkHandlerValue));
                            if ($fileOrFolderObject instanceof Resource\FileInterface || $fileOrFolderObject instanceof Resource\Folder) {
                                $attribArray['href'] = $fileOrFolderObject->getPublicUrl();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // The identifier inserted in the RTE is already gone...
                        }
                    }
                }
                unset($attribArray['data-htmlarea-external']);
                $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
                $eTag = '</a>';
                $blockSplit[$k] = $bTag . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
            }
        }
    }
    return implode('', $blockSplit);
}
594
595 /**
596 * Transformation handler: 'ts_links' / direction: "rte"
597 * Converting <link tags> to <A>-tags
598 *
599 * @param string $value Content input
600 * @return string Content output
601 * @see TS_links_db()
602 */
public function TS_links_rte($value)
{
    // Configuration array that is handed by reference to the hook processors
    $conf = array();
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>":
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = $this->siteUrl();
    foreach ($blockSplit as $k => $v) {
        $error = '';
        $external = false;
        // Block
        if ($k % 2) {
            // Split away the first "<link" part; a bare "<link>" tag carries no parameters at all
            $tagParts = explode(' ', substr($this->getFirstTag($v), 0, -1), 2);
            $typolink = isset($tagParts[1]) ? $tagParts[1] : '';
            $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolink);

            $link_param = $tagCode['url'];
            // Parsing the typolink data. This parsing is roughly done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
            // Parse URL:
            $pU = parse_url($link_param);
            // parse_url() returns FALSE for seriously malformed URLs and omits absent components
            $scheme = is_array($pU) && isset($pU['scheme']) ? $pU['scheme'] : '';
            if (strstr($link_param, '@') && (!$scheme || $scheme == 'mailto')) {
                // mailadr
                $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
            } elseif (strpos($link_param, '#') === 0) {
                // check if anchor
                $href = $siteUrl . $link_param;
            } else {
                // Check for FAL link-handler keyword.
                // The result is padded because a parameter without ":" yields a single element only.
                list($linkHandlerKeyword, $linkHandlerValue) = array_pad(explode(':', trim($link_param), 2), 2, '');
                if ($linkHandlerKeyword === 'file' && !StringUtility::beginsWith($link_param, 'file://')) {
                    $href = $siteUrl . '?' . $linkHandlerKeyword . ':' . rawurlencode($linkHandlerValue);
                } else {
                    $fileChar = (int)strpos($link_param, '/');
                    $urlChar = (int)strpos($link_param, '.');
                    // Detects if a file is found in site-root.
                    list($rootFileDat) = explode('?', $link_param);
                    $rFD_fI = pathinfo($rootFileDat);
                    $fileExtension = isset($rFD_fI['extension']) ? strtolower($rFD_fI['extension']) : '';
                    if (strpos($link_param, '/') === false && trim($rootFileDat) && (@is_file(PATH_site . $rootFileDat) || $fileExtension === 'php' || $fileExtension === 'html' || $fileExtension === 'htm')) {
                        $href = $siteUrl . $link_param;
                    } elseif (
                        (
                            $scheme
                            && !isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$scheme])
                        )
                        || $urlChar && (!$fileChar || $urlChar < $fileChar)
                    ) {
                        // url (external): if has scheme or if a '.' comes before a '/'.
                        $href = $link_param;
                        if (!$scheme) {
                            $href = 'http://' . $href;
                        }
                        $external = true;
                    } elseif ($fileChar) {
                        // It is an internal file or folder
                        // Try to transform the href into a FAL reference
                        try {
                            $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject($link_param);
                        } catch (Resource\Exception $exception) {
                            // Nothing to be done if file/folder not found or path invalid
                            $fileOrFolderObject = null;
                        }
                        if ($fileOrFolderObject instanceof Resource\Folder) {
                            // It's a folder
                            $folderIdentifier = $fileOrFolderObject->getIdentifier();
                            $href = $siteUrl . '?file:' . rawurlencode($folderIdentifier);
                        } elseif ($fileOrFolderObject instanceof Resource\FileInterface) {
                            // It's a file
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            $href = $siteUrl . '?file:' . $fileObject->getUid();
                        } else {
                            $href = $siteUrl . $link_param;
                        }
                    } else {
                        // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                        // Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/parameters triplet
                        $pairParts = GeneralUtility::trimExplode(',', $link_param, true);
                        $idPart = $pairParts[0];
                        $link_params_parts = explode('#', $idPart);
                        $idPart = trim($link_params_parts[0]);
                        $sectionMark = isset($link_params_parts[1]) ? trim($link_params_parts[1]) : '';
                        // If no id or alias is given, set it to class record pid
                        if ((string)$idPart === '') {
                            $idPart = $this->recPid;
                        }
                        // Checking if the id-parameter is an alias.
                        if (!MathUtility::canBeInterpretedAsInteger($idPart)) {
                            list($idPartR) = BackendUtility::getRecordsByField('pages', 'alias', $idPart);
                            $idPart = (int)$idPartR['uid'];
                        }
                        $page = BackendUtility::getRecord('pages', $idPart);
                        // Text before the first ":" of the parameter, used to look up a registered link handler.
                        // It is taken by index: array_shift() needs a variable, not a function result.
                        $linkHandlerPrefix = explode(':', $link_param, 2)[0];
                        if (is_array($page)) {
                            // Page must exist...
                            $href = $siteUrl . '?id=' . $idPart . (!empty($pairParts[2]) ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                        } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$linkHandlerPrefix])) {
                            $href = $link_param;
                        } else {
                            $href = $siteUrl . '?id=' . $link_param;
                            $error = 'No page found: ' . $idPart;
                        }
                    }
                }
            }
            // Setting the A-tag:
            $bTag = '<a href="' . htmlspecialchars($href) . '"'
                . ($tagCode['target'] ? ' target="' . htmlspecialchars($tagCode['target']) . '"' : '')
                . ($tagCode['class'] ? ' class="' . htmlspecialchars($tagCode['class']) . '"' : '')
                . ($tagCode['title'] ? ' title="' . htmlspecialchars($tagCode['title']) . '"' : '')
                . ($external ? ' data-htmlarea-external="1"' : '')
                . ($error ? ' rteerror="' . htmlspecialchars($error) . '" style="background-color: yellow; border:2px red solid; color: black;"' : '') . '>';
            $eTag = '</a>';
            // Modify parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])) {
                $parameters = array(
                    'conf' => &$conf,
                    'currentBlock' => $v,
                    'url' => $href,
                    'tagCode' => $tagCode,
                    'external' => $external,
                    'error' => $error
                );
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
                }
            } else {
                // The content of the block is processed recursively
                $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
            }
        }
    }
    // Return content:
    return implode('', $blockSplit);
}
737
/**
 * Restores preserved tags in content going to the database.
 *
 * Every <span> block that carries a "specialtag" attribute is replaced by the
 * tag stored (raw-url-encoded) in that attribute, wrapped around the inner
 * content of the span. The content is returned untouched when
 * $this->preserveTags is empty.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_preserve_rte()
 */
public function TS_preserve_db($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    // Odd indexes hold the <span> blocks, even indexes the content in between
    $parts = $this->splitIntoBlock('span', $value);
    foreach ($parts as $index => $part) {
        if ($index % 2 === 0) {
            continue;
        }
        list($spanAttributes) = $this->get_tag_attributes($this->getFirstTag($part));
        if (!$spanAttributes['specialtag']) {
            // An ordinary span: leave it as it is
            continue;
        }
        $originalTag = rawurldecode($spanAttributes['specialtag']);
        $closingTag = '</' . $this->getFirstTagName($originalTag) . '>';
        $parts[$index] = $originalTag . $this->removeFirstAndLastTag($part) . $closingTag;
    }
    return implode('', $parts);
}
764
/**
 * Masks preserved tags in content going to the RTE.
 *
 * Each block made of one of the tags listed in $this->preserveTags is turned
 * into a <span> whose "specialtag" attribute holds the raw-url-encoded opening
 * tag; the inner content of the block is kept. The content is returned
 * untouched when $this->preserveTags is empty.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_preserve_db()
 */
public function TS_preserve_rte($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    $parts = $this->splitIntoBlock($this->preserveTags, $value);
    foreach ($parts as $index => $part) {
        // Only odd indexes are blocks of a preserved tag
        if ($index % 2 === 0) {
            continue;
        }
        $encodedTag = rawurlencode($this->getFirstTag($part));
        $parts[$index] = '<span specialtag="' . $encodedTag . '">' . $this->removeFirstAndLastTag($part) . '</span>';
    }
    return implode('', $parts);
}
785
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the block elements of $this->blockElementList.
 * Container blocks (blockquote, dd, div, header, section, footer, nav,
 * article, aside) are processed recursively, every other block only gets its
 * line breaks replaced by spaces, and content between blocks is converted
 * into lines by divideIntoLines(). Every part except the last one is
 * terminated by a line feed.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Recursion brake: the counter is decremented on entry and incremented again on exit,
    // so it reflects the current nesting depth.
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Restore the counter before bailing out; otherwise every hit of the limit
        // would permanently reduce the nesting depth available to later calls.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    $aC = count($blockSplit);
    // Avoid superfluous linebreaks by dropping blank parts at the end
    while ($aC && trim($blockSplit[$aC - 1]) === '') {
        unset($blockSplit[$aC - 1]);
        $aC = count($blockSplit);
    }
    // Blocks whose inner content is transformed recursively
    $containerTags = array('blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside');
    $lineBreakPattern = '/[' . LF . CR . ']+/';
    $cc = 0;
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        $cc++;
        // No trailing line feed after the very last part
        $lastBR = $cc == $aC ? '' : LF;
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTags, true)) {
                $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>' . $lastBR;
            } else {
                // Lists, tables, headers and all remaining blocks: eliminate true linebreaks inside the block
                $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
            }
        } elseif (trim($blockSplit[$k]) !== '') {
            // NON-block:
            $blockSplit[$k] = preg_replace('/<hr\\/>/', '<hr />', $blockSplit[$k]);
            // Remove linebreaks preceding hr tags
            $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
            // Remove linebreaks following hr tags
            $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . CR . ']+/', '<$1$2/>', $blockSplit[$k]);
            // Replace other linebreaks with space
            $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $blockSplit[$k]);
            $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]) . $lastBR;
            $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
        } else {
            // Blank content between blocks is dropped
            unset($blockSplit[$k]);
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode('', $blockSplit);
}
867
/**
 * Wraps a-tags that contain a style attribute with a span-tag.
 *
 * The "style" attribute is moved from the <a> tag to the wrapping <span>.
 * A-tags that also carry an "rteerror" attribute are left untouched.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $parts = $this->splitIntoBlock('A', $value);
    foreach ($parts as $index => $part) {
        // Even indexes are the content between the A-tags
        if ($index % 2 === 0) {
            continue;
        }
        list($linkAttributes) = $this->get_tag_attributes($this->getFirstTag($part), true);
        // Only act if "style" is set and "rteerror" is not
        if (!$linkAttributes['style'] || $linkAttributes['rteerror']) {
            continue;
        }
        $spanAttributes = array('style' => $linkAttributes['style']);
        unset($linkAttributes['style']);
        $openingTags = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, 1) . '><a ' . GeneralUtility::implodeAttributes($linkAttributes, 1) . '>';
        $parts[$index] = $openingTags . $this->removeFirstAndLastTag($part) . '</a></span>';
    }
    return implode('', $parts);
}
893
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the block elements of $this->blockElementList.
 * Container blocks (blockquote, dd, div, header, section, footer, nav,
 * article, aside) are processed recursively; content between blocks is
 * adjusted for line breaks and then wrapped by setDivTags(). The parts are
 * joined with line feeds.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Blocks whose inner content is transformed recursively
    $containerTags = array('blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside');
    $leadingLineBreakPattern = '/^[ ]*' . LF . '/';
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTags, true)) {
                $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
            }
            // Strip the line break that directly follows the block from the next part
            $blockSplit[$k + 1] = preg_replace($leadingLineBreakPattern, '', $blockSplit[$k + 1]);
            continue;
        }
        // NON-block: always read $blockSplit[$k] here, it may have been changed while handling the preceding block
        $nextFTN = $this->getFirstTagName($blockSplit[$k + 1]);
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1;
        $isFollowedByBlockOrLast = GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1]);
        if ($isFollowedByBlockOrLast) {
            if ($onlyLineBreaks) {
                // The part contains only linebreaks: remove the leading one
                $blockSplit[$k] = preg_replace($leadingLineBreakPattern, '', $blockSplit[$k]);
            } else {
                // Otherwise reduce the number of trailing linebreaks by one
                $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
            }
        }
        // A blank part is dropped, unless it consisted of linebreaks only
        if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
            unset($blockSplit[$k]);
        } else {
            $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
        }
    }
    return implode(LF, $blockSplit);
}
952
953 /***************************************************************
954 *
955 * Generic RTE transformation, analysis and helper functions
956 *
957 **************************************************************/
/**
 * Reads the file or url $url and returns the content.
 * Thin wrapper that delegates to GeneralUtility::getUrl().
 *
 * @param string $url Filepath/URL to read
 * @return string The content from the resource given as input.
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl()
 */
public function getUrl($url)
{
    $content = GeneralUtility::getUrl($url);
    return $content;
}
969
/**
 * Function for cleaning content going into the database.
 * It calls HTMLcleaner() with a configuration set up for content going from
 * the RTE into the database.
 *
 * Processing options read: "dontRemoveUnknownTags_db" (keep unknown tags
 * instead of removing them) and "dontUndoHSC_db" (pass 0 instead of -1 as
 * the htmlspecialchars mode).
 *
 * @param string $content Content to clean up
 * @param string $tagList Comma list of tags to specifically allow. If empty, the configuration comes from getKeepTags('db').
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content, $tagList = '')
{
    $keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');
    // Default: remove unknown tags.
    $keepUnknownTags = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
    // Default: re-convert literals to characters (that is &lt; to <)
    $hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;
    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags, $hscMode);
}
993
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * Unless "tagList" is given, the function will cache the configuration in $this->getKeepTags_cache for the next call with the same direction.
 *
 * Without $tagList the allowed tags are $this->defaultAllowedTagsList plus the "allowTags" processing option,
 * minus the tags of the "denyTags" processing option. Both options are matched case-insensitively.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param string $tagList Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte', $tagList = '')
{
    if (!is_array($this->getKeepTags_cache[$direction]) || $tagList) {
        // Setting up allowed tags:
        // If the $tagList input var is set, this will take precedence
        if ((string)$tagList !== '') {
            $keepTags = array_flip(GeneralUtility::trimExplode(',', $tagList, true));
        } else {
            // Default is to get allowed/denied tags from internal array of processing options:
            // Construct default list of tags to keep:
            $keepTags = array_flip(GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . strtolower($this->procOptions['allowTags']), true));
            // For tags to deny, remove them from $keepTags array.
            // The keys of $keepTags are lower case, so the deny list has to be lower-cased as well;
            // otherwise an entry like "SPAN" would silently be ignored.
            $denyTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['denyTags']), true);
            foreach ($denyTags as $dKe) {
                unset($keepTags[$dKe]);
            }
        }
        // Based on the direction of content, set further options:
        switch ($direction) {
            case 'rte':
                if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
                    // Transform bold/italics tags to strong/em
                    if (isset($keepTags['b'])) {
                        $keepTags['b'] = array('remap' => 'STRONG');
                    }
                    if (isset($keepTags['i'])) {
                        $keepTags['i'] = array('remap' => 'EM');
                    }
                }
                // Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
                list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
                break;
            case 'db':
                if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
                    // Transform strong/em back to bold/italics:
                    if (isset($keepTags['strong'])) {
                        $keepTags['strong'] = array('remap' => 'b');
                    }
                    if (isset($keepTags['em'])) {
                        $keepTags['em'] = array('remap' => 'i');
                    }
                }
                // Setting up span tags if they are allowed:
                if (isset($keepTags['span'])) {
                    $keepTags['span'] = array(
                        'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                        'fixAttrib' => array(
                            'class' => array(
                                'removeIfFalse' => 1
                            )
                        ),
                        'rmTagIfNoAttrib' => 1
                    );
                    if (!empty($this->allowedClasses)) {
                        $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                    }
                }
                // Setting further options, getting them from the processing options:
                $TSc = $this->procOptions['HTMLparser_db.'];
                if (!$TSc['globalNesting']) {
                    $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
                }
                if (!$TSc['noAttrib']) {
                    $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
                }
                // Transforming the array from TypoScript to regular array:
                list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
                break;
        }
        // Caching (internally, in object memory) the result unless tagList is set:
        if (!$tagList) {
            $this->getKeepTags_cache[$direction] = $keepTags;
        } else {
            return $keepTags;
        }
    }
    // Return result:
    return $this->getKeepTags_cache[$direction];
}
1082
/**
 * Resolves $value into parts based on <p>-sections and <br />-tags; the parts are returned as lines separated by LF.
 * The point is to turn the HTML code returned from the RTE into ordinary, 'human-readable' lines.
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value. An array is only returned if $returnArray is TRUE and p-sections were found; otherwise a string.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Patterns used to wrap hr tags with LF's and to tidy up the resulting line feeds
    $hrTagPattern = '/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i';
    $doubleLineFeedPattern = '/' . LF . LF . '/i';
    $outerLineFeedPattern = '/(^' . LF . ')|(' . LF . '$)/i';
    // Setting configuration for processing:
    $outsideTagList = $this->procOptions['allowTagsOutside'] ? 'hr,' . $this->procOptions['allowTagsOutside'] : 'hr,img';
    $allowTagsOutside = GeneralUtility::trimExplode(',', strtolower($outsideTagList), true);
    $divSplit = $this->splitIntoBlock('p', $value, true);
    $keepAttribListArr = $this->procOptions['keepPDIVattribs']
        ? GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true)
        : array();
    // Without p sections (or when the recursion brake is reached) the value is returned as a plain string
    if (count($divSplit) <= 1 || $count <= 0) {
        $newValue = preg_replace($hrTagPattern, LF . '<$1$2/>' . LF, $value);
        $newValue = preg_replace($doubleLineFeedPattern, LF, $newValue);
        return preg_replace($outerLineFeedPattern, '', $newValue);
    }
    // Traverse the split sections:
    foreach ($divSplit as $k => $v) {
        if ($k % 2 === 0) {
            // Outside p: keep only the tags allowed outside and drop positions without content
            $outside = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
            $outside = preg_replace($hrTagPattern, LF . '<$1$2/>' . LF, $outside);
            $outside = preg_replace($doubleLineFeedPattern, LF, $outside);
            $outside = preg_replace($outerLineFeedPattern, '', $outside);
            if ((string)$outside === '') {
                unset($divSplit[$k]);
            } else {
                $divSplit[$k] = $outside;
            }
            continue;
        }
        // Inside p
        $inner = $this->removeFirstAndLastTag($v);
        // Fetching 'sub-lines' - which will explode any further p nesting...
        $subLines = $this->divideIntoLines($inner, $count - 1, true);
        // An array means there was sub-nesting of p; it is written directly as the new content of THIS section.
        if (!is_array($subLines)) {
            // No subsection was found, so this is a true line.
            // Unless configured otherwise, break tags are treated as if each separated a line of content.
            $subLines = $this->procOptions['dontConvBRtoParagraph']
                ? array($subLines)
                : preg_split('/<br[[:space:]]*[\\/]?>/i', $inner);
            foreach ($subLines as $sk => $unusedLine) {
                // Clear up the subline for DB.
                $subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);
                // Attributes of the opening p tag of this section
                $attribs = $this->get_tag_attributes($this->getFirstTag($v));
                // Keep attributes (lowercase)
                $newAttribs = array();
                foreach ($keepAttribListArr as $keepA) {
                    if (isset($attribs[0][$keepA])) {
                        $newAttribs[$keepA] = $attribs[0][$keepA];
                    }
                }
                // ALIGN attribute: set to value, but not 'left'
                if (!$this->procOptions['skipAlign'] && trim($attribs[0]['align']) !== '' && strtolower($attribs[0]['align']) != 'left') {
                    $newAttribs['align'] = strtolower($attribs[0]['align']);
                }
                // CLASS attribute
                if (!$this->procOptions['skipClass'] && trim($attribs[0]['class']) !== '') {
                    if (empty($this->allowedClasses) || in_array($attribs[0]['class'], $this->allowedClasses)) {
                        // No restriction, or the complete value is allowed
                        $newAttribs['class'] = $attribs[0]['class'];
                    } else {
                        // Keep only those of the space separated classes that are allowed
                        $newClasses = array();
                        foreach (GeneralUtility::trimExplode(' ', $attribs[0]['class'], true) as $class) {
                            if (in_array($class, $this->allowedClasses)) {
                                $newClasses[] = $class;
                            }
                        }
                        if (!empty($newClasses)) {
                            $newAttribs['class'] = implode(' ', $newClasses);
                        }
                    }
                }
                // Remove any line break char (10 or 13)
                $subLines[$sk] = preg_replace('/' . LF . '|' . CR . '/', '', $subLines[$sk]);
                // Only lines with attributes are wrapped in a p tag
                if (!empty($newAttribs)) {
                    $subLines[$sk] = '<' . trim('p ' . $this->compileTagAttribs($newAttribs)) . '>' . $subLines[$sk] . '</p>';
                }
            }
        }
        // Add the processed line(s)
        $section = implode(LF, $subLines);
        // If the line is just blank (containing a &nbsp; possibly) then make it pure blank,
        // unless it contains an img tag or a tag that still carries one of the listed attributes.
        if (
            trim(strip_tags($section)) == '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $section)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($section))
        ) {
            $section = '';
        }
        $divSplit[$k] = $section;
    }
    // Return value:
    return $returnArray ? $divSplit : implode(LF, $divSplit);
}
1200
/**
 * Converts all lines into <p></p>-sections (unless the line is already wrapped in p or div tags or contains an hr tag)
 * For processing of content going FROM database TO RTE.
 *
 * Processing options read: "dontProtectUnknownTags_rte", "dontHSC_rte" and "dontConvAmpInNBSP_rte".
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner function, which processes each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    // Default: unknown tags are passed as 'protect'
    $unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
    // Default: htmlspecialchars mode 1
    $hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
    $convertNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'];
    $lines = array();
    // Divide the content into lines, based on LF:
    foreach (explode(LF, $value) as $line) {
        if (trim($line) === '') {
            // A blank line is set to &nbsp;
            $line = '&nbsp;';
        } else {
            // Clean the line content:
            $line = $this->HTMLcleaner($line, $keepTags, $unknownTagMode, $hscMode);
            if ($convertNbsp) {
                $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
            }
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isDivWrapped = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $isParagraphWrapped = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only set p-tags if there is not already div or p tags:
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[] = $line;
    }
    // Implode result:
    return implode(LF, $lines);
}
1246
/**
 * Returns the site URL as reported by the environment.
 *
 * @return string Value of GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv()
 */
public function siteUrl()
{
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    return $siteUrl;
}
1257
/**
 * Default tag mapping for TS: strong/em are mapped to b/i on the way to the
 * database, and b/i back to strong/em on the way to the RTE.
 *
 * @param string $code Input code to process
 * @param string $direction Direction: to database (db) or from database to RTE (rte)
 * @return string Processed value
 */
public function defaultTStagMapping($code, $direction = 'rte')
{
    // Mapping applied for direction "db"; the reverse is applied for direction "rte"
    $databaseMapping = array(
        'strong' => 'b',
        'em' => 'i'
    );
    if ($direction == 'db') {
        $code = $this->mapTags($code, $databaseMapping);
    }
    if ($direction == 'rte') {
        $code = $this->mapTags($code, array_flip($databaseMapping));
    }
    return $code;
}
1283
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute, use that!
 *
 * Only the pixel values of the plain "width" and "height" style properties
 * are used; properties that merely end in these names (e.g. "max-width",
 * "border-width", "line-height") are ignored. If the style attribute yields
 * no non-zero value, the "width" / "height" attributes are used instead.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
    $dimensions = array('width' => 0, 'height' => 0);
    foreach (array_keys($dimensions) as $property) {
        $match = array();
        // The lookbehind makes sure the property name is not the tail of a longer one like "max-width"
        $pattern = '/(?<![\\w-])' . $property . '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';
        if ($style !== '' && preg_match($pattern, $style, $match)) {
            $dimensions[$property] = (int)$match[1];
        }
        // Fall back to the plain attribute if the style did not provide a value
        if (!$dimensions[$property] && isset($attribArray[$property])) {
            $dimensions[$property] = (int)$attribArray[$property];
        }
    }
    return array($dimensions['width'], $dimensions['height']);
}
1314
/**
 * Parse <A>-tag href and return status of email, external, file, anchor or page
 *
 * The returned array always contains "type" and "url". For URLs on the
 * current site it additionally contains "relScriptPath" and "relUrl", and
 * for page links "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = array();
    $url = trim($url);
    // Email address
    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    // File reference given as "?file:..."
    $fileMarkerPosition = strpos($url, '?file:');
    if ($fileMarkerPosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $fileMarkerPosition + 1));
        return $info;
    }
    // Find the length of the leading part that the URL shares with the site URL
    $currentSiteUrl = $this->siteUrl();
    $urlLength = strlen($url);
    $sharedLength = 0;
    while ($sharedLength < $urlLength && $url[$sharedLength] == $currentSiteUrl[$sharedLength]) {
        $sharedLength++;
    }
    $info['relScriptPath'] = substr($currentSiteUrl, $sharedLength);
    $info['relUrl'] = substr($url, $sharedLength);
    $info['url'] = $url;
    // External until proven otherwise
    $info['type'] = 'ext';
    $linkParts = parse_url($url);
    $siteParts = parse_url($currentSiteUrl);
    // Hosts should match and the script path must be empty (FE-EDIT) or start with TYPO3_mainDir
    $isOnThisSite = $linkParts['host'] == $siteParts['host']
        && (
            !$info['relScriptPath']
            || (defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir)
        );
    if (!$isOnThisSite) {
        unset($info['relScriptPath'], $info['relUrl']);
        return $info;
    }
    $relativeParts = parse_url($info['relUrl']);
    if ($info['relUrl'] === '#' . $linkParts['fragment']) {
        // The relative part is nothing but the fragment
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relativeParts['path']) || $relativeParts['path'] === 'index.php') {
        // URL is a page (id parameter)
        $queryParts = preg_split('/^id=/', $relativeParts['query']);
        $queryParts[1] = preg_replace('/&id=[^&]*/', '', $queryParts[1]);
        $parameters = explode('&', $queryParts[1]);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $relativeParts['fragment'];
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1377
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 *
 * A href without scheme gets the site URL prepended; a href with a scheme
 * other than "mailto" marks the link with data-htmlarea-external="1".
 * A-tags without a href, or with an empty one, could be anchors: their href
 * is left alone and they always get the "rtekeep" attribute.
 * Nested a-tags are processed recursively with the same $dontSetRTEKEEP setting.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set (except for a-tags without href).
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd indexes are a-tag blocks
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // A missing href must be treated like an empty one; comparing the raw value with ''
        // would let NULL pass and turn a plain anchor into a link to the site URL.
        $href = isset($attribArray['href']) ? (string)$attribArray['href'] : '';
        if ($href !== '') {
            // Checking if there is a scheme, and if not, prepend the current url.
            $urlParts = parse_url(strtolower($href));
            $scheme = is_array($urlParts) && isset($urlParts['scheme']) ? $urlParts['scheme'] : '';
            if ($scheme === '') {
                $attribArray['href'] = $this->siteUrl() . $href;
            } elseif ($scheme != 'mailto') {
                $attribArray['data-htmlarea-external'] = 1;
            }
        } else {
            // The <a> tag could be an anchor and if so, it should be preserved
            $attribArray['rtekeep'] = 1;
        }
        if (!$dontSetRTEKEEP) {
            $attribArray['rtekeep'] = 1;
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
        $eTag = '</a>';
        // Pass the flag on, so nested a-tags are treated like the outer one
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]), $dontSetRTEKEEP) . $eTag;
    }
    return implode('', $blockSplit);
}
1414
/**
 * Apply plain image settings to the dimensions of the image
 *
 * Depending on the processing option "plainImageMode":
 * - "lockDimensions": width and height are set to the values of $imageInfo
 * - "lockRatioWhenSmaller": the width is limited to the width in $imageInfo, then the height is recalculated as for "lockRatio"
 * - "lockRatio": the height is recalculated from the width using the ratio of $imageInfo
 * Any other value leaves the attributes unchanged.
 *
 * @param array $imageInfo info array of the image; key 0 is used as width, key 1 as height
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    // "lockRatioWhenSmaller" continues with the ratio correction of "lockRatio"
    $correctRatio = $mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio';
    if ($correctRatio && $imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1445
/**
 * Returns the logger the LogManager provides for the class of this object.
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
}
1457 }