[!!!][TASK] RTE transformations: Allow div sections by default
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Log\LogManager;
19 use TYPO3\CMS\Core\Resource;
20 use TYPO3\CMS\Core\Utility\GeneralUtility;
21 use TYPO3\CMS\Core\Utility\MathUtility;
22 use TYPO3\CMS\Core\Utility\StringUtility;
23 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
24
25 /**
26 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
27 */
28 class RteHtmlParser extends HtmlParser
29 {
30 /**
31 * List of elements that are not wrapped into a "p" tag while doing the transformation.
32 * @var string
33 */
34 public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
35
36 /**
37 * Set this to the pid of the record manipulated by the class.
38 *
39 * @var int
40 */
41 public $recPid = 0;
42
43 /**
44 * Element reference [table]:[field], eg. "tt_content:bodytext"
45 *
46 * @var string
47 */
48 public $elRef = '';
49
50 /**
51 * Current Page TSConfig
52 *
53 * @var array
54 */
55 public $tsConfig = array();
56
57 /**
58 * Set to the TSconfig options coming from Page TSconfig
59 *
60 * @var array
61 */
62 public $procOptions = array();
63
64 /**
65 * Run-away brake for recursive calls.
66 *
67 * @var int
68 */
69 public $TS_transform_db_safecounter = 100;
70
71 /**
72 * Parameters from TCA types configuration related to the RTE
73 *
74 * @var string
75 */
76 public $rte_p = '';
77
78 /**
79 * Data caching for processing function
80 *
81 * @var array
82 */
83 public $getKeepTags_cache = array();
84
85 /**
86 * Storage of the allowed CSS class names in the RTE
87 *
88 * @var array
89 */
90 public $allowedClasses = array();
91
92 /**
93 * Set to tags to preserve from Page TSconfig configuration
94 *
95 * @var string
96 */
97 public $preserveTags = '';
98
/**
 * Stores the context of the field that is about to be transformed:
 * which record field it is and on which page the record lives.
 *
 * @param string $elRef Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 * @param int $recPid PID of the record (page id)
 * @return void
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
111
112 /**********************************************
113 *
114 * Main function
115 *
116 **********************************************/
/**
 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
 * This is the main function called from tcemain and transfer data classes
 *
 * Processing order: optional "entry" HTML cleaner, then every configured transformation mode
 * (in reverse order for direction "rte"), then the optional "exit" HTML cleaner, and finally
 * normalisation of line breaks to CRLF (unless proc.disableUnifyLineBreaks is set).
 * For each mode a user defined transformation object registered in
 * $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation']
 * takes precedence over the built-in handler.
 *
 * @param string $value Input value
 * @param array $specConf Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = array())
{
    // Init:
    $this->tsConfig = $thisConfig;
    $this->procOptions = isset($thisConfig['proc.']) ? (array)$thisConfig['proc.'] : array();
    $this->preserveTags = isset($this->procOptions['preserveTags'])
        ? strtoupper(implode(',', GeneralUtility::trimExplode(',', $this->procOptions['preserveTags'])))
        : '';
    // dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }
    // Get parameters for rte_transformation:
    $p = ($this->rte_p = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']));
    // Setting modes: proc.overruleMode (comma separated) wins over the TCA mode (dash separated)
    if (isset($this->procOptions['overruleMode']) && (string)$this->procOptions['overruleMode'] !== '') {
        $modes = array_unique(GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']));
    } else {
        $modes = array_unique(GeneralUtility::trimExplode('-', $p['mode']));
    }
    $revmodes = array_flip($modes);
    // Find special modes and extract them: "ts_css" is a shortcut for three modes
    if (isset($revmodes['ts_css'])) {
        $modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
    }
    // Make list unique
    $modes = array_unique(GeneralUtility::trimExplode(',', implode(',', $modes), true));
    // Reverse order if direction is "rte"
    if ($direction === 'rte') {
        $modes = array_reverse($modes);
    }
    // Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and is thus totally independent processing options you can set up:
    $entry_HTMLparser = !empty($this->procOptions['entryHTMLparser_' . $direction])
        ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_' . $direction . '.'])
        : '';
    $exit_HTMLparser = !empty($this->procOptions['exitHTMLparser_' . $direction])
        ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_' . $direction . '.'])
        : '';
    $unifyLineBreaks = empty($this->procOptions['disableUnifyLineBreaks']);
    // Line breaks of content is unified into char-10 only (removing char 13)
    if ($unifyLineBreaks) {
        $value = str_replace(CRLF, LF, $value);
    }
    // If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($entry_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $entry_HTMLparser[0], $entry_HTMLparser[1], $entry_HTMLparser[2], $entry_HTMLparser[3]);
    }
    // Traverse modes:
    foreach ($modes as $cmd) {
        if ($direction === 'db') {
            // ->DB
            // Checking for user defined transformation:
            if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'db');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_db($value);
                        break;
                    case 'css_transform':
                        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'], true);
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            // ->RTE
            // Checking for user defined transformation:
            if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                // Tell the user object which mode it is handling, exactly as in the "db" direction
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'rte');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_rte($value);
                        break;
                    case 'css_transform':
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }
    // If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($exit_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $exit_HTMLparser[0], $exit_HTMLparser[1], $exit_HTMLparser[2], $exit_HTMLparser[3]);
    }
    // Final clean up of linebreaks:
    if ($unifyLineBreaks) {
        // Make sure no \r\n sequences has entered in the meantime...
        $value = str_replace(CRLF, LF, $value);
        // ... and then change all \n into \r\n
        $value = str_replace(LF, CRLF, $value);
    }
    // Return value:
    return $value;
}
254
255 /************************************
256 *
257 * Specific RTE TRANSFORMATION functions
258 *
259 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL.
 * If the URL points to another website than the current one, the image is fetched from that external URL and stored
 * in the backend user's default upload folder (only in backend mode and unless proc.dontFetchExtPictures is set).
 * Also "magic" images are processed here.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags; the tags themselves end up on the odd indexes
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        // No image tag in the content, nothing to process
        return implode('', $imgSplit);
    }
    $siteUrl = $this->siteUrl();
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    /** @var $resourceFactory Resource\ResourceFactory */
    $resourceFactory = Resource\ResourceFactory::getInstance();
    /** @var $magicImageService Resource\Service\MagicImageService */
    $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
    $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            // Even index: plain content between image tags
            continue;
        }
        // Image found, do processing:
        // Get attributes
        list($attribArray) = $this->get_tag_attributes($v, true);
        // It's always an absolute URL coming from the RTE into the Database.
        $absoluteUrl = isset($attribArray['src']) ? trim($attribArray['src']) : '';
        // Make path absolute if it is relative and we have a site path which is not '/'.
        // parse_url() is used because pathinfo() never reports a 'scheme' key.
        $urlParts = parse_url($absoluteUrl);
        if ($sitePath && empty($urlParts['scheme']) && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
            $absoluteUrl = $siteUrl . substr($absoluteUrl, strlen($sitePath));
        }
        // Image dimensions set in the img tag, if any
        $imgTagDimensions = $this->getWHFromAttribs($attribArray);
        if ($imgTagDimensions[0]) {
            $attribArray['width'] = $imgTagDimensions[0];
        }
        if ($imgTagDimensions[1]) {
            $attribArray['height'] = $imgTagDimensions[1];
        }
        $originalImageFile = null;
        if (!empty($attribArray['data-htmlarea-file-uid'])) {
            // An original image file uid is available
            try {
                /** @var $originalImageFile Resource\File */
                $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
            } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                // Log the fact the file could not be retrieved.
                $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                $this->getLogger()->error($message);
            }
        }
        if ($originalImageFile instanceof Resource\File) {
            // Public url of local file is relative to the site url, absolute otherwise
            if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                // This is a plain image, i.e. reference to the original image
                if ($this->procOptions['plainImageMode']) {
                    // "plain image mode" is configured
                    // Find the dimensions of the original image
                    $imageInfo = array(
                        $originalImageFile->getProperty('width'),
                        $originalImageFile->getProperty('height')
                    );
                    if (!$imageInfo[0] || !$imageInfo[1]) {
                        // Dimensions are not known as file properties, so read them from the file itself
                        $filePath = $originalImageFile->getForLocalProcessing(false);
                        $imageInfo = @getimagesize($filePath);
                    }
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
            } else {
                // Magic image case: get a processed file with the requested configuration
                $imageConfiguration = array(
                    'width' => $imgTagDimensions[0],
                    'height' => $imgTagDimensions[1]
                );
                $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                $attribArray['width'] = $magicImage->getProperty('width');
                $attribArray['height'] = $magicImage->getProperty('height');
                $attribArray['src'] = $magicImage->getPublicUrl();
            }
        } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && !$this->procOptions['dontFetchExtPictures'] && TYPO3_MODE === 'BE') {
            // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
            // Fetch the external image
            $externalFile = $this->getUrl($absoluteUrl);
            if ($externalFile) {
                $pU = parse_url($absoluteUrl);
                $pI = pathinfo($pU['path']);
                $extension = strtolower($pI['extension']);
                if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                    $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                    // We insert this image into the user default upload folder
                    list($table, $field) = explode(':', $this->elRef);
                    $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                    $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                    $imageConfiguration = array(
                        'width' => $attribArray['width'],
                        'height' => $attribArray['height']
                    );
                    $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            }
        } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
            // Finally, check image as local file (siteURL equals the one of the image)
            // Image has no data-htmlarea-file-uid attribute
            // Relative path, rawurldecoded for special characters.
            $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
            // Absolute filepath, locked to relative path of this project
            $filepath = GeneralUtility::getFileAbsFileName($path);
            // Check file existence (in relative directory to this installation!)
            if ($filepath && @is_file($filepath)) {
                // Treat it as a plain image
                if ($this->procOptions['plainImageMode']) {
                    // If "plain image mode" has been configured
                    // Find the original dimensions of the image
                    $imageInfo = @getimagesize($filepath);
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
                // Let's try to find a file uid for this image
                try {
                    $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                    if ($fileOrFolderObject instanceof Resource\FileInterface) {
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        // @todo if the retrieved file is a processed file, get the original file...
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    }
                } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                    // Nothing to be done if file/folder not found
                }
            }
        }
        // Remove width and height from style attribute
        $attribArray['style'] = preg_replace('/((?:^|)\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        // Convert absolute to relative url
        if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
            $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
    }
    return implode('', $imgSplit);
}
416
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Processing images from database content going into the RTE.
 * Processing includes converting the src attribute to an absolute URL
 * and making sure every image tag carries an alt attribute.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // Split content by <img> tags; the tags themselves end up on the odd indexes
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = $this->siteUrl();
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        // Matches the site path at the very beginning of a src value (same for every image)
        $sitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';
        foreach ($imgSplit as $k => $v) {
            if (!($k % 2)) {
                // Even index: plain content between image tags
                continue;
            }
            // Get the attributes of the img tag
            list($attribArray) = $this->get_tag_attributes($v, true);
            $src = isset($attribArray['src']) ? trim($attribArray['src']) : '';
            // Transform the src attribute into an absolute url, if it is not already.
            // The trimmed value is used for the check AND the rewrite, so surrounding
            // whitespace cannot keep the anchored site path pattern from matching.
            if (strtolower(substr($src, 0, 4)) !== 'http') {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $attribArray['src'] = $siteUrl . preg_replace($sitePathPattern, '', $src);
            }
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
        }
    }
    // Return processed content:
    return implode('', $imgSplit);
}
455
/**
 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
 * Converts the href of <A>-tags between absolute (for the RTE) and site-relative (for the database).
 * Any other direction keyword yields an empty string.
 *
 * @param string $value Content input
 * @param string $direction Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
 * @return string Content output
 */
public function TS_reglinks($value, $direction)
{
    if ($direction == 'rte') {
        return $this->TS_AtagToAbs($value, 1);
    }
    if ($direction == 'db') {
        $siteUrl = $this->siteUrl();
        $siteUrlLength = strlen($siteUrl);
        $parts = $this->splitIntoBlock('A', $value);
        foreach ($parts as $index => $part) {
            if (!($index % 2)) {
                // Content outside of <a> blocks is left alone
                continue;
            }
            list($attributes) = $this->get_tag_attributes($this->getFirstTag($part), true);
            // If the url is local, remove url-prefix
            if ($siteUrl && substr($attributes['href'], 0, $siteUrlLength) == $siteUrl) {
                $attributes['href'] = substr($attributes['href'], $siteUrlLength);
            }
            $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, 1) . '>';
            // The link content is processed recursively
            $innerContent = $this->TS_reglinks($this->removeFirstAndLastTag($part), $direction);
            $parts[$index] = $openingTag . $innerContent . '</a>';
        }
        return implode('', $parts);
    }
    return '';
}
492
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Converting <A>-tags to <link tags>
 *
 * An <a> tag is turned into the TYPO3 pseudo tag <link> only when it carries nothing but
 * href, target, class, title and data-htmlarea-external (after the removeParams_PostProc hooks ran).
 * All other <a> tags are kept as <a> tags, with a local url prefix removed from the href.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $conf = array();
    // Split content into <a> tag blocks and process:
    $blocks = $this->splitIntoBlock('A', $value);
    foreach ($blocks as $index => $block) {
        if (!($index % 2)) {
            // Not an A-tag block
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($block), true);
        $info = $this->urlInfoForLinkTags($attributes['href']);
        // Collect the attributes a <link> tag could not carry:
        $remainingAttributes = $attributes;
        unset(
            $remainingAttributes['href'],
            $remainingAttributes['target'],
            $remainingAttributes['class'],
            $remainingAttributes['title'],
            $remainingAttributes['data-htmlarea-external']
        );
        // Unset "rteerror" and "style" attributes if "rteerror" is set!
        if ($remainingAttributes['rteerror']) {
            unset($remainingAttributes['style'], $remainingAttributes['rteerror']);
        }
        // Remove additional parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'])) {
            $parameters = array(
                'conf' => &$conf,
                'aTagParams' => &$remainingAttributes
            );
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $remainingAttributes = $processor->removeParams($parameters, $this);
            }
        }
        if (empty($remainingAttributes)) {
            // Only if href, target, class and title are the only attributes, we can alter the link!
            // Quoting class and title attributes if they contain spaces
            $attributes['class'] = preg_match('/ /', $attributes['class']) ? '"' . $attributes['class'] . '"' : $attributes['class'];
            $attributes['title'] = preg_match('/ /', $attributes['title']) ? '"' . $attributes['title'] . '"' : $attributes['title'];
            // If data-htmlarea-external attribute is set, keep the href unchanged
            if ($attributes['data-htmlarea-external']) {
                $href = $attributes['href'];
            } else {
                $href = $info['url'] . ($info['query'] ? ',0,' . $info['query'] : '');
            }
            // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes).
            // A "-" placeholder is written for an empty target/class when a later parameter follows.
            $target = $attributes['target'];
            $class = $attributes['class'];
            $title = $attributes['title'];
            if ($target) {
                $targetPart = ' ' . $target;
            } elseif ($class || $title) {
                $targetPart = ' -';
            } else {
                $targetPart = '';
            }
            if ($class) {
                $classPart = ' ' . $class;
            } elseif ($title) {
                $classPart = ' -';
            } else {
                $classPart = '';
            }
            $titlePart = $title ? ' ' . $title : '';
            $openingTag = '<link ' . $href . $targetPart . $classPart . $titlePart . '>';
            $closingTag = '</link>';
            // Modify parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
                $parameters = array(
                    'conf' => &$conf,
                    'currentBlock' => $block,
                    'url' => $href,
                    'attributes' => $attributes
                );
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $blocks[$index] = $processor->modifyParamsLinksDb($parameters, $this);
                }
            } else {
                $blocks[$index] = $openingTag . $this->TS_links_db($this->removeFirstAndLastTag($blocks[$index])) . $closingTag;
            }
        } else {
            // ... otherwise store the link as a-tag.
            // Unsetting 'rtekeep' attribute if that had been set.
            unset($attributes['rtekeep']);
            if (!$attributes['data-htmlarea-external']) {
                $siteUrl = $this->siteUrl();
                // If the url is local, remove url-prefix
                if ($siteUrl && substr($attributes['href'], 0, strlen($siteUrl)) == $siteUrl) {
                    $attributes['href'] = substr($attributes['href'], strlen($siteUrl));
                }
                // Check for FAL link-handler keyword
                list($linkHandlerKeyword, $linkHandlerValue) = explode(':', $attributes['href'], 2);
                if ($linkHandlerKeyword === '?file') {
                    try {
                        $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject(rawurldecode($linkHandlerValue));
                        if ($fileOrFolderObject instanceof Resource\FileInterface || $fileOrFolderObject instanceof Resource\Folder) {
                            $attributes['href'] = $fileOrFolderObject->getPublicUrl();
                        }
                    } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // The identifier inserted in the RTE is already gone...
                    }
                }
            }
            unset($attributes['data-htmlarea-external']);
            $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, 1) . '>';
            $closingTag = '</a>';
            $blocks[$index] = $openingTag . $this->TS_links_db($this->removeFirstAndLastTag($blocks[$index])) . $closingTag;
        }
    }
    return implode('', $blocks);
}
595
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting <link tags> to <A>-tags
 *
 * The typolink parameter of every <link> tag is decoded and resolved to an href:
 * mail addresses, anchors, FAL "file:" references, files in the site root, external urls,
 * internal files/folders and finally page ids or aliases. A page that cannot be found is
 * marked with a "rteerror" attribute and a highlighting style on the resulting <a> tag.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_db()
 */
public function TS_links_rte($value)
{
    $conf = array();
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>":
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = $this->siteUrl();
    foreach ($blockSplit as $k => $v) {
        $error = '';
        $external = false;
        // Block
        if ($k % 2) {
            // split away the first "<link" part
            $typolink = explode(' ', substr($this->getFirstTag($v), 0, -1), 2)[1];
            $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolink);

            $link_param = $tagCode['url'];
            // Parsing the typolink data. This parsing is roughly done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
            // Parse URL (parse_url() may return FALSE or omit the scheme, so normalize it once):
            $pU = parse_url($link_param);
            $scheme = isset($pU['scheme']) ? $pU['scheme'] : '';
            if (strstr($link_param, '@') && (!$scheme || $scheme == 'mailto')) {
                // mailadr
                $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
            } elseif (isset($link_param[0]) && $link_param[0] === '#') {
                // check if anchor
                $href = $siteUrl . $link_param;
            } else {
                // Check for FAL link-handler keyword:
                list($linkHandlerKeyword, $linkHandlerValue) = array_pad(explode(':', trim($link_param), 2), 2, null);
                if ($linkHandlerKeyword === 'file' && !StringUtility::beginsWith($link_param, 'file://')) {
                    $href = $siteUrl . '?' . $linkHandlerKeyword . ':' . rawurlencode($linkHandlerValue);
                } else {
                    $fileChar = (int)strpos($link_param, '/');
                    $urlChar = (int)strpos($link_param, '.');
                    // Detects if a file is found in site-root.
                    list($rootFileDat) = explode('?', $link_param);
                    $rFD_fI = pathinfo($rootFileDat);
                    $fileExtension = isset($rFD_fI['extension']) ? strtolower($rFD_fI['extension']) : '';
                    if (strpos($link_param, '/') === false && trim($rootFileDat) && (@is_file(PATH_site . $rootFileDat) || $fileExtension === 'php' || $fileExtension === 'html' || $fileExtension === 'htm')) {
                        $href = $siteUrl . $link_param;
                    } elseif (
                        (
                            $scheme
                            && !isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$scheme])
                        )
                        || ($urlChar && (!$fileChar || $urlChar < $fileChar))
                    ) {
                        // url (external): if has scheme or if a '.' comes before a '/'.
                        $href = $link_param;
                        if (!$scheme) {
                            $href = 'http://' . $href;
                        }
                        $external = true;
                    } elseif ($fileChar) {
                        // It is an internal file or folder
                        // Try to transform the href into a FAL reference
                        try {
                            $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject($link_param);
                        } catch (Resource\Exception $exception) {
                            // Nothing to be done if file/folder not found or path invalid
                            $fileOrFolderObject = null;
                        }
                        if ($fileOrFolderObject instanceof Resource\Folder) {
                            // It's a folder
                            $folderIdentifier = $fileOrFolderObject->getIdentifier();
                            $href = $siteUrl . '?file:' . rawurlencode($folderIdentifier);
                        } elseif ($fileOrFolderObject instanceof Resource\FileInterface) {
                            // It's a file
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            $href = $siteUrl . '?file:' . $fileObject->getUid();
                        } else {
                            $href = $siteUrl . $link_param;
                        }
                    } else {
                        // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                        // Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/parameters triplet
                        $pairParts = GeneralUtility::trimExplode(',', $link_param, true);
                        $idPart = isset($pairParts[0]) ? $pairParts[0] : '';
                        $link_params_parts = explode('#', $idPart);
                        $idPart = trim($link_params_parts[0]);
                        $sectionMark = isset($link_params_parts[1]) ? trim($link_params_parts[1]) : '';
                        // If no id or alias is given, set it to class record pid
                        if ((string)$idPart === '') {
                            $idPart = $this->recPid;
                        }
                        // Checking if the id-parameter is an alias.
                        if (!MathUtility::canBeInterpretedAsInteger($idPart)) {
                            list($idPartR) = BackendUtility::getRecordsByField('pages', 'alias', $idPart);
                            $idPart = (int)$idPartR['uid'];
                        }
                        $page = BackendUtility::getRecord('pages', $idPart);
                        if (is_array($page)) {
                            // Page must exist...
                            $href = $siteUrl . '?id=' . $idPart . (!empty($pairParts[2]) ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                        } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][explode(':', $link_param, 2)[0]])) {
                            // The part before the first ':' is a registered typolink link handler keyword.
                            // (array_shift() must not be used on the explode() result: it takes its argument by reference.)
                            $href = $link_param;
                        } else {
                            $href = $siteUrl . '?id=' . $link_param;
                            $error = 'No page found: ' . $idPart;
                        }
                    }
                }
            }
            // Setting the A-tag:
            $bTag = '<a href="' . htmlspecialchars($href) . '"'
                . ($tagCode['target'] ? ' target="' . htmlspecialchars($tagCode['target']) . '"' : '')
                . ($tagCode['class'] ? ' class="' . htmlspecialchars($tagCode['class']) . '"' : '')
                . ($tagCode['title'] ? ' title="' . htmlspecialchars($tagCode['title']) . '"' : '')
                . ($external ? ' data-htmlarea-external="1"' : '')
                . ($error ? ' rteerror="' . htmlspecialchars($error) . '" style="background-color: yellow; border:2px red solid; color: black;"' : '') . '>';
            $eTag = '</a>';
            // Modify parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])) {
                $parameters = array(
                    'conf' => &$conf,
                    'currentBlock' => $v,
                    'url' => $href,
                    'tagCode' => $tagCode,
                    'external' => $external,
                    'error' => $error
                );
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
                }
            } else {
                $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
            }
        }
    }
    // Return content:
    return implode('', $blockSplit);
}
738
/**
 * Transformation handler: 'ts_preserve' / direction: "db"
 * Turns <span specialtag="..."> wrappers back into the tag that is stored
 * (rawurlencoded) in their "specialtag" attribute.
 * Does nothing when no tags to preserve are configured.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_preserve_db($value)
{
    if ($this->preserveTags) {
        // Splitting into blocks for processing (span-tags are used for special tags)
        $segments = $this->splitIntoBlock('span', $value);
        foreach ($segments as $index => $segment) {
            if (!($index % 2)) {
                // Content outside of span blocks
                continue;
            }
            list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment));
            if (!$attributes['specialtag']) {
                // An ordinary span, keep it untouched
                continue;
            }
            $preservedTag = rawurldecode($attributes['specialtag']);
            $closingTag = '</' . $this->getFirstTagName($preservedTag) . '>';
            $segments[$index] = $preservedTag . $this->removeFirstAndLastTag($segment) . $closingTag;
        }
        $value = implode('', $segments);
    }
    return $value;
}
765
/**
 * Wraps preserved tags in placeholder spans (direction: "rte").
 *
 * Every block of a tag listed in $this->preserveTags is replaced by
 * <span specialtag="...">inner content</span>, where the attribute carries the
 * raw-url-encoded opening tag. Nothing is done when $this->preserveTags is empty.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_preserve_db()
 */
public function TS_preserve_rte($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    $output = '';
    foreach ($this->splitIntoBlock($this->preserveTags, $value) as $position => $chunk) {
        // Odd positions are the blocks of preserved tags
        if ($position % 2) {
            $encodedTag = rawurlencode($this->getFirstTag($chunk));
            $chunk = '<span specialtag="' . $encodedTag . '">' . $this->removeFirstAndLastTag($chunk) . '</span>';
        }
        $output .= $chunk;
    }
    return $output;
}
786
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container
 * elements (blockquote, dd, div, header, section, footer, nav, article, aside)
 * are processed recursively; in every other block element line breaks are
 * replaced by a single space. Content between the blocks is passed through
 * divideIntoLines(). All parts except the last one are terminated with LF.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Run-away brake for the recursion below
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Hand the level back before bailing out; otherwise each triggered brake
        // would permanently reduce the nesting depth available to later calls.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Drop trailing whitespace-only parts to avoid a superfluous line break after the last block
    $aC = count($blockSplit);
    while ($aC && trim($blockSplit[$aC - 1]) === '') {
        unset($blockSplit[$aC - 1]);
        $aC = count($blockSplit);
    }
    // Block elements whose content is transformed recursively
    $containerTags = array('blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside');
    $cc = 0;
    foreach ($blockSplit as $k => $v) {
        $cc++;
        // No line feed after the last part
        $lastBR = $cc == $aC ? '' : LF;
        if ($k % 2) {
            // Inside block:
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTags, true)) {
                $blockSplit[$k] = $this->getFirstTag($v) . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>' . $lastBR;
            } else {
                // Lists, tables, headers and all remaining block elements: eliminate true line breaks
                $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+/', ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
            }
        } elseif (trim($blockSplit[$k]) !== '') {
            // NON-block:
            $blockSplit[$k] = preg_replace('/<hr\\/>/', '<hr />', $blockSplit[$k]);
            // Remove linebreaks preceding hr tags
            $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
            // Remove linebreaks following hr tags
            $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . CR . ']+/', '<$1$2/>', $blockSplit[$k]);
            // Replace other linebreaks with space
            $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+/', ' ', $blockSplit[$k]);
            $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]) . $lastBR;
            $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
        } else {
            // Whitespace-only content between two blocks is dropped
            unset($blockSplit[$k]);
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode('', $blockSplit);
}
868
/**
 * Moves the style attribute of a-tags onto a wrapping span-tag.
 *
 * An a-tag that has a "style" attribute but no "rteerror" attribute is rewritten
 * to <span style="..."><a ...>content</a></span>; all other content is left as is.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $pieces = $this->splitIntoBlock('A', $value);
    foreach ($pieces as $idx => $piece) {
        // Only odd positions contain an A-tag block
        if (!($idx % 2)) {
            continue;
        }
        $tagData = $this->get_tag_attributes($this->getFirstTag($piece), true);
        $linkAttributes = $tagData[0];
        // Links flagged with rteerror keep their style attribute
        if ($linkAttributes['style'] && !$linkAttributes['rteerror']) {
            $spanAttributes = array('style' => $linkAttributes['style']);
            unset($linkAttributes['style']);
            $opening = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, 1) . '><a ' . GeneralUtility::implodeAttributes($linkAttributes, 1) . '>';
            $pieces[$idx] = $opening . $this->removeFirstAndLastTag($pieces[$idx]) . '</a></span>';
        }
    }
    return implode('', $pieces);
}
894
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container
 * elements (blockquote, dd, div, header, section, footer, nav, article, aside)
 * are processed recursively, other block elements are kept untouched. Content
 * between the blocks is turned into p/div sections by setDivTags().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Block elements whose content is transformed recursively
    $containerTags = array('blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside');
    $leadingBreakPattern = '/^[ ]*' . LF . '/';
    // Split the content from database by the occurrence of the block elements
    $sections = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($sections as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks
            $openingTag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTags, true)) {
                $inner = $this->TS_transform_rte($this->removeFirstAndLastTag($sections[$k]));
                $sections[$k] = $openingTag . $inner . '</' . $tagName . '>';
            }
            // The line break directly following a block is dropped from the next part
            $sections[$k + 1] = preg_replace($leadingBreakPattern, '', $sections[$k + 1]);
            continue;
        }
        // NON-block:
        $followingTagName = $this->getFirstTagName($sections[$k + 1]);
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $sections[$k]) == 1;
        // The part is followed by a block or is the last part
        if (GeneralUtility::inList($this->blockElementList, $followingTagName) || !isset($sections[$k + 1])) {
            if ($onlyLineBreaks) {
                // Nothing but line breaks: remove the leading one
                $sections[$k] = preg_replace($leadingBreakPattern, '', $sections[$k]);
            } else {
                // Real content: reduce the number of trailing line breaks by one
                $sections[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $sections[$k]);
            }
        }
        if ($onlyLineBreaks || (string)$sections[$k] !== '') {
            $sections[$k] = $this->setDivTags($sections[$k], $this->procOptions['useDIVasParagraphTagForRTE'] ? 'div' : 'p');
        } else {
            // A blank part is removed, unless it originally consisted of line breaks only
            unset($sections[$k]);
        }
    }
    return implode(LF, $sections);
}
953
954 /***************************************************************
955 *
956 * Generic RTE transformation, analysis and helper functions
957 *
958 **************************************************************/
/**
 * Fetches the content of a file or URL by delegating to GeneralUtility::getUrl().
 *
 * @param string $url Filepath/URL to read
 * @return string The content from the resource given as input.
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl()
 */
public function getUrl($url)
{
    $content = GeneralUtility::getUrl($url);
    return $content;
}
970
/**
 * Cleans content going from the RTE into the database.
 * Calls HTMLcleaner() of the parent class with a configuration derived from
 * getKeepTags('db') and the processing options "dontRemoveUnknownTags_db" and
 * "dontUndoHSC_db".
 *
 * @param string $content Content to clean up
 * @param string $tagList Comma list of tags to specifically allow. If empty, the default from getKeepTags('db') is used.
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content, $tagList = '')
{
    $keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');
    // Unknown tags are removed unless "dontRemoveUnknownTags_db" is set
    $keepUnknown = 0;
    if ($this->procOptions['dontRemoveUnknownTags_db']) {
        $keepUnknown = 1;
    }
    // -1 is passed as hSC argument (re-convert literals to characters) unless "dontUndoHSC_db" is set
    $hscMode = -1;
    if ($this->procOptions['dontUndoHSC_db']) {
        $hscMode = 0;
    }
    return $this->HTMLcleaner($content, $keepTags, $keepUnknown, $hscMode);
}
994
/**
 * Builds the tag configuration array for HTMLcleaner(), depending on whether content goes TO or FROM the Rich Text Editor ($direction).
 * Without $tagList the result is cached per direction in $this->getKeepTags_cache; with $tagList it is computed freshly and not cached.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @param string $tagList Comma list of tags to keep (overriding the default list and the allowTags/denyTags processing options)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte', $tagList = '')
{
    // A cached configuration can only be used when no explicit tag list is requested
    if (is_array($this->getKeepTags_cache[$direction]) && !$tagList) {
        return $this->getKeepTags_cache[$direction];
    }
    if ((string)$tagList !== '') {
        // An explicit tag list takes precedence over everything else
        $keepTags = array_flip(GeneralUtility::trimExplode(',', $tagList, true));
    } else {
        // Default tags plus "allowTags", minus "denyTags" from the processing options
        $defaultTags = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
        $keepTags = array_flip(GeneralUtility::trimExplode(',', $defaultTags . ',' . strtolower($this->procOptions['allowTags']), true));
        foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'], true) as $deniedTag) {
            unset($keepTags[$deniedTag]);
        }
    }
    if ($direction == 'rte') {
        if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
            // Transform bold/italics tags to strong/em
            foreach (array('b' => 'STRONG', 'i' => 'EM') as $sourceTag => $targetTag) {
                if (isset($keepTags[$sourceTag])) {
                    $keepTags[$sourceTag] = array('remap' => $targetTag);
                }
            }
        }
        // Convert the TypoScript style configuration (keys ending with a dot) into the array format of HTMLcleaner()
        list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
    } elseif ($direction == 'db') {
        if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
            // Transform strong/em back to bold/italics
            foreach (array('strong' => 'b', 'em' => 'i') as $sourceTag => $targetTag) {
                if (isset($keepTags[$sourceTag])) {
                    $keepTags[$sourceTag] = array('remap' => $targetTag);
                }
            }
        }
        // Span tags, if allowed, get a restricted attribute set and are removed when no attribute is left
        if (isset($keepTags['span'])) {
            $keepTags['span'] = array(
                'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                'fixAttrib' => array(
                    'class' => array(
                        'removeIfFalse' => 1
                    )
                ),
                'rmTagIfNoAttrib' => 1
            );
            if (!empty($this->allowedClasses)) {
                $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
            }
        }
        // Parser options from the processing options, with defaults for nesting and attribute-less tags
        $parserConf = $this->procOptions['HTMLparser_db.'];
        if (!$parserConf['globalNesting']) {
            $parserConf['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
        }
        if (!$parserConf['noAttrib']) {
            $parserConf['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
        }
        list($keepTags) = $this->HTMLparserConfig($parserConf, $keepTags);
    }
    // A configuration built for an explicit tag list is returned without being cached
    if ($tagList) {
        return $keepTags;
    }
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
1084
/**
 * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by LF.
 * The point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value. (If no div/p section is found or $count is exhausted, a string is returned in any case.)
 * @return string|array Processed input value.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Setting configuration for processing:
    $allowTagsOutside = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside'] ? 'hr,' . $this->procOptions['allowTagsOutside'] : 'hr,img'), true);
    $remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
    // Setting the third param to 1 will eliminate false end-tags.
    $divSplit = $this->splitIntoBlock('div,p', $value, 1);
    $keepAttribListArr = array();
    if ($this->procOptions['keepPDIVattribs']) {
        $keepAttribListArr = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Returns plainly the value if there was no div/p sections in it
    if (count($divSplit) <= 1 || $count <= 0) {
        // Wrap hr tags with LF's
        $newValue = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $value);
        $newValue = preg_replace('/' . LF . LF . '/i', LF, $newValue);
        $newValue = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $newValue);
        return $newValue;
    }
    // Traverse the split sections:
    foreach ($divSplit as $k => $v) {
        if ($k % 2) {
            // Inside
            $v = $this->removeFirstAndLastTag($v);
            // Fetching 'sub-lines' - which will explode any further p/div nesting...
            $subLines = $this->divideIntoLines($v, $count - 1, 1);
            // If sub-nesting of p/div was found (array returned), the sub-lines are used directly as the
            // content of this section. Otherwise the section is processed as a true line:
            if (!is_array($subLines)) {
                if ($this->procOptions['dontConvBRtoParagraph']) {
                    $subLines = array($subLines);
                } else {
                    // Break-tags are treated as if each separated a line of content
                    $subLines = preg_split('/<br[[:space:]]*[\\/]?>/i', $v);
                }
                // Tag name and attributes depend on the section only, so they are resolved once for all sub-lines
                $fTag = $this->getFirstTag($divSplit[$k]);
                $tagName = strtolower($this->getFirstTagName($divSplit[$k]));
                $attribs = $this->get_tag_attributes($fTag);
                // Keep attributes (lowercase)
                $newAttribs = array();
                foreach ($keepAttribListArr as $keepA) {
                    if (isset($attribs[0][$keepA])) {
                        $newAttribs[$keepA] = $attribs[0][$keepA];
                    }
                }
                // ALIGN attribute: set to value, but not 'left'
                if (!$this->procOptions['skipAlign'] && trim($attribs[0]['align']) !== '' && strtolower($attribs[0]['align']) != 'left') {
                    $newAttribs['align'] = strtolower($attribs[0]['align']);
                }
                // CLASS attribute: kept completely if allowed, otherwise reduced to the allowed classes
                if (!$this->procOptions['skipClass'] && trim($attribs[0]['class']) !== '') {
                    if (empty($this->allowedClasses) || in_array($attribs[0]['class'], $this->allowedClasses)) {
                        $newAttribs['class'] = $attribs[0]['class'];
                    } else {
                        $classes = GeneralUtility::trimExplode(' ', $attribs[0]['class'], true);
                        $newClasses = array();
                        foreach ($classes as $class) {
                            if (in_array($class, $this->allowedClasses)) {
                                $newClasses[] = $class;
                            }
                        }
                        if (!empty($newClasses)) {
                            $newAttribs['class'] = implode(' ', $newClasses);
                        }
                    }
                }
                // Lines are wrapped in a tag only if attributes are left and remapParagraphTag is not "1"
                $wrapLines = !empty($newAttribs) && $remapParagraphTag !== '1';
                $openingTag = '';
                $closingTag = '';
                if ($wrapLines) {
                    if ($remapParagraphTag === 'P') {
                        $tagName = 'p';
                    }
                    if ($remapParagraphTag === 'DIV') {
                        $tagName = 'div';
                    }
                    $openingTag = '<' . trim($tagName . ' ' . $this->compileTagAttribs($newAttribs)) . '>';
                    $closingTag = '</' . $tagName . '>';
                }
                // Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
                foreach ($subLines as $sk => $subLine) {
                    // Clear up the subline for DB.
                    $subLines[$sk] = $this->HTMLcleaner_db($subLine);
                    // Remove any line break char (10 or 13)
                    $subLines[$sk] = preg_replace('/' . LF . '|' . CR . '/', '', $subLines[$sk]);
                    if ($wrapLines) {
                        $subLines[$sk] = $openingTag . $subLines[$sk] . $closingTag;
                    }
                }
            }
            // Add the processed line(s)
            $divSplit[$k] = implode(LF, $subLines);
            // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
            // But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
            // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
            if (trim(strip_tags($divSplit[$k])) == '&nbsp;' && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $divSplit[$k]) && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
                $divSplit[$k] = '';
            }
        } else {
            // outside div:
            // Only the tags of $allowTagsOutside survive outside div/p sections
            $divSplit[$k] = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
            // Wrap hr tags with LF's
            $divSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $divSplit[$k]);
            $divSplit[$k] = preg_replace('/' . LF . LF . '/i', LF, $divSplit[$k]);
            $divSplit[$k] = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $divSplit[$k]);
            // Remove positions which are outside div/p tags and without content
            if ((string)$divSplit[$k] === '') {
                unset($divSplit[$k]);
            }
        }
    }
    // Return value:
    return $returnArray ? $divSplit : implode(LF, $divSplit);
}
1211
/**
 * Converts all lines (separated by LF) into <div></div>/<p></p>-sections, unless a line
 * already is a div/p section or contains an hr tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @param string $dT Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value, $dT = 'p')
{
    // Configuration for HTMLcleaner(), which processes each non-blank line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    // Unknown tags are handled in "protect" mode unless "dontProtectUnknownTags_rte" is set
    $unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
    // hSC argument for HTMLcleaner(): 1 unless "dontHSC_rte" is set
    $hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
    $restoreNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'] ? 1 : 0;
    $lines = array();
    foreach (explode(LF, $value) as $line) {
        if (trim($line) === '') {
            // A blank line becomes a non-breaking space
            $line = '&nbsp;';
        } else {
            $line = $this->HTMLcleaner($line, $keepTags, $unknownTagMode, $hscMode);
            if ($restoreNbsp) {
                $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
            }
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isDivSection = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $isParagraphSection = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only wrap if the line is not already a div or p section
            if (!$isDivSection && !$isParagraphSection) {
                $line = '<' . $dT . '>' . $line . '</' . $dT . '>';
            }
        }
        $lines[] = $line;
    }
    return implode(LF, $lines);
}
1258
/**
 * Returns the site URL as reported by GeneralUtility::getIndpEnv('TYPO3_SITE_URL').
 *
 * @return string Value of GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv()
 */
public function siteUrl()
{
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    return $siteUrl;
}
1269
/**
 * Default tag mapping for TS: strong/em are mapped to b/i for direction "db",
 * b/i are mapped to strong/em for direction "rte".
 *
 * @param string $code Input code to process
 * @param string $direction Direction: to database (db) or from database to RTE (rte)
 * @return string Processed value
 */
public function defaultTStagMapping($code, $direction = 'rte')
{
    $tagMaps = array(
        'db' => array(
            'strong' => 'b',
            'em' => 'i'
        ),
        'rte' => array(
            'b' => 'strong',
            'i' => 'em'
        )
    );
    foreach ($tagMaps as $mapDirection => $tagMap) {
        if ($direction == $mapDirection) {
            $code = $this->mapTags($code, $tagMap);
        }
    }
    return $code;
}
1295
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute (as "width: NNpx" / "height: NNpx"), use that!
 * Otherwise the "width" / "height" attributes are used.
 *
 * Only the plain "width" and "height" style properties are evaluated; properties that merely
 * end with these words (e.g. "max-width", "border-width", "line-height") are ignored.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
    $w = 0;
    $h = 0;
    if ($style !== '') {
        // The lookbehind rejects matches inside longer property names such as "line-height"
        $prefix = '/(?<![a-z0-9_-])';
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';
        $reg = array();
        // Width
        if (preg_match($prefix . 'width' . $regex, $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match($prefix . 'height' . $regex, $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the dedicated attributes when the style attribute gave no usable value
    if (!$w && isset($attribArray['width'])) {
        $w = $attribArray['width'];
    }
    if (!$h && isset($attribArray['height'])) {
        $h = $attribArray['height'];
    }
    return array((int)$w, (int)$h);
}
1326
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * The returned array always contains "url" and "type" (email, file, ext, anchor or page).
 * For URLs on the current host, "relScriptPath" and "relUrl" are set as well, and for
 * page links additionally "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = array();
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
    } elseif (strpos($url, '?file:') !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, strpos($url, '?file:') + 1));
    } else {
        $curURL = $this->siteUrl();
        // Determine the length of the common prefix of $url and the site URL.
        // The comparison is limited to the shorter string so no offset beyond the end of either string is read.
        $compareLength = min(strlen($url), strlen($curURL));
        for ($a = 0; $a < $compareLength; $a++) {
            if ($url[$a] != $curURL[$a]) {
                break;
            }
        }
        $info['relScriptPath'] = substr($curURL, $a);
        $info['relUrl'] = substr($url, $a);
        $info['url'] = $url;
        $info['type'] = 'ext';
        $siteUrl_parts = parse_url($url);
        $curUrl_parts = parse_url($curURL);
        // Hosts should match
        if ($siteUrl_parts['host'] == $curUrl_parts['host'] && (!$info['relScriptPath'] || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir)) {
            // If the script path seems to match or is empty (FE-EDIT)
            $uP = parse_url($info['relUrl']);
            if ($info['relUrl'] === '#' . $siteUrl_parts['fragment']) {
                // The relative part consists of the fragment only
                $info['url'] = $info['relUrl'];
                $info['type'] = 'anchor';
            } elseif (!trim($uP['path']) || $uP['path'] === 'index.php') {
                // URL is a page (id parameter)
                $pp = preg_split('/^id=/', $uP['query']);
                $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1]);
                $parameters = explode('&', $pp[1]);
                // The first element is the value of the leading id parameter, the rest are additional parameters
                $id = array_shift($parameters);
                if ($id) {
                    $info['pageid'] = $id;
                    $info['cElement'] = $uP['fragment'];
                    $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
                    $info['type'] = 'page';
                    $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
                }
            } else {
                $info['url'] = $info['relUrl'];
                $info['type'] = 'file';
            }
        } else {
            // Not on the current site: the relative parts are meaningless
            unset($info['relScriptPath']);
            unset($info['relUrl']);
        }
    }
    return $info;
}
1389
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 *
 * A href without scheme gets the site URL prepended; a href with a scheme other than
 * "mailto" gets the attribute data-htmlarea-external="1". A-tags without href content
 * (e.g. named anchors) keep their attributes and always get rtekeep="1".
 * The content of each a-tag is processed recursively.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set (except for a-tags without href content).
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Block
        if ($k % 2) {
            list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
            // Checking if there is a scheme, and if not, prepend the current url.
            // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
            // A missing href attribute has to be treated like an empty one here; a plain "!== ''" check
            // would let NULL pass and turn every named anchor into a link to the site URL.
            if (isset($attribArray['href']) && (string)$attribArray['href'] !== '') {
                $uP = parse_url(strtolower($attribArray['href']));
                if (!$uP['scheme']) {
                    $attribArray['href'] = $this->siteUrl() . $attribArray['href'];
                } elseif ($uP['scheme'] != 'mailto') {
                    $attribArray['data-htmlarea-external'] = 1;
                }
            } else {
                $attribArray['rtekeep'] = 1;
            }
            if (!$dontSetRTEKEEP) {
                $attribArray['rtekeep'] = 1;
            }
            $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
            $eTag = '</a>';
            $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
1426
/**
 * Applies the "plainImageMode" processing option to the dimensions of an image tag.
 *
 * - lockDimensions: width and height are set to the values of $imageInfo
 * - lockRatioWhenSmaller: width is limited to $imageInfo[0], then height is recalculated as for lockRatio
 * - lockRatio: height is recalculated from width and the ratio $imageInfo[1] / $imageInfo[0]
 *
 * @param array $imageInfo info array of the image (width in key 0, height in key 1)
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
    } elseif ($mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio') {
        // "lockRatioWhenSmaller" first caps the width and then shares the ratio correction of "lockRatio"
        if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
            $attribArray['width'] = $imageInfo[0];
        }
        // Without a positive image width no ratio can be calculated
        if ($imageInfo[0] > 0) {
            $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
        }
    }
    return $attribArray;
}
1457
/**
 * Fetches a logger for the class of the current object from the LogManager.
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
}
1469 }