bbcf4f8dbc963090bc7b4197b378cbb1b2b64078
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\LinkHandling\LinkService;
19 use TYPO3\CMS\Core\Log\LogManager;
20 use TYPO3\CMS\Core\Resource;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22 use TYPO3\CMS\Core\Utility\MathUtility;
23 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
24
25 /**
26 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
27 *
28 * Concerning line breaks:
29 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
30 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
31 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
32 */
33 class RteHtmlParser extends HtmlParser
34 {
35 /**
36 * List of elements that are not wrapped into a "p" tag while doing the transformation.
37 * @var string
38 */
39 public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
40
41 /**
42 * List of all tags that are allowed by default
43 * @var string
44 */
45 protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
46
47 /**
48 * Set this to the pid of the record manipulated by the class.
49 *
50 * @var int
51 */
52 public $recPid = 0;
53
54 /**
55 * Element reference [table]:[field], eg. "tt_content:bodytext"
56 *
57 * @var string
58 */
59 public $elRef = '';
60
61 /**
62 * Current Page TSConfig
63 *
64 * @var array
65 */
66 public $tsConfig = [];
67
68 /**
69 * Set to the TSconfig options coming from Page TSconfig
70 *
71 * @var array
72 */
73 public $procOptions = [];
74
75 /**
76 * Run-away brake for recursive calls.
77 *
78 * @var int
79 */
80 public $TS_transform_db_safecounter = 100;
81
82 /**
83 * Data caching for processing function
84 *
85 * @var array
86 */
87 public $getKeepTags_cache = [];
88
89 /**
90 * Storage of the allowed CSS class names in the RTE
91 *
92 * @var array
93 */
94 public $allowedClasses = [];
95
96 /**
97 * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling,
98 * they have a special place for configuration via 'proc.keepPDIVattribs'
99 *
100 * @var array
101 */
102 protected $allowedAttributesForParagraphTags = [
103 'class',
104 'align',
105 'id',
106 'title',
107 'dir',
108 'lang',
109 'xml:lang',
110 'itemscope',
111 'itemtype',
112 'itemprop'
113 ];
114
115 /**
116 * Any tags that are allowed outside of <p> sections - usually similar to the block elements
117 * plus some special tags like <hr> and <img> (if images are allowed).
118 * Completely overridable via 'proc.allowTagsOutside'
119 *
120 * @var array
121 */
122 protected $allowedTagsOutsideOfParagraphs = [
123 'address',
124 'article',
125 'aside',
126 'blockquote',
127 'div',
128 'footer',
129 'header',
130 'hr',
131 'nav',
132 'section'
133 ];
134
/**
 * Stores the context of the record that is about to be processed:
 * the element reference and the page id the record lives on.
 *
 * @param string $elRef Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 * @param int $recPid PID of the record (page id)
 * @return void
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
147
148 /**********************************************
149 *
150 * Main function
151 *
152 **********************************************/
/**
 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
 * This is the main function called from DataHandler and transfer data classes
 *
 * Order of processing: line breaks are streamlined for processing, the optional
 * "entryHTMLparser_<direction>" cleaner is run, every resolved transformation mode is applied
 * (a user defined transformation registered for the mode takes precedence over the built-in one),
 * the optional "exitHTMLparser_<direction>" cleaner is run and line breaks are streamlined again.
 *
 * @param string $value Input value
 * @param array $specConf Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes all its major configuration options from there!
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
    }

    // Setting modes / transformations to be called
    $overruleMode = (string)($this->procOptions['overruleMode'] ?? '');
    if ($overruleMode !== '') {
        $modes = GeneralUtility::trimExplode(',', $overruleMode);
    } else {
        // Get parameters for rte_transformation:
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters'] ?? null);
        $modes = GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode'] ?? '');
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // A user defined transformation registered for this mode replaces the default handling
        $userTransformation = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if ($userTransformation) {
                $transformationObject = GeneralUtility::getUserObj($userTransformation);
                $transformationObject->pObj = $this;
                $transformationObject->transformationKey = $cmd;
                $value = $transformationObject->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if ($userTransformation) {
                $transformationObject = GeneralUtility::getUserObj($userTransformation);
                $transformationObject->pObj = $this;
                // Hand over the key in this direction as well, exactly like it is done for "db"
                $transformationObject->transformationKey = $cmd;
                $value = $transformationObject->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
266
/**
 * Resolves which transformation modes should be executed and makes sure that each of them
 * is only executed once.
 *
 * The shortcut mode "ts_css" is expanded to "detectbrokenlinks", "css_transform", "ts_images"
 * and "ts_links". Only an entry that is exactly "ts_css" is expanded; a mode name that merely
 * contains that string (e.g. the key of a user defined transformation) is left untouched.
 * For the direction "rte" the resulting order is reversed.
 *
 * @param string $direction Either "db" or "rte"
 * @param array $modes The configured modes; a single entry may itself be a comma separated list
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    $requestedModes = GeneralUtility::trimExplode(',', implode(',', $modes), true);

    $resolvedModes = [];
    foreach ($requestedModes as $mode) {
        if ($mode === 'ts_css') {
            // Replace the shortcut "ts_css" with all custom modes
            $resolvedModes[] = 'detectbrokenlinks';
            $resolvedModes[] = 'css_transform';
            $resolvedModes[] = 'ts_images';
            $resolvedModes[] = 'ts_links';
        } else {
            $resolvedModes[] = $mode;
        }
    }

    // Make list unique, keeping the first occurrence of every mode
    $resolvedModes = array_values(array_unique($resolvedModes));

    // Reverse order if direction is "rte"
    if ($direction === 'rte') {
        $resolvedModes = array_reverse($resolvedModes);
    }

    return $resolvedModes;
}
290
/**
 * Passes the content through the HTML cleaner when the given directive is enabled.
 *
 * These additional HTML cleaner configurations are applied either before or after the main
 * transformation and are thus totally independent processing options you can set up.
 * This is only possible via TSconfig (procOptions) currently.
 *
 * @param string $content
 * @param string $configurationDirective used to look up in the procOptions if enabled, and then fetch the
 *                                       parser configuration from the same key suffixed with a dot
 * @return string the processed content
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    // Directive not enabled: hand the content back untouched
    if (!$this->procOptions[$configurationDirective]) {
        return $content;
    }
    $parserSetup = $this->HTMLparserConfig($this->procOptions[$configurationDirective . '.']);
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $parserSetup;
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
310
311 /************************************
312 *
313 * Specific RTE TRANSFORMATION functions
314 *
315 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL.
 * If it turns out that the URL is from another website than the current one, the image is read from that external URL
 * and stored in the default upload folder of the backend user (unless 'dontFetchExtPictures' is set or we are not in backend mode).
 * Also "magic" images are processed here.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags; the tags themselves end up on the odd keys
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        // No image tag in the content
        return implode('', $imgSplit);
    }
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    /** @var $resourceFactory Resource\ResourceFactory */
    $resourceFactory = Resource\ResourceFactory::getInstance();
    /** @var $magicImageService Resource\Service\MagicImageService */
    $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
    $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            // Content between image tags is kept as it is
            continue;
        }
        // Get attributes
        list($attribArray) = $this->get_tag_attributes($v, true);
        // It's always an absolute URL coming from the RTE into the Database.
        $absoluteUrl = trim($attribArray['src'] ?? '');
        // Make path absolute if it is site-relative. parse_url() is used for the check because
        // pathinfo() does not provide a scheme. URLs carrying a host (e.g. protocol-relative
        // "//host/path") are not site-relative either and must not get the site URL prepended.
        $urlParts = parse_url($absoluteUrl);
        $isSiteRelative = !is_array($urlParts) || (empty($urlParts['scheme']) && empty($urlParts['host']));
        if ($sitePath && $isSiteRelative && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
            $absoluteUrl = $siteUrl . substr($absoluteUrl, strlen($sitePath));
        }
        // Image dimensions set in the img tag, if any
        $imgTagDimensions = $this->getWHFromAttribs($attribArray);
        if ($imgTagDimensions[0]) {
            $attribArray['width'] = $imgTagDimensions[0];
        }
        if ($imgTagDimensions[1]) {
            $attribArray['height'] = $imgTagDimensions[1];
        }
        $originalImageFile = null;
        if (!empty($attribArray['data-htmlarea-file-uid'])) {
            // An original image file uid is available
            try {
                /** @var $originalImageFile Resource\File */
                $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
            } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                // Log the fact the file could not be retrieved.
                $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                $this->getLogger()->error($message);
            }
        }
        if ($originalImageFile instanceof Resource\File) {
            // Public url of local file is relative to the site url, absolute otherwise
            if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                // This is a plain image, i.e. reference to the original image
                if (!empty($this->procOptions['plainImageMode'])) {
                    // "plain image mode" is configured
                    // Find the dimensions of the original image
                    $imageInfo = [
                        $originalImageFile->getProperty('width'),
                        $originalImageFile->getProperty('height')
                    ];
                    if (!$imageInfo[0] || !$imageInfo[1]) {
                        // Dimensions are not available as file properties, read them from the file itself
                        $filePath = $originalImageFile->getForLocalProcessing(false);
                        $imageInfo = @getimagesize($filePath);
                    }
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
            } else {
                // Magic image case: get a processed file with the requested configuration
                $imageConfiguration = [
                    'width' => $imgTagDimensions[0],
                    'height' => $imgTagDimensions[1]
                ];
                $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                $attribArray['width'] = $magicImage->getProperty('width');
                $attribArray['height'] = $magicImage->getProperty('height');
                $attribArray['src'] = $magicImage->getPublicUrl();
            }
        } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
            // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
            // Fetch the external image
            $externalFile = GeneralUtility::getUrl($absoluteUrl);
            if ($externalFile) {
                $pU = parse_url($absoluteUrl);
                $pI = pathinfo($pU['path'] ?? '');
                $originalExtension = $pI['extension'] ?? '';
                $extension = strtolower($originalExtension);
                if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                    $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $originalExtension;
                    // We insert this image into the user default upload folder
                    list($table, $field) = array_pad(explode(':', $this->elRef), 2, null);
                    /** @var Resource\Folder $folder */
                    $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                    /** @var Resource\File $fileObject */
                    $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                    $imageConfiguration = [
                        'width' => $attribArray['width'] ?? null,
                        'height' => $attribArray['height'] ?? null
                    ];
                    $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            }
        } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
            // Finally, check image as local file (siteURL equals the one of the image)
            // Image has no data-htmlarea-file-uid attribute
            // Relative path, rawurldecoded for special characters.
            $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
            // Absolute filepath, locked to relative path of this project
            $filepath = GeneralUtility::getFileAbsFileName($path);
            // Check file existence (in relative directory to this installation!)
            if ($filepath && @is_file($filepath)) {
                // Treat it as a plain image
                if (!empty($this->procOptions['plainImageMode'])) {
                    // If "plain image mode" has been configured
                    // Find the original dimensions of the image
                    $imageInfo = @getimagesize($filepath);
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
                // Let's try to find a file uid for this image
                try {
                    $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                    if ($fileOrFolderObject instanceof Resource\FileInterface) {
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        /** @var Resource\AbstractFile $fileObject */
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        // @todo if the retrieved file is a processed file, get the original file...
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    }
                } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                    // Nothing to be done if file/folder not found
                }
            }
        }
        // Remove width and height from style attribute
        $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        // Convert absolute to relative url
        if (isset($attribArray['src']) && GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
            $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
    }
    return implode('', $imgSplit);
}
475
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares images of database content for the RTE: the src attribute of every img tag
 * is turned into an absolute URL and a missing alt attribute is added (empty).
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // The img tags are found on the odd keys of the split result
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        // Nothing to process
        return implode('', $imgSplit);
    }
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // Matches the site path at the very beginning of a src value
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            continue;
        }
        // Get the attributes of the img tag
        list($attribArray) = $this->get_tag_attributes($v, true);
        $absoluteUrl = trim($attribArray['src']);
        $startsWithHttp = stripos($absoluteUrl, 'http') === 0;
        if (!$startsWithHttp) {
            // Not an absolute url yet. If the site is in a subpath (eg. /~user_jim/) this path
            // is removed first because it is part of $siteUrl already.
            $attribArray['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attribArray['src']);
        }
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
    }
    // Return processed content:
    return implode('', $imgSplit);
}
514
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Converting <A>-tags to <link tags>
 *
 * An a-tag is converted only if it carries no other attributes than href, target, class, title
 * and data-htmlarea-external (after the 'removeParams_PostProc' hooks had their say).
 * Any other a-tag is kept as a-tag, with a local URL prefix removed from its href.
 * Nested content of the tag is processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $conf = [];
    // Split content into <a> tag blocks; the blocks are found on the odd keys
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            // Content outside of a-tags stays untouched
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        $info = $this->urlInfoForLinkTags($attribArray['href']);
        // Find out which attributes remain besides the ones a <link> tag can carry
        $remainingAttributes = $attribArray;
        unset(
            $remainingAttributes['href'],
            $remainingAttributes['target'],
            $remainingAttributes['class'],
            $remainingAttributes['title'],
            $remainingAttributes['data-htmlarea-external']
        );
        // Remove additional parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'])) {
            $parameters = [
                'conf' => &$conf,
                'aTagParams' => &$remainingAttributes
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $remainingAttributes = $processor->removeParams($parameters, $this);
            }
        }
        // Only if href, target, class and title are the only attributes, we can alter the link!
        if (empty($remainingAttributes)) {
            // Quoting class and title attributes if they contain spaces
            foreach (['class', 'title'] as $attributeName) {
                $attribArray[$attributeName] = preg_match('/ /', $attribArray[$attributeName])
                    ? '"' . $attribArray[$attributeName] . '"'
                    : $attribArray[$attributeName];
            }
            // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes):
            // If data-htmlarea-external attribute is set, keep the href unchanged
            $href = $attribArray['data-htmlarea-external']
                ? $attribArray['href']
                : $info['url'] . ($info['query'] ? ',0,' . $info['query'] : '');
            $typoLink = GeneralUtility::makeInstance(TypoLinkCodecService::class)->encode(['url' => $href, 'target' => $attribArray['target'], 'class' => trim($attribArray['class'], '"'), 'title' => trim($attribArray['title'], '"'), 'additionalParams' => '']);
            // Modify parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
                $parameters = [
                    'conf' => &$conf,
                    'currentBlock' => $v,
                    'url' => $href,
                    'attributes' => $attribArray
                ];
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
                }
            } else {
                $innerContent = $this->TS_links_db($this->removeFirstAndLastTag($v));
                $blockSplit[$k] = '<link ' . $typoLink . '>' . $innerContent . '</link>';
            }
            continue;
        }
        // ... otherwise store the link as a-tag.
        // Unsetting 'rtekeep' attribute if that had been set.
        unset($attribArray['rtekeep']);
        if (!$attribArray['data-htmlarea-external']) {
            $siteURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
            // If the url is local, remove url-prefix
            if ($siteURL && substr($attribArray['href'], 0, strlen($siteURL)) == $siteURL) {
                $attribArray['href'] = substr($attribArray['href'], strlen($siteURL));
            }
            // Check for FAL link-handler keyword
            list($linkHandlerKeyword, $linkHandlerValue) = explode(':', $attribArray['href'], 2);
            if ($linkHandlerKeyword === '?file') {
                try {
                    $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject(rawurldecode($linkHandlerValue));
                    if ($fileOrFolderObject instanceof Resource\FileInterface || $fileOrFolderObject instanceof Resource\Folder) {
                        $attribArray['href'] = $fileOrFolderObject->getPublicUrl();
                    }
                } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                    // The identifier inserted in the RTE is already gone...
                }
            }
        }
        unset($attribArray['data-htmlarea-external']);
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
        $blockSplit[$k] = $openingTag . $this->TS_links_db($this->removeFirstAndLastTag($v)) . '</a>';
    }
    return implode('', $blockSplit);
}
613
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting <link tags> to <A>-tags
 *
 * The typolink parameter of every <link> tag is resolved to an href: mail addresses become
 * "mailto:" links, anchors / files / folders / pages are prefixed with the site URL, and
 * external URLs are kept and flagged with the attribute data-htmlarea-external.
 * Nested content of the tag is processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_db()
 */
public function TS_links_rte($value)
{
    $conf = [];
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>":
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            // Content outside of link blocks stays untouched
            continue;
        }
        $error = '';
        $external = false;
        // split away the first "<link" part; a tag without any parameter results in an empty typolink
        $typolink = explode(' ', substr($this->getFirstTag($v), 0, -1), 2)[1] ?? '';
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolink);

        $link_param = (string)$tagCode['url'];
        // Parsing the typolink data. This parsing is roughly done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        // Parse URL (parse_url() returns false for seriously malformed input):
        $pU = parse_url($link_param);
        $scheme = is_array($pU) && isset($pU['scheme']) ? $pU['scheme'] : '';
        if (strstr($link_param, '@') && (!$scheme || $scheme == 'mailto')) {
            // mailadr
            $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
        } elseif (substr($link_param, 0, 1) === '#') {
            // check if anchor
            $href = $siteUrl . $link_param;
        } else {
            // Check for FAL link-handler keyword:
            list($linkHandlerKeyword, $linkHandlerValue) = array_pad(explode(':', trim($link_param), 2), 2, '');
            if ($linkHandlerKeyword === 'file' && strpos($link_param, 'file://') !== 0) {
                $href = $siteUrl . '?' . $linkHandlerKeyword . ':' . rawurlencode($linkHandlerValue);
            } else {
                $fileChar = (int)strpos($link_param, '/');
                $urlChar = (int)strpos($link_param, '.');
                // Detects if a file is found in site-root.
                list($rootFileDat) = explode('?', $link_param);
                $rFD_fI = pathinfo($rootFileDat);
                $fileExtension = strtolower($rFD_fI['extension'] ?? '');
                if (strpos($link_param, '/') === false && trim($rootFileDat) && (@is_file(PATH_site . $rootFileDat) || $fileExtension === 'php' || $fileExtension === 'html' || $fileExtension === 'htm')) {
                    $href = $siteUrl . $link_param;
                } elseif (
                    (
                        $scheme
                        && !isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$scheme])
                    )
                    || ($urlChar && (!$fileChar || $urlChar < $fileChar))
                ) {
                    // url (external): if has scheme or if a '.' comes before a '/'.
                    $href = $link_param;
                    if (!$scheme) {
                        $href = 'http://' . $href;
                    }
                    $external = true;
                } elseif ($fileChar) {
                    // It is an internal file or folder
                    // Try to transform the href into a FAL reference
                    try {
                        $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject($link_param);
                    } catch (Resource\Exception $exception) {
                        // Nothing to be done if file/folder not found or path invalid
                        $fileOrFolderObject = null;
                    }
                    if ($fileOrFolderObject instanceof Resource\Folder) {
                        // It's a folder
                        $folderIdentifier = $fileOrFolderObject->getIdentifier();
                        $href = $siteUrl . '?file:' . rawurlencode($folderIdentifier);
                    } elseif ($fileOrFolderObject instanceof Resource\FileInterface) {
                        // It's a file
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        /** @var Resource\File $fileObject */
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        $href = $siteUrl . '?file:' . $fileObject->getUid();
                    } else {
                        $href = $siteUrl . $link_param;
                    }
                } else {
                    // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                    // Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/parameters triplet
                    $pairParts = GeneralUtility::trimExplode(',', $link_param, true);
                    $link_params_parts = explode('#', $pairParts[0] ?? '');
                    $idPart = trim($link_params_parts[0]);
                    $sectionMark = trim($link_params_parts[1] ?? '');
                    // If no id or alias is given, set it to class record pid
                    if ((string)$idPart === '') {
                        $idPart = $this->recPid;
                    }
                    // Checking if the id-parameter is an alias.
                    if (!MathUtility::canBeInterpretedAsInteger($idPart)) {
                        $aliasRecords = BackendUtility::getRecordsByField('pages', 'alias', $idPart);
                        $idPart = (int)($aliasRecords[0]['uid'] ?? 0);
                    }
                    $page = BackendUtility::getRecord('pages', $idPart);
                    if (is_array($page)) {
                        // Page must exist...
                        $href = $siteUrl . '?id=' . $idPart . (!empty($pairParts[2]) ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                    } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][explode(':', $link_param, 2)[0]])) {
                        // The part before the first colon is a registered link handler keyword.
                        // (array_shift() must not be used on the explode() result: it takes its argument by reference.)
                        $href = $link_param;
                    } else {
                        $href = $siteUrl . '?id=' . $link_param;
                        $error = 'No page found: ' . $idPart;
                    }
                }
            }
        }
        // Setting the A-tag:
        $bTag = '<a href="' . htmlspecialchars($href) . '"'
            . ($tagCode['target'] ? ' target="' . htmlspecialchars($tagCode['target']) . '"' : '')
            . ($tagCode['class'] ? ' class="' . htmlspecialchars($tagCode['class']) . '"' : '')
            . ($tagCode['title'] ? ' title="' . htmlspecialchars($tagCode['title']) . '"' : '')
            . ($external ? ' data-htmlarea-external="1"' : '') . '>';
        $eTag = '</a>';
        // Modify parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])) {
            $parameters = [
                'conf' => &$conf,
                'currentBlock' => $v,
                'url' => $href,
                'tagCode' => $tagCode,
                'external' => $external,
                'error' => $error
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
            }
        } else {
            $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    // Return content:
    return implode('', $blockSplit);
}
756
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the elements listed in $this->blockElementList:
 * - blockquote, dd, div, header, section, footer, nav, article and aside blocks are processed recursively,
 * - pre blocks are left untouched,
 * - in every other block, runs of line breaks are replaced by one space,
 * - content between blocks gets its line breaks around <hr> tags removed, the remaining line breaks
 *   replaced by a space, and is then handed to divideIntoLines(). Sections between blocks that are
 *   empty after trimming are dropped.
 *
 * @param string $value Content input
 * @return string Content output (sections joined with LF); the unchanged input if the recursion budget is used up
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Recursion brake. The budget is checked BEFORE it is consumed, so running into the limit
    // no longer lowers the counter permanently for later calls on the same instance.
    if ($this->TS_transform_db_safecounter <= 0) {
        return $value;
    }
    $this->TS_transform_db_safecounter--;
    try {
        // Split the content from RTE by the occurrence of these blocks:
        $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

        // Avoid superfluous linebreaks by dropping trailing sections that are empty after trimming
        while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
            array_pop($blockSplit);
        }

        // Traverse the blocks: odd keys are block elements, even keys are the content in between
        foreach ($blockSplit as $k => $v) {
            if ($k % 2) {
                // Inside block:
                $tag = $this->getFirstTag($v);
                $tagName = strtolower($this->getFirstTagName($v));
                switch ($tagName) {
                    case 'blockquote':
                    case 'dd':
                    case 'div':
                    case 'header':
                    case 'section':
                    case 'footer':
                    case 'nav':
                    case 'article':
                    case 'aside':
                        // Container elements: transform their inner content recursively
                        $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                        break;
                    case 'pre':
                        // Preformatted content is kept as is
                        break;
                    default:
                        // usually <hx> tags and <table> tags where no other block elements are within the tags
                        // Eliminate true linebreaks inside block element tags
                        $blockSplit[$k] = preg_replace(('/[' . LF . ']+/'), ' ', $blockSplit[$k]);
                }
            } else {
                // NON-block:
                if (trim($blockSplit[$k]) !== '') {
                    $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                    // Remove linebreaks preceding hr tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                    // Remove linebreaks following hr tags
                    $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                    // Replace other linebreaks with space
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                    $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
                } else {
                    unset($blockSplit[$k]);
                }
            }
        }
        return implode(LF, $blockSplit);
    } finally {
        // Give the consumed recursion level back, also when an exception passes through
        $this->TS_transform_db_safecounter++;
    }
}
826
/**
 * Wraps a-tags that contain a style attribute with a span-tag
 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
 * allowed.
 *
 * For every <a> block with a non-empty "style" attribute and no "rteerror" attribute, the style
 * is moved from the <a> tag to a newly created surrounding <span> tag. Other blocks stay as they are.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold an A-tag block
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Both attributes are optional: test them with empty() so a plain <a href="..."> does not
        // raise undefined index notices. Skip unless "style" is set and "rteerror" is not.
        if (empty($attribArray['style']) || !empty($attribArray['rteerror'])) {
            continue;
        }
        // Attributes of the wrapping span are built from scratch for each tag
        $spanAttributes = ['style' => $attribArray['style']];
        unset($attribArray['style']);
        $bTag = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a></span>';
        $blockSplit[$k] = $bTag . $this->removeFirstAndLastTag($blockSplit[$k]) . $eTag;
    }
    return implode('', $blockSplit);
}
856
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the elements listed in $this->blockElementList. The inner content of
 * blockquote, dd, div, header, section, footer, nav, article and aside blocks is transformed
 * recursively; all other blocks are kept unchanged. The content between blocks has its
 * surrounding line breaks reduced and is then turned into paragraphs by setDivTags().
 *
 * @param string $value Content input
 * @return string Content output (sections joined with LF)
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Traverse the blocks: odd keys are block elements, even keys are the content in between
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Based on tagname, we do transformations:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
            }
            // Strip the first line break (with leading spaces) from the section following the block
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1] ?? '');
        } else {
            // NON-block:
            // The last section has no successor; check for it explicitly instead of reading an undefined offset
            $hasNextSection = isset($blockSplit[$k + 1]);
            $nextFTN = $this->getFirstTagName($hasNextSection ? $blockSplit[$k + 1] : '');
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !$hasNextSection) {
                if (!$onlyLineBreaks) {
                    // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // If the line contains only linebreaks, remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    return implode(LF, $blockSplit);
}
915
916 /***************************************************************
917 *
918 * Generic RTE transformation, analysis and helper functions
919 *
920 **************************************************************/
921
/**
 * Cleans content on its way from the RTE into the database.
 *
 * Delegates to HTMLcleaner() with the keep-tags configuration of direction "db". Unknown tags
 * are kept only if the processing option "dontRemoveUnknownTags_db" is truthy.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        // Default: remove unknown tags.
        (bool)$this->procOptions['dontRemoveUnknownTags_db']
    );
}
938
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
 * The configuration is built once per direction and cached in object memory for subsequent calls.
 *
 * The tag list starts with $this->defaultAllowedTagsList plus the processing option "allowTags";
 * tags listed in "denyTags" are removed from it. Both options are compared in lower case.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // Serve from the per-direction cache if the configuration was built before
    if (isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }
    // Construct default list of tags to keep (tag names are the array keys, all lower case):
    $keepTags = array_flip(GeneralUtility::trimExplode(
        ',',
        $this->defaultAllowedTagsList . ',' . strtolower($this->procOptions['allowTags'] ?? ''),
        true
    ));
    // For tags to deny, remove them from $keepTags array. The list is lower-cased like
    // "allowTags", otherwise an entry such as "SPAN" would never match a key.
    $denyTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['denyTags'] ?? ''), true);
    foreach ($denyTags as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }
    // Remapping between b/i and strong/em is active unless explicitly disabled
    $transformBoldAndItalicTags = !isset($this->procOptions['transformBoldAndItalicTags'])
        || $this->procOptions['transformBoldAndItalicTags'];
    // Based on the direction of content, set further options:
    switch ($direction) {
        case 'rte':
            if ($transformBoldAndItalicTags) {
                // Transform bold/italics tags to strong/em
                if (isset($keepTags['b'])) {
                    $keepTags['b'] = ['remap' => 'STRONG'];
                }
                if (isset($keepTags['i'])) {
                    $keepTags['i'] = ['remap' => 'EM'];
                }
            }
            // Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
            list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? null, $keepTags);
            break;
        case 'db':
            if ($transformBoldAndItalicTags) {
                // Transform strong/em back to bold/italics:
                if (isset($keepTags['strong'])) {
                    $keepTags['strong'] = ['remap' => 'b'];
                }
                if (isset($keepTags['em'])) {
                    $keepTags['em'] = ['remap' => 'i'];
                }
            }
            // Setting up span tags if they are allowed:
            if (isset($keepTags['span'])) {
                $keepTags['span'] = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
            }
            // Setting further options, getting them from the processing options:
            $TSc = $this->procOptions['HTMLparser_db.'] ?? [];
            if (empty($TSc['globalNesting'])) {
                $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($TSc['noAttrib'])) {
                $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Transforming the array from TypoScript to regular array:
            list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
            break;
    }
    // Caching (internally, in object memory) the result for this direction:
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $keepTags;
}
1017
/**
 * Resolves $value into lines based on its <p> sections, for content going into the database.
 * The function ->setDivTags does the opposite.
 *
 * If $value contains no <p> section, or the recursion brake is used up, the whole value is
 * returned as a string after sanitizeLineBreaksForContentOnly() - regardless of $returnArray.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value; an array of lines only if $returnArray is set and <p> sections were found.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument makes splitIntoBlock() eliminate false end-tags
    $sections = $this->splitIntoBlock('p', $value, true);
    if ($count <= 0 || count($sections) <= 1) {
        // No p sections (or nesting too deep): return the content plainly
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    // Tags that survive strip_tags() for content found outside of paragraphs
    $tagsKeptOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';

    foreach ($sections as $index => $section) {
        if ($index % 2 === 0) {
            // Outside a paragraph: strip disallowed tags and drop the position if nothing is left
            $outside = trim(strip_tags($section, $tagsKeptOutsideParagraphs));
            $outside = $this->sanitizeLineBreaksForContentOnly($outside);
            if ((string)$outside === '') {
                unset($sections[$index]);
                continue;
            }
            // Wrap the pure text of the remaining content in <p> tags
            $plainText = strip_tags($outside);
            $sections[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outside);
            continue;
        }

        // Inside a <p> section: resolve any further p nesting recursively
        $subLines = $this->divideIntoLines($this->removeFirstAndLastTag($section), $count - 1, true);
        if (is_array($subLines)) {
            // Nested paragraphs (considered an error) replace the content of THIS section
            $line = implode(LF, $subLines);
        } else {
            // No nesting: process the content as one true line inside its <p> tag
            $line = $this->processContentWithinParagraph($subLines, $section);
        }
        // A line consisting of nothing but &nbsp; becomes pure blank, unless it carries an image
        // or a tag with one of the listed attributes, which has to be treated as possible content.
        $isBlankLine = trim(strip_tags($line)) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line));
        $sections[$index] = $isBlankLine ? '' : $line;
    }

    if ($returnArray) {
        return $sections;
    }
    return implode(LF, $sections);
}
1074
/**
 * Turns every LF separated line into a <p></p> section, for content going FROM database TO RTE.
 * Lines that contain an <hr> tag, or that are already wrapped in <div>...</div> or <p...>...</p>,
 * are not wrapped again.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner function, applied to each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // A blank line is represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $line = $this->HTMLcleaner($line, $keepTags, 'protect');
            // Convert double-encoded &nbsp; into regular &nbsp; (this could be reversed via the exitHTMLparser).
            // This was previously an option to disable called "dontConvAmpInNBSP_rte"
            $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $wrappedInDiv = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $wrappedInP = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only set p-tags if there is not already div or p tags:
            if (!$wrappedInDiv && !$wrappedInP) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
1115
/**
 * Used for transformation from RTE to DB
 *
 * Works on a single line within a <p> tag when storing into the database
 * This always adds <p> tags and validates the arguments,
 * additionally the content is cleaned up via the HTMLcleaner.
 *
 * Only attributes whose names are keys of $this->allowedAttributesForParagraphTags are kept on
 * the <p> tag; if that list is empty, all attributes are dropped. If $this->allowedClasses is
 * not empty, the class attribute is reduced to the allowed classes and removed if none remains.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, and validate the attributes
    $fTag = $this->getFirstTag($fullContentWithTag);
    $tagAttributes = [];
    // Check which attributes of the <p> tag to keep
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($fTag);
        // Make sure the tag attributes only contain the ones that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, $this->allowedAttributesForParagraphTags);

        // Only allow classes that are whitelisted in $this->allowedClasses.
        // The class attribute is optional, so test for its existence before reading it.
        if (
            isset($tagAttributes['class'])
            && trim($tagAttributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($tagAttributes['class'], $this->allowedClasses, true)
        ) {
            $classes = GeneralUtility::trimExplode(' ', $tagAttributes['class'], true);
            $classes = array_intersect($classes, $this->allowedClasses);
            if (!empty($classes)) {
                $tagAttributes['class'] = implode(' ', $classes);
            } else {
                unset($tagAttributes['class']);
            }
        }
    }
    // Remove any line break
    $content = str_replace(LF, '', $content);
    // Compile the surrounding <p> tag; rtrim() removes the separator space if no attribute is left
    return '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
}
1159
/**
 * Puts every <hr> tag on a line of its own, used when transforming from RTE to DB.
 *
 * Each <hr> tag is rewritten as "<hr.../>" surrounded by LFs, every pair of consecutive LFs is
 * then replaced by a single LF (one pass), and finally an LF at the start or at the end of the
 * content is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $withIsolatedRulers = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    $withoutDoubleBreaks = str_replace(LF . LF, LF, $withIsolatedRulers);
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubleBreaks);
}
1173
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute, use that!
 *
 * Only the CSS properties "width" and "height" with a pixel value are read from the style
 * attribute; properties that merely end in these words (e.g. "max-width", "border-width",
 * "line-height") are ignored. If a dimension is not found in the style (or is zero), the
 * "width" / "height" attribute is used instead.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $dimensions = ['width' => 0, 'height' => 0];
    $style = trim($attribArray['style'] ?? '');
    if ($style) {
        foreach (array_keys($dimensions) as $property) {
            $match = [];
            // The lookbehind rejects a preceding word character or hyphen, so that only the
            // exact property name matches and not e.g. "max-width" or "line-height".
            $pattern = '/(?<![\\w-])' . $property . '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';
            if (preg_match($pattern, $style, $match)) {
                $dimensions[$property] = (int)$match[1];
            }
        }
    }
    // Fall back to the plain tag attributes for everything the style did not provide
    foreach ($dimensions as $property => $pixels) {
        if (!$pixels) {
            $dimensions[$property] = $attribArray[$property] ?? 0;
        }
    }
    return [(int)$dimensions['width'], (int)$dimensions['height']];
}
1204
/**
 * Parse <A>-tag href and return status of email,external,file or page
 *
 * The returned array always contains "url" and "type" (one of "email", "file", "ext", "anchor",
 * "page"). For URLs on the current site, "relScriptPath" and "relUrl" are set as well, and for
 * page links additionally "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
    } elseif (strpos($url, '?file:') !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, strpos($url, '?file:') + 1));
    } else {
        $curURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        // Find the length of the common prefix of the URL and the site URL. The loop is bounded by
        // the shorter string so that no offset beyond the end of either string is read.
        $comparableLength = min(strlen($url), strlen($curURL));
        for ($a = 0; $a < $comparableLength; $a++) {
            if ($url[$a] !== $curURL[$a]) {
                break;
            }
        }
        $info['relScriptPath'] = substr($curURL, $a);
        $info['relUrl'] = substr($url, $a);
        $info['url'] = $url;
        $info['type'] = 'ext';
        // parse_url() only returns the components that are present, so each one is read with a fallback
        $siteUrl_parts = parse_url($url);
        $curUrl_parts = parse_url($curURL);
        $hostsMatch = ($siteUrl_parts['host'] ?? null) == ($curUrl_parts['host'] ?? null);
        // The script path has to be empty (FE-EDIT) or start with the TYPO3 main directory
        $scriptPathMatches = !$info['relScriptPath']
            || (defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir);
        if ($hostsMatch && $scriptPathMatches) {
            $uP = parse_url($info['relUrl']);
            if ($info['relUrl'] === '#' . ($siteUrl_parts['fragment'] ?? '')) {
                $info['url'] = $info['relUrl'];
                $info['type'] = 'anchor';
            } elseif (!trim($uP['path'] ?? '') || $uP['path'] === 'index.php') {
                // URL is a page (id parameter)
                $pp = preg_split('/^id=/', $uP['query'] ?? '');
                $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1] ?? '');
                $parameters = explode('&', $pp[1]);
                $id = array_shift($parameters);
                if ($id) {
                    $info['pageid'] = $id;
                    $info['cElement'] = $uP['fragment'] ?? null;
                    $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
                    $info['type'] = 'page';
                    $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
                }
            } else {
                $info['url'] = $info['relUrl'];
                $info['type'] = 'file';
            }
        } else {
            // Not a URL of this site: the relative parts carry no meaning
            unset($info['relScriptPath']);
            unset($info['relUrl']);
        }
    }
    return $info;
}
1267
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 *
 * For each <a> tag with a non-empty href: if the href has no scheme, the site URL is prepended;
 * if it has a scheme other than "mailto", the attribute data-htmlarea-external is set.
 * <a> tags without href (anchors) are left without href and always get the rtekeep attribute.
 * Nested <a> tags are processed recursively with the same $dontSetRTEKEEP setting.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set.
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold an A-tag block
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Checking if there is a scheme, and if not, prepend the current url.
        // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved.
        // A missing href attribute counts as "no content", just like an empty one.
        if (isset($attribArray['href']) && (string)$attribArray['href'] !== '') {
            $uP = parse_url(strtolower($attribArray['href']));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
            } elseif ($uP['scheme'] != 'mailto') {
                $attribArray['data-htmlarea-external'] = 1;
            }
        } else {
            $attribArray['rtekeep'] = 1;
        }
        if (!$dontSetRTEKEEP) {
            $attribArray['rtekeep'] = 1;
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        // Pass the flag on, so nested tags are treated like the outer ones
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]), $dontSetRTEKEEP) . $eTag;
    }
    return implode('', $blockSplit);
}
1304
/**
 * Adjusts the dimensions of an image according to the processing option "plainImageMode".
 *
 * - "lockDimensions": width and height are set to the values from $imageInfo
 * - "lockRatioWhenSmaller": width is limited to the width from $imageInfo, height follows the ratio
 * - "lockRatio": height follows the ratio of $imageInfo for the given width
 * With any other (or no) mode the attributes are returned unchanged.
 *
 * @param array $imageInfo info array of the image, width in key 0 and height in key 1
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];

    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }

    if ($mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio') {
        // Only this mode caps the width at the width of the image
        if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
            $attribArray['width'] = $imageInfo[0];
        }
        // Derive the height from the aspect ratio; skipped for a zero width to avoid a division by zero
        if ($imageInfo[0] > 0) {
            $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
        }
    }
    return $attribArray;
}
1339
/**
 * Called before any processing / transformation is made.
 * Strips every CR from the content, so that internally only LFs are dealt with.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the content without any CR
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1354
/**
 * Called after any processing / transformation was made.
 * Just before the content is returned by the RTE parser, all line breaks are unified to CRLF.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the content with CRLF line breaks
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First drop any CR that has entered in the meantime, so only LFs are left ...
    $lfOnlyContent = $this->streamlineLineBreaksForProcessing($content);
    // ... then turn each LF into a CRLF
    return str_replace(LF, CRLF, $lfOnlyContent);
}
1372
/**
 * Content Transformation from DB to RTE
 * Resolves the href of every <a> tag via the LinkService. If it is a page link whose page
 * record cannot be fetched, a data-rte-error attribute and some offensive styling are added.
 * <a> tags without href are left untouched.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Even positions hold the content between the <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        $hrefInformation = $linkService->resolve($attributes['href']);
        $pageIsMissing = $hrefInformation['type'] === LinkService::TYPE_PAGE
            && !is_array(BackendUtility::getRecord('pages', $hrefInformation['pageuid']));
        if ($pageIsMissing) {
            $attributes['data-rte-error'] = 'Page with ID ' . $hrefInformation['pageuid'] . ' not found';
            $styling = 'background-color: yellow; border:2px red solid; color: black;';
            // Append to an existing style, otherwise the styling becomes the style
            $attributes['style'] = empty($attributes['style'])
                ? $styling
                : $attributes['style'] . ' ' . $styling;
        }
        // Always rewrite the block to allow the nested calling even if a page is found
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($value));
        $blocks[$position] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $blocks);
}
1415
/**
 * Content Transformation from RTE to DB
 * Removes the data-rte-error attribute and the error styling from all <a> tags that have a href.
 * <a> tags without href are left untouched.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    foreach ($blocks as $position => $value) {
        // Even positions hold the content between the <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        // Always remove the markers again (regardless of the page was found or not)
        // so the database does not contain ugly stuff
        unset($attributes['data-rte-error']);
        if (isset($attributes['style'])) {
            $remainingStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
            if (empty($remainingStyle)) {
                unset($attributes['style']);
            } else {
                $attributes['style'] = $remainingStyle;
            }
        }
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($value));
        $blocks[$position] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $blocks);
}
1450
/**
 * Fetches a logger named after the class of this object from the LogManager
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
}
1462 }