[TASK] Always keep <a> instead of <link> tags in RTE transformations
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\LinkHandling\LinkService;
19 use TYPO3\CMS\Core\Log\LogManager;
20 use TYPO3\CMS\Core\Resource;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
23
24 /**
25 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
26 *
27 * Concerning line breaks:
28 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
29 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
30 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
31 */
32 class RteHtmlParser extends HtmlParser
33 {
/**
 * Comma separated list of (upper case) tag names treated as block elements. Content is split along
 * these tags and they are not wrapped into a "p" tag during the transformation.
 * Replaced by 'proc.blockElementList' when that option is set.
 *
 * @var string
 */
public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma separated list of the tags that are allowed by default.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';

/**
 * Page id (pid) of the record that is being processed.
 *
 * @var int
 */
public $recPid = 0;

/**
 * Reference to the processed element in the form [table]:[field], eg. "tt_content:bodytext"
 *
 * @var string
 */
public $elRef = '';

/**
 * The Page TSconfig currently in effect
 *
 * @var array
 */
public $tsConfig = [];

/**
 * The processing options ("proc." section) taken from Page TSconfig
 *
 * @var array
 */
public $procOptions = [];

/**
 * Remaining recursion budget; acts as a run-away brake for recursive calls.
 *
 * @var int
 */
public $TS_transform_db_safecounter = 100;

/**
 * Cache for data computed by the processing function
 *
 * @var array
 */
public $getKeepTags_cache = [];

/**
 * The CSS class names that are allowed in the RTE
 *
 * @var array
 */
public $allowedClasses = [];

/**
 * HTML attributes that may stay on <p> tags. As <p> tags currently go through a special wrapping
 * routine, they are configured separately via 'proc.keepPDIVattribs'.
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop',
];

/**
 * Tags that may live outside of <p> sections - usually similar to the block elements plus a few
 * special tags like <hr> and <img> (if images are allowed).
 * Can be replaced entirely via 'proc.allowTagsOutside'.
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'header',
    'hr',
    'nav',
    'section',
];
133
134 /**
135 * Initialize, setting element reference and record PID
136 *
137 * @param string $elRef Element reference, eg. "tt_content:bodytext"
138 * @param int $recPid PID of the record (page id)
139 * @return void
140 */
public function init($elRef = '', $recPid = 0)
{
    // Remember which "table:field" element and which page the following transformations belong to.
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
146
147 /**********************************************
148 *
149 * Main function
150 *
151 **********************************************/
152 /**
153 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
154 * This is the main function called from DataHandler and transfer data classes
155 *
156 * @param string $value Input value
157 * @param array $specConf Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can set up features for the field rendering and in particular the RTE takes all its major configuration options from there!
158 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
159 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
160 * @return string Output value
161 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // All processing options live below the "proc." key; a missing key simply means "nothing configured"
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
    }

    // Setting modes / transformations to be called
    if ((string)($this->procOptions['overruleMode'] ?? '') !== '') {
        // TSconfig overrules whatever the field configuration asks for (comma separated list)
        $modes = GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']);
    } else {
        // Get parameters for rte_transformation from the field configuration (dash separated list)
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters'] ?? null);
        $modes = GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode'] ?? '');
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Only the two documented directions trigger transformations; anything else leaves the modes untouched
    if ($direction === 'db' || $direction === 'rte') {
        // User defined transformations, registered per transformation key
        $userTransformations = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'] ?? [];

        // Traverse modes
        foreach ($modes as $cmd) {
            $classReference = $userTransformations[$cmd] ?? null;
            if ($classReference) {
                // A user defined transformation replaces the default handling of this mode.
                // The processor is told its transformation key in BOTH directions, so that one class
                // registered for several keys can tell them apart regardless of the direction.
                $processor = GeneralUtility::getUserObj($classReference);
                $processor->pObj = $this;
                $processor->transformationKey = $cmd;
                $value = $direction === 'db'
                    ? $processor->transform_db($value, $this)
                    : $processor->transform_rte($value, $this);
            } elseif ($direction === 'db') {
                // ... else use defaults for content going into the database:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            } else {
                // ... else use defaults for content going into the RTE:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
265
266 /**
267 * Ensures what transformation modes should be executed, and that they are only executed once.
268 *
269 * @param string $direction
270 * @param array $modes
271 * @return array the resolved transformation modes
272 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // A single entry may itself hold a comma separated list, so flatten and trim everything first
    $requestedModes = GeneralUtility::trimExplode(',', implode(',', $modes), true);

    // Expand the shortcut "ts_css" into the modes it stands for. The comparison is done per mode
    // (exact match) so that custom mode names which merely contain "ts_css" are left alone.
    $resolvedModes = [];
    foreach ($requestedModes as $mode) {
        if ($mode === 'ts_css') {
            $resolvedModes[] = 'detectbrokenlinks';
            $resolvedModes[] = 'css_transform';
            $resolvedModes[] = 'ts_images';
            $resolvedModes[] = 'ts_links';
        } else {
            $resolvedModes[] = $mode;
        }
    }

    // Every mode is applied only once (first occurrence wins)
    $resolvedModes = array_unique($resolvedModes);

    // Content going to the RTE runs the modes in reverse order
    if ($direction === 'rte') {
        $resolvedModes = array_reverse($resolvedModes);
    }

    return $resolvedModes;
}
289
290 /**
291 * Runs the HTML parser if it is configured
292 * Getting additional HTML cleaner configuration. These are applied either before or after the main transformation
293 * is done and thus totally independent processing options you can set up.
294 *
295 * This is only possible via TSconfig (procOptions) currently.
296 *
297 * @param string $content
298 * @param string $configurationDirective used to look up in the procOptions if enabled, and then fetch the HTMLparser configuration from the same key suffixed with a dot
299 * @return string the processed content
300 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    // Nothing to do unless the directive is switched on in the processing options
    if (!$this->procOptions[$configurationDirective]) {
        return $content;
    }
    // The parser setup itself is stored under the same key with a trailing dot
    $parserSetup = $this->HTMLparserConfig($this->procOptions[$configurationDirective . '.']);
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $parserSetup;
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
309
310 /************************************
311 *
312 * Specific RTE TRANSFORMATION functions
313 *
314 *************************************/
315 /**
316 * Transformation handler: 'ts_images' / direction: "db"
317 * Processing images inserted in the RTE.
318 * This is used when content goes from the RTE to the database.
319 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL.
320 * If it turns out that the URL points to another website than the current one, the image is read from that external URL and moved to the local server.
321 * Also "magic" images are processed here.
322 *
323 * @param string $value The content from RTE going to Database
324 * @return string Processed content
325 */
public function TS_images_db($value)
{
    // Split content by <img> tags; every odd index of the result holds one <img> tag
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        // Site URL without the request host, i.e. the path part of the site URL
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Image found, do processing:
            if ($k % 2) {
                // Get attributes
                list($attribArray) = $this->get_tag_attributes($v, true);
                // It's always an absolute URL coming from the RTE into the Database.
                $absoluteUrl = trim($attribArray['src'] ?? '');
                // Make path absolute if it is relative (has no scheme) and starts with the site path.
                // The scheme has to be read with parse_url(); pathinfo() never returns a "scheme" key.
                $urlParts = parse_url($absoluteUrl);
                $hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
                if ($sitePath && !$hasScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                    $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                    $absoluteUrl = $siteUrl . $absoluteUrl;
                }
                // Image dimensions set in the img tag, if any
                $imgTagDimensions = $this->getWHFromAttribs($attribArray);
                if ($imgTagDimensions[0]) {
                    $attribArray['width'] = $imgTagDimensions[0];
                }
                if ($imgTagDimensions[1]) {
                    $attribArray['height'] = $imgTagDimensions[1];
                }
                $originalImageFile = null;
                if (!empty($attribArray['data-htmlarea-file-uid'])) {
                    // An original image file uid is available
                    try {
                        /** @var $originalImageFile Resource\File */
                        $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                    } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                        // Log the fact the file could not be retrieved.
                        $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                        $this->getLogger()->error($message);
                    }
                }
                if ($originalImageFile instanceof Resource\File) {
                    // Public url of local file is relative to the site url, absolute otherwise
                    if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                        // This is a plain image, i.e. reference to the original image
                        if (!empty($this->procOptions['plainImageMode'])) {
                            // "plain image mode" is configured
                            // Find the dimensions of the original image
                            $imageInfo = [
                                $originalImageFile->getProperty('width'),
                                $originalImageFile->getProperty('height')
                            ];
                            if (!$imageInfo[0] || !$imageInfo[1]) {
                                // Dimensions are not known from the file properties, read them from the file itself
                                $filePath = $originalImageFile->getForLocalProcessing(false);
                                $imageInfo = @getimagesize($filePath);
                            }
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                    } else {
                        // Magic image case: get a processed file with the requested configuration
                        $imageConfiguration = [
                            'width' => $imgTagDimensions[0],
                            'height' => $imgTagDimensions[1]
                        ];
                        $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
                    // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                    // Fetch the external image
                    $externalFile = GeneralUtility::getUrl($absoluteUrl);
                    if ($externalFile) {
                        $pU = parse_url($absoluteUrl);
                        $pI = pathinfo($pU['path'] ?? '');
                        // URLs without a file extension are simply not imported
                        $extension = strtolower($pI['extension'] ?? '');
                        if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                            $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                            // We insert this image into the user default upload folder
                            $elementReference = explode(':', $this->elRef);
                            $table = $elementReference[0];
                            $field = $elementReference[1] ?? null;
                            /** @var Resource\Folder $folder */
                            $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                            /** @var Resource\File $fileObject */
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = [
                                'width' => $attribArray['width'] ?? null,
                                'height' => $attribArray['height'] ?? null
                            ];
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                    // Finally, check image as local file (siteURL equals the one of the image)
                    // Image has no data-htmlarea-file-uid attribute
                    // Relative path, rawurldecoded for special characters.
                    $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                    // Absolute filepath, locked to relative path of this project
                    $filepath = GeneralUtility::getFileAbsFileName($path);
                    // Check file existence (in relative directory to this installation!)
                    if ($filepath && @is_file($filepath)) {
                        // Treat it as a plain image
                        if (!empty($this->procOptions['plainImageMode'])) {
                            // If "plain image mode" has been configured
                            // Find the original dimensions of the image
                            $imageInfo = @getimagesize($filepath);
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                        // Let's try to find a file uid for this image
                        try {
                            $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                            if ($fileOrFolderObject instanceof Resource\FileInterface) {
                                $fileIdentifier = $fileOrFolderObject->getIdentifier();
                                /** @var Resource\AbstractFile $fileObject */
                                $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                                // @todo if the retrieved file is a processed file, get the original file...
                                $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // Nothing to be done if file/folder not found
                        }
                    }
                }
                // Remove width and height from style attribute (the attribute is always written, possibly empty)
                $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                // Convert absolute to relative url
                if (GeneralUtility::isFirstPartOfStr($attribArray['src'] ?? '', $siteUrl)) {
                    $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
            }
        }
    }
    return implode('', $imgSplit);
}
474
475 /**
476 * Transformation handler: 'ts_images' / direction: "rte"
477 * Processing images from database content going into the RTE.
478 * Processing includes converting the src attribute to an absolute URL.
479 *
480 * @param string $value Content input
481 * @return string Content output
482 */
public function TS_images_rte($value)
{
    // Cut the content along <img> tags; the tags themselves end up on the odd indexes
    $parts = $this->splitTags('img', $value);
    if (count($parts) <= 1) {
        // No image in the content
        return implode('', $parts);
    }

    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    foreach ($parts as $index => $part) {
        if (!($index % 2)) {
            // Text between images stays as it is
            continue;
        }
        // Read the attributes of the img tag
        list($attributes) = $this->get_tag_attributes($part, true);
        $currentUrl = trim($attributes['src']);
        $isAbsolute = strtolower(substr($currentUrl, 0, 4)) === 'http';
        if (!$isAbsolute) {
            // A site living in a subpath (eg. /~user_jim/) already has that path inside $siteUrl,
            // so strip it from the src before the site URL is prepended
            $withoutSitePath = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $attributes['src']);
            $attributes['src'] = $siteUrl . $withoutSitePath;
        }
        // An alt attribute is mandatory
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $parts[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, 1, 1) . ' />';
    }

    // Glue the processed pieces together again
    return implode('', $parts);
}
513
514 /**
515 * Transformation handler: 'ts_links' / direction: "db"
516 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
517 *
518 * Splits content into <a> tag blocks and processes each tag, and allows hooks to actually render
519 * the result.
520 *
521 * @param string $value Content input
522 * @return string Content output
523 * @see TS_links_rte()
524 */
public function TS_links_db($value)
{
    // Odd indexes of the split result hold the <a>...</a> blocks
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);

        // Anchors without a href attribute (e.g. <a name="...">) are no links: they must neither be sent
        // through the link service nor receive a href. Keep the tag as it is and only process its content.
        if (!isset($tagAttributes['href'])) {
            $blockSplit[$k] = $this->getFirstTag($v)
                . $this->TS_links_db($this->removeFirstAndLastTag($v)) . '</a>';
            continue;
        }

        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagAttributes['href']);

        // Modify parameters, this hook should be deprecated
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
            $parameters = [
                'currentBlock' => $v,
                'linkInformation' => $linkInformation,
                // NOTE(review): TS_links_rte() obtains the URL via LinkService::asString();
                // confirm that resolve() really provides an 'href' key, otherwise this is always null.
                'url' => $linkInformation['href'] ?? null,
                'attributes' => $tagAttributes
            ];
            // Every processor receives the original block; the result of the last one is kept
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
            }
        } else {
            // Otherwise store the link as <a> tag as default by TYPO3, with the new link service syntax
            $tagAttributes['href'] = $linkService->asString($linkInformation);
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                . $this->TS_links_db($this->removeFirstAndLastTag($v)) . '</a>';
        }
    }
    return implode('', $blockSplit);
}
557
558 /**
559 * Transformation handler: 'ts_links' / direction: "rte"
560 * Converting TYPO3-specific <link> tags to <a> tags
561 *
562 * This functionality is only used to convert legacy <link> tags to the new linking syntax using <a> tags, and will
563 * not be converted back to <link> tags anymore.
564 *
565 * @param string $value Content input
566 * @return string Content output
567 */
public function TS_links_rte($value)
{
    $value = $this->TS_AtagToAbs($value);
    // Cut the content along the TYPO3 pseudo tag "<link>"; the <link> blocks sit on the odd indexes
    $segments = $this->splitIntoBlock('link', $value, true);
    foreach ($segments as $index => $segment) {
        if (!($index % 2)) {
            continue;
        }
        // Drop the trailing ">" and the leading "<link " to get the raw typolink parameter
        $openingTag = substr($this->getFirstTag($segment), 0, -1);
        $typoLinkData = explode(' ', $openingTag, 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parse the TypoLink data, like \TYPO3\CMS\Frontend\ContentObject->typoLink() does
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);
        $href = $linkService->asString($linkInformation);

        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? null;
        if (!is_array($postProcessors)) {
            // Default: render the legacy <link> tag as a regular <a> tag and process nested content
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $this->TS_links_rte($this->removeFirstAndLastTag($segment))
                . '</a>';
            continue;
        }

        // Hook handling. Backwards-compatibility: report an error if a linked page does not exist
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            if (!is_array($pageRecord)) {
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $segment,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        // Each registered processor is handed the same parameters; the last result is kept
        foreach ($postProcessors as $objRef) {
            $processor = GeneralUtility::getUserObj($objRef);
            $segments[$index] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    return implode('', $segments);
}
625
626 /**
627 * Transformation handler: 'css_transform' / direction: "db"
628 * Cleaning (->db) for standard content elements (ts)
629 *
630 * @param string $value Content input
631 * @return string Content output
632 * @see TS_transform_rte()
633 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Give the budget back before bailing out; otherwise every triggered brake would
        // permanently lower the counter for all later calls on this object.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Container elements: keep the opening tag and transform the content recursively
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is left untouched
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace(('/[' . LF . ']+/'), ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                // Whitespace-only pieces between blocks are dropped
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
695
696 /**
697 * Wraps a-tags that contain a style attribute with a span-tag
698 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
699 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
700 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
701 * allowed.
702 *
703 * @param string $value Content input
704 * @return string Content output
705 */
public function transformStyledATags($value)
{
    // Odd indexes of the split result hold the <a>...</a> blocks
    $blocks = $this->splitIntoBlock('A', $value);
    foreach ($blocks as $index => $block) {
        if (!($index % 2)) {
            continue;
        }
        list($anchorAttributes) = $this->get_tag_attributes($this->getFirstTag($block), true);
        // Only anchors that carry a style and are not flagged with "rteerror" get wrapped
        if (!$anchorAttributes['style'] || $anchorAttributes['rteerror']) {
            continue;
        }
        // Move the style attribute from the anchor to the wrapping span
        $spanAttributes = ['style' => $anchorAttributes['style']];
        unset($anchorAttributes['style']);
        $blocks[$index] = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
            . $this->removeFirstAndLastTag($block)
            . '</a></span>';
    }
    return implode('', $blocks);
}
725
726 /**
727 * Transformation handler: css_transform / direction: "rte"
728 * Set (->rte) for standard content elements (ts)
729 *
730 * @param string $value Content input
731 * @return string Content output
732 * @see TS_transform_db()
733 */
public function TS_transform_rte($value)
{
    // Block elements whose content is transformed recursively
    $containerTags = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Matches optional spaces followed by one linebreak at the very beginning
    $leadingLineBreakPattern = '/^[ ]*' . LF . '/';

    // Cut the database content along the block elements; blocks end up on the odd indexes
    $pieces = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($pieces as $index => $piece) {
        if ($index % 2) {
            // Inside one of the blocks
            $openingTag = $this->getFirstTag($piece);
            $tagName = strtolower($this->getFirstTagName($piece));
            if (in_array($tagName, $containerTags, true)) {
                $pieces[$index] = $openingTag . $this->TS_transform_rte($this->removeFirstAndLastTag($pieces[$index])) . '</' . $tagName . '>';
            }
            // The linebreak directly after a block belongs to the block, so strip it from the following piece
            $pieces[$index + 1] = preg_replace($leadingLineBreakPattern, '', $pieces[$index + 1]);
            continue;
        }

        // NON-block. The current value is read from $pieces, as the preceding block may have changed it.
        $nextTagName = $this->getFirstTagName($pieces[$index + 1]);
        $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $pieces[$index]) == 1);
        $isFollowedByBlockOrLast = GeneralUtility::inList($this->blockElementList, $nextTagName) || !isset($pieces[$index + 1]);
        if ($isFollowedByBlockOrLast) {
            if ($onlyLineBreaks) {
                // Nothing but linebreaks: drop the leading one
                $pieces[$index] = preg_replace($leadingLineBreakPattern, '', $pieces[$index]);
            } else {
                // Real content: reduce the number of trailing linebreaks by one
                $pieces[$index] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $pieces[$index]);
            }
        }
        // A blank piece is removed, unless it is blank because it consisted of linebreaks only
        if ((string)$pieces[$index] === '' && !$onlyLineBreaks) {
            unset($pieces[$index]);
        } else {
            $pieces[$index] = $this->setDivTags($pieces[$index]);
        }
    }
    return implode(LF, $pieces);
}
784
785 /***************************************************************
786 *
787 * Generic RTE transformation, analysis and helper functions
788 *
789 **************************************************************/
790
/**
 * Cleans content on its way from the RTE into the database.
 *
 * Thin wrapper around HTMLcleaner(): the tag configuration is the one getKeepTags()
 * builds for the "db" direction, and the processing option "dontRemoveUnknownTags_db"
 * (cast to bool) is handed over as the "keep unknown tags" argument.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        // By default unknown tags are removed
        (bool)$this->procOptions['dontRemoveUnknownTags_db']
    );
}
807
/**
 * Creates the configuration array for the HTMLcleaner function, based on whether content
 * goes TO or FROM the Rich Text Editor ($direction).
 *
 * The list of tags to keep is the default allowed tag list plus the processing option
 * "allowTags", minus the processing option "denyTags". Both options are compared
 * case-insensitively. The result is cached per direction in object memory.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    if (!is_array($this->getKeepTags_cache[$direction] ?? null)) {
        // Construct the list of tags to keep: defaults plus the configured "allowTags"
        $allowTags = strtolower($this->procOptions['allowTags'] ?? '');
        $keepTags = array_flip(GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . $allowTags, true));
        // Tags to deny are removed from the list. The keys of $keepTags are lower case,
        // so "denyTags" has to be lower-cased as well or upper-case entries would never match.
        $denyTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['denyTags'] ?? ''), true);
        foreach ($denyTags as $deniedTag) {
            unset($keepTags[$deniedTag]);
        }
        // The bold/italic transformation is active unless it has been switched off explicitly
        $transformBoldAndItalic = !isset($this->procOptions['transformBoldAndItalicTags'])
            || $this->procOptions['transformBoldAndItalicTags'];
        // Based on the direction of content, set further options:
        switch ($direction) {
            case 'rte':
                if ($transformBoldAndItalic) {
                    // Transform bold/italics tags to strong/em
                    if (isset($keepTags['b'])) {
                        $keepTags['b'] = ['remap' => 'STRONG'];
                    }
                    if (isset($keepTags['i'])) {
                        $keepTags['i'] = ['remap' => 'EM'];
                    }
                }
                // Convert the TypoScript style array (keys with dots) into the plain
                // multi-dimensional array the HTMLcleaner function understands
                list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? null, $keepTags);
                break;
            case 'db':
                if ($transformBoldAndItalic) {
                    // Transform strong/em back to bold/italics:
                    if (isset($keepTags['strong'])) {
                        $keepTags['strong'] = ['remap' => 'b'];
                    }
                    if (isset($keepTags['em'])) {
                        $keepTags['em'] = ['remap' => 'i'];
                    }
                }
                // Setting up span tags if they are allowed:
                if (isset($keepTags['span'])) {
                    $keepTags['span'] = [
                        'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                        'fixAttrib' => [
                            'class' => [
                                'removeIfFalse' => 1
                            ]
                        ],
                        'rmTagIfNoAttrib' => 1
                    ];
                    if (!empty($this->allowedClasses)) {
                        $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                    }
                }
                // Further options are taken from the processing options, with fallbacks
                $TSc = $this->procOptions['HTMLparser_db.'] ?? null;
                if (empty($TSc['globalNesting'])) {
                    $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
                }
                if (empty($TSc['noAttrib'])) {
                    $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
                }
                // Transforming the array from TypoScript to regular array:
                list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
                break;
        }
        // Cache the result (internally, in object memory) for the next call
        $this->getKeepTags_cache[$direction] = $keepTags;
    }
    return $this->getKeepTags_cache[$direction];
}
886
/**
 * Breaks $value up along its <p> sections and returns one line per section.
 * The idea is to turn the HTML returned by the RTE into ordinary, 'human-readable' lines.
 * setDivTags() does the opposite. Used for content going into the database.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise the lines joined by LF.
 * @return string|array Processed input value. A string is returned regardless of $returnArray if $value contains no p sections or the recursion brake has reached zero.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Third argument set: this is meant to eliminate false end-tags
    $sections = $this->splitIntoBlock('p', $value, true);
    // Nothing to divide (or recursion exhausted): hand back the plain content
    if (count($sections) <= 1 || $count <= 0) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    foreach ($sections as $index => $section) {
        if ($index % 2 === 0) {
            // Outside of a <p> section: drop everything but the tags allowed there
            $allowedTags = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
            $outside = $this->sanitizeLineBreaksForContentOnly(trim(strip_tags($section, $allowedTags)));
            if ((string)$outside === '') {
                // No content left at this position, so the position is removed
                unset($sections[$index]);
                continue;
            }
            // Wrap the textual content in <p> tags
            $plainText = strip_tags($outside);
            $sections[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outside);
            continue;
        }

        // Inside a <p> section: look for further p nesting recursively
        $nested = $this->divideIntoLines($this->removeFirstAndLastTag($section), $count - 1, true);
        if (is_array($nested)) {
            // Nested p sections were found (considered 'an error'); their lines replace THIS section
            $line = implode(LF, $nested);
        } else {
            // No nesting, so this is a true line that gets cleaned and re-wrapped
            $line = $this->processContentWithinParagraph($nested, $section);
        }
        // A line consisting of nothing but &nbsp; is made pure blank, unless it holds an image
        // or a tag with one of the listed attributes, which has to be treated as possible content.
        $isOnlyNbsp = trim(strip_tags($line)) === '&nbsp;';
        if ($isOnlyNbsp
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line))
        ) {
            $line = '';
        }
        $sections[$index] = $line;
    }

    if ($returnArray) {
        return $sections;
    }
    return implode(LF, $sections);
}
943
/**
 * Turns every line of $value into a <p></p> section, unless the line is already
 * wrapped in p or div tags or contains an hr tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner function, applied to each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // A blank line becomes a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line, protecting unknown tags (they can be removed in the entryHTMLparser),
            // then turn double-encoded &nbsp; back into a regular &nbsp;
            // (formerly this could be disabled with the option "dontConvAmpInNBSP_rte")
            $line = str_replace('&amp;nbsp;', '&nbsp;', $this->HTMLcleaner($line, $keepTags, 'protect'));
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $normalized = strtolower(trim($line));
            $isDivWrapped = substr($normalized, 0, 4) == '<div' && substr($normalized, -6) == '</div>';
            $isParagraphWrapped = substr($normalized, 0, 2) == '<p' && substr($normalized, -4) == '</p>';
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
984
/**
 * Used for transformation from RTE to DB
 *
 * Works on a single line within a <p> tag when storing into the database.
 * The content is cleaned via HTMLcleaner_db(), line breaks inside it are removed, and it is
 * wrapped in a freshly compiled <p> tag that only carries the attributes listed in
 * $this->allowedAttributesForParagraphTags (none at all if that list is empty).
 * If $this->allowedClasses is set, the class attribute is reduced to the allowed classes.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, its attributes are validated below
    $fTag = $this->getFirstTag($fullContentWithTag);
    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($fTag);
        // Make sure the tag attributes only contain the ones that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, $this->allowedAttributesForParagraphTags);

        // Only allow classes that are whitelisted in $this->allowedClasses.
        // Most paragraphs have no class attribute at all, so its existence is checked first.
        if (isset($tagAttributes['class'])
            && trim($tagAttributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($tagAttributes['class'], $this->allowedClasses, true)
        ) {
            // The attribute may hold several classes of which only some are allowed
            $classes = GeneralUtility::trimExplode(' ', $tagAttributes['class'], true);
            $classes = array_intersect($classes, $this->allowedClasses);
            if (!empty($classes)) {
                $tagAttributes['class'] = implode(' ', $classes);
            } else {
                unset($tagAttributes['class']);
            }
        }
    }
    // Remove any line break
    $content = str_replace(LF, '', $content);
    // Compile the surrounding <p> tag; rtrim() drops the blank after "p" if there are no attributes
    return '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
}
1028
/**
 * Puts every <hr> tag on a line of its own and tidies up the line breaks around it,
 * used when transforming from RTE to DB
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    // Surround each hr tag with LFs and write it in self-closing form
    $withSeparatedRules = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    // Replace two directly adjacent LFs by one (single pass)
    $withoutDoubleBreaks = str_replace(LF . LF, LF, $withSeparatedRules);
    // Strip an LF at the very beginning and at the end
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubleBreaks);
}
1042
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute (as a pixel value), use that!
 * Otherwise the "width" / "height" attributes are used.
 *
 * Only the CSS properties "width" and "height" themselves are evaluated; properties that
 * merely end in these words (e.g. "max-width", "border-width", "line-height") are ignored.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = trim($attribArray['style'] ?? '');
    $dimensions = [];
    foreach (['width', 'height'] as $property) {
        $size = 0;
        $match = [];
        // The lookbehind rejects matches inside longer property names such as "max-width"
        $pattern = '/(?<![\\w-])' . $property . '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';
        if ($style !== '' && preg_match($pattern, $style, $match)) {
            $size = (int)$match[1];
        }
        if (!$size) {
            // Nothing usable in the style attribute, fall back to the plain attribute
            $size = (int)($attribArray[$property] ?? 0);
        }
        $dimensions[] = $size;
    }
    return $dimensions;
}
1073
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * This functionality is not in use anymore
 *
 * The returned array always contains the keys "url" and "type" ("email", "file", "ext",
 * "anchor" or "page"). For type "page" the keys "pageid", "cElement" and "query" are added;
 * for URLs on the current site "relScriptPath" and "relUrl" are kept as well.
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) === 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    $filePosition = strpos($url, '?file:');
    if ($filePosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $filePosition + 1));
        return $info;
    }

    $curURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    // Length of the prefix both URLs have in common, without reading past the end of either string
    $maxPrefixLength = min(strlen($url), strlen($curURL));
    $prefixLength = 0;
    while ($prefixLength < $maxPrefixLength && $url[$prefixLength] === $curURL[$prefixLength]) {
        $prefixLength++;
    }
    $info['relScriptPath'] = substr($curURL, $prefixLength);
    $info['relUrl'] = substr($url, $prefixLength);
    $info['url'] = $url;
    $info['type'] = 'ext';
    // parse_url() returns FALSE for seriously malformed URLs and omits components
    // that are not present, hence the fallbacks below
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);
    $hostsMatch = ($siteUrl_parts['host'] ?? null) == ($curUrl_parts['host'] ?? null);
    // The script path has to be empty (FE-EDIT) or start with the TYPO3 main directory
    $scriptPathMatches = !$info['relScriptPath']
        || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir;
    if (!$hostsMatch || !$scriptPathMatches) {
        // Not a URL of this site, so it stays an external link
        unset($info['relScriptPath'], $info['relUrl']);
        return $info;
    }

    $uP = parse_url($info['relUrl']);
    $path = $uP['path'] ?? '';
    if ($info['relUrl'] === '#' . ($siteUrl_parts['fragment'] ?? '')) {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($path) || $path === 'index.php') {
        // URL is a page (id parameter)
        $pp = preg_split('/^id=/', $uP['query'] ?? '');
        $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1] ?? '');
        $parameters = explode('&', $pp[1]);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $uP['fragment'] ?? null;
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1137
/**
 * Converting the href of <A>-tags to absolute URLs
 *
 * A href without a scheme gets the current site URL prepended. Tags without a href, or with
 * an empty one, are left alone as they may be plain anchors. Nested <A>-tags are processed
 * recursively.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP Not in use anymore; the value is ignored.
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only the odd positions are <A> blocks
        if ($k % 2 === 0) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Checking if there is a scheme, and if not, prepend the current url.
        // ONLY do this if a href exists and has content - the <a> tag COULD be an anchor
        // (e.g. <a name="...">) and must then be preserved without getting a href added.
        if (isset($attribArray['href']) && $attribArray['href'] !== '') {
            $uP = parse_url(strtolower($attribArray['href']));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
            }
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
    }
    return implode('', $blockSplit);
}
1167
/**
 * Adjusts the width/height attributes of an image tag according to the
 * processing option "plainImageMode"
 *
 * - lockDimensions: width and height are taken over from the image info
 * - lockRatioWhenSmaller: the width is capped at the image width, then the height follows the image ratio
 * - lockRatio: the height follows the image ratio
 *
 * @param array $imageInfo: info array of the image (width in key 0, height in key 1)
 * @param array $attribArray: array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }
    if ($mode !== 'lockRatioWhenSmaller' && $mode !== 'lockRatio') {
        // Unknown mode: nothing to correct
        return $attribArray;
    }
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    // Both ratio modes derive the height from the width, provided the image width is known
    if ($imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1202
/**
 * Runs before any processing / transformation takes place.
 * Strips every CR (char 13) so that only LFs (char 10) are dealt with internally;
 * CRs have a very disturbing effect on the transformations.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1217
/**
 * Runs after all processing / transformation is done.
 * Right before the RTE parser returns the content, all line breaks
 * are unified to "CRLF"s again.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First drop any CR that sneaked in during processing, then expand each LF to CRLF
    $lfOnlyContent = $this->streamlineLineBreaksForProcessing($content);
    return str_replace(LF, CRLF, $lfOnlyContent);
}
1235
/**
 * Content Transformation from DB to RTE
 * Inspects every <a> tag with a href: if the link service resolves it to a page
 * and no record for that page can be fetched, the tag receives a "data-rte-error"
 * attribute and some offensive styling.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($segments as $index => $segment) {
        // Even positions hold the content between the <a> blocks
        if ($index % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        if (empty($attributes['href'])) {
            continue;
        }
        $linkDetails = $linkService->resolve($attributes['href']);
        $pageIsMissing = $linkDetails['type'] === LinkService::TYPE_PAGE
            && !is_array(BackendUtility::getRecord('pages', $linkDetails['pageuid']));
        if ($pageIsMissing) {
            $attributes['data-rte-error'] = 'Page with ID ' . $linkDetails['pageuid'] . ' not found';
            $styling = 'background-color: yellow; border:2px red solid; color: black;';
            // Append to an existing style attribute instead of replacing it
            $attributes['style'] = empty($attributes['style'])
                ? $styling
                : $attributes['style'] . ' ' . $styling;
        }
        // The block is rebuilt in any case so that nested <a> tags get processed as well
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($segment));
        $segments[$index] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $segments);
}
1278
/**
 * Content Transformation from RTE to DB
 * Strips the "data-rte-error" attribute and the error styling, which are added
 * to broken links, from all <a> tags that have a href
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    foreach ($segments as $index => $segment) {
        // Even positions hold the content between the <a> blocks
        if ($index % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        if (empty($attributes['href'])) {
            continue;
        }
        // The markers are always removed (regardless of whether the page was found or not)
        // so the database does not contain ugly stuff
        unset($attributes['data-rte-error']);
        if (isset($attributes['style'])) {
            $remainingStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
            if (empty($remainingStyle)) {
                unset($attributes['style']);
            } else {
                $attributes['style'] = $remainingStyle;
            }
        }
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($segment));
        $segments[$index] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $segments);
}
1313
/**
 * Fetches a logger named after the class of this object from the LogManager
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    $loggerName = get_class($this);
    return GeneralUtility::makeInstance(LogManager::class)->getLogger($loggerName);
}
1325 }