[BUGFIX] Do not mark "current" links as broken
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Psr\Log\LoggerAwareInterface;
18 use Psr\Log\LoggerAwareTrait;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
21 use TYPO3\CMS\Core\LinkHandling\LinkService;
22 use TYPO3\CMS\Core\Resource;
23 use TYPO3\CMS\Core\Utility\GeneralUtility;
24 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
25
26 /**
27 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
28 *
29 * Concerning line breaks:
30 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
31 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
32 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
33 */
34 class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
35 {
36 use LoggerAwareTrait;
37
38 /**
39 * List of elements that are not wrapped into a "p" tag while doing the transformation.
40 * @var string
41 */
42 public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
43
44 /**
45 * List of all tags that are allowed by default
46 * @var string
47 */
48 protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
49
50 /**
51 * Set this to the pid of the record manipulated by the class.
52 *
53 * @var int
54 */
55 public $recPid = 0;
56
57 /**
58 * Element reference [table]:[field], eg. "tt_content:bodytext"
59 *
60 * @var string
61 */
62 public $elRef = '';
63
64 /**
65 * Current Page TSConfig
66 *
67 * @var array
68 */
69 public $tsConfig = [];
70
71 /**
72 * Set to the TSconfig options coming from Page TSconfig
73 *
74 * @var array
75 */
76 public $procOptions = [];
77
78 /**
79 * Run-away brake for recursive calls.
80 *
81 * @var int
82 */
83 public $TS_transform_db_safecounter = 100;
84
85 /**
86 * Data caching for processing function
87 *
88 * @var array
89 */
90 public $getKeepTags_cache = [];
91
92 /**
93 * Storage of the allowed CSS class names in the RTE
94 *
95 * @var array
96 */
97 public $allowedClasses = [];
98
99 /**
100 * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling,
101 * they have a special place for configuration via 'proc.keepPDIVattribs'
102 *
103 * @var array
104 */
105 protected $allowedAttributesForParagraphTags = [
106 'class',
107 'align',
108 'id',
109 'title',
110 'dir',
111 'lang',
112 'xml:lang',
113 'itemscope',
114 'itemtype',
115 'itemprop'
116 ];
117
118 /**
119 * Any tags that are allowed outside of <p> sections - usually similar to the block elements
120 * plus some special tags like <hr> and <img> (if images are allowed).
121 * Completely overrideable via 'proc.allowTagsOutside'
122 *
123 * @var array
124 */
125 protected $allowedTagsOutsideOfParagraphs = [
126 'address',
127 'article',
128 'aside',
129 'blockquote',
130 'div',
131 'footer',
132 'header',
133 'hr',
134 'nav',
135 'section'
136 ];
137
/**
 * Stores the context of the record whose content is about to be transformed.
 *
 * @param string $elRef Element reference in the form [table]:[field], e.g. "tt_content:bodytext"
 * @param int $recPid PID (page id) of the record
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
149
150 /**********************************************
151 *
152 * Main function
153 *
154 **********************************************/
/**
 * Transforms a value for the RTE in the direction given by $direction ("db" or "rte").
 *
 * The method reads the processing options from $thisConfig['proc.'], resolves the list of
 * transformation modes, normalizes line breaks, optionally runs the configured entry HTML
 * parser, applies each mode (a registered custom transformation class wins over the built-in
 * handler of the same key), optionally runs the configured exit HTML parser and finally
 * normalizes line breaks again.
 *
 * All optional configuration keys are read defensively, so calling this method with an empty
 * or partial configuration does not raise "undefined index" notices.
 *
 * @param string $value Input value
 * @param null $_ unused
 * @param string $direction Direction of the transformation. "db": content comes from the RTE and goes into the database. "rte": content comes from the database and goes into the RTE. Any other value skips the mode handlers.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $_ = null, $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // The "proc." section is optional
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called
    if ((string)($this->procOptions['overruleMode'] ?? '') !== '') {
        $modes = GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']);
    } else {
        $modes = [$this->procOptions['mode'] ?? ''];
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // Class name of a user defined transformation registered for this mode, if any
        $className = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if ($className) {
                $_procObj = GeneralUtility::makeInstance($className);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if ($className) {
                $_procObj = GeneralUtility::makeInstance($className);
                $_procObj->pObj = $this;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value, true);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
276
/**
 * Resolves which transformation modes are executed, and makes sure each is executed only once.
 *
 * The shortcut mode "default" is expanded to
 * "detectbrokenlinks,css_transform,ts_images,ts_links". Only list entries that are exactly
 * "default" are expanded; a custom mode whose name merely contains that word is kept intact.
 * For direction "rte" the resulting order is reversed.
 *
 * @param string $direction
 * @param array $modes
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    $resolvedModes = [];
    // Entries may themselves be comma separated lists, so flatten them first
    foreach (GeneralUtility::trimExplode(',', implode(',', $modes), true) as $mode) {
        if ($mode === 'default') {
            // Replace the shortcut "default" with all default modes
            $resolvedModes[] = 'detectbrokenlinks';
            $resolvedModes[] = 'css_transform';
            $resolvedModes[] = 'ts_images';
            $resolvedModes[] = 'ts_links';
        } else {
            $resolvedModes[] = $mode;
        }
    }

    // Make list unique
    $resolvedModes = array_unique($resolvedModes);
    // Reverse order if direction is "rte"
    if ($direction === 'rte') {
        $resolvedModes = array_reverse($resolvedModes);
    }

    return $resolvedModes;
}
300
/**
 * Runs the HTML parser if it is enabled in the processing options.
 *
 * The directive itself (e.g. "entryHTMLparser_db") acts as the on/off switch; the parser
 * configuration is taken from the sub-array of the same name with a trailing dot. This is
 * applied either before or after the main transformation and is only possible via TSconfig
 * (procOptions) currently.
 *
 * @param string $content
 * @param string $configurationDirective key looked up in the procOptions to check whether the parser is enabled, and (with a trailing dot) to fetch its configuration
 * @return string the processed content, or the unchanged content if the directive is not enabled
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    if (empty($this->procOptions[$configurationDirective])) {
        return $content;
    }
    // The configuration sub-array is optional; null is handed over when it is missing
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $this->HTMLparserConfig(
        $this->procOptions[$configurationDirective . '.'] ?? null
    );
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
320
321 /************************************
322 *
323 * Specific RTE TRANSFORMATION functions
324 *
325 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processes images inserted in the RTE when content goes from the RTE to the database.
 *
 * For every <img> tag the method
 * - prefixes a site-path-relative src (no URL scheme) with the site URL,
 * - handles images that reference a file uid (plain image mode or magic image),
 * - fetches images from other hosts into the backend user's default upload folder
 *   (only in backend mode and unless 'dontFetchExtPictures' is set),
 * - tries to find a file uid for local images that do not carry one,
 * - removes width/height from the style attribute, ensures an alt attribute and
 *   finally makes the src relative to the site URL again.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Image found, do processing:
            if ($k % 2) {
                // Get attributes
                list($attribArray) = $this->get_tag_attributes($v, true);
                // It's always an absolute URL coming from the RTE into the Database.
                $absoluteUrl = trim($attribArray['src'] ?? '');
                // Make path absolute if it is relative and we have a site path which is not '/'.
                // parse_url() is used for the scheme check because pathinfo() never provides a scheme.
                $hasScheme = !empty(parse_url($absoluteUrl, PHP_URL_SCHEME));
                if ($sitePath && !$hasScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                    $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                    $absoluteUrl = $siteUrl . $absoluteUrl;
                }
                // Image dimensions set in the img tag, if any
                $imgTagDimensions = $this->getWHFromAttribs($attribArray);
                if ($imgTagDimensions[0]) {
                    $attribArray['width'] = $imgTagDimensions[0];
                }
                if ($imgTagDimensions[1]) {
                    $attribArray['height'] = $imgTagDimensions[1];
                }
                $originalImageFile = null;
                if (!empty($attribArray['data-htmlarea-file-uid'])) {
                    // An original image file uid is available
                    try {
                        /** @var $originalImageFile Resource\File */
                        $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                    } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                        // Log the fact the file could not be retrieved.
                        $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                        $this->logger->error($message);
                    }
                }
                if ($originalImageFile instanceof Resource\File) {
                    // Public url of local file is relative to the site url, absolute otherwise
                    if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                        // This is a plain image, i.e. reference to the original image
                        if (!empty($this->procOptions['plainImageMode'])) {
                            // "plain image mode" is configured
                            // Find the dimensions of the original image
                            $imageInfo = [
                                $originalImageFile->getProperty('width'),
                                $originalImageFile->getProperty('height')
                            ];
                            if (!$imageInfo[0] || !$imageInfo[1]) {
                                $filePath = $originalImageFile->getForLocalProcessing(false);
                                $imageInfo = @getimagesize($filePath);
                            }
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                    } else {
                        // Magic image case: get a processed file with the requested configuration
                        $imageConfiguration = [
                            'width' => $imgTagDimensions[0],
                            'height' => $imgTagDimensions[1]
                        ];
                        $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
                    // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                    // Fetch the external image
                    $externalFile = GeneralUtility::getUrl($absoluteUrl);
                    if ($externalFile) {
                        $pU = parse_url($absoluteUrl);
                        $pI = pathinfo($pU['path'] ?? '');
                        $extension = strtolower($pI['extension'] ?? '');
                        if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                            $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                            // We insert this image into the user default upload folder
                            list($table, $field) = array_pad(explode(':', $this->elRef), 2, null);
                            /** @var Resource\Folder $folder */
                            $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                            /** @var Resource\File $fileObject */
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = [
                                'width' => $attribArray['width'] ?? null,
                                'height' => $attribArray['height'] ?? null
                            ];
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                    // Finally, check image as local file (siteURL equals the one of the image)
                    // Image has no data-htmlarea-file-uid attribute
                    // Relative path, rawurldecoded for special characters.
                    $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                    // Absolute filepath, locked to relative path of this project
                    $filepath = GeneralUtility::getFileAbsFileName($path);
                    // Check file existence (in relative directory to this installation!)
                    if ($filepath && @is_file($filepath)) {
                        // Treat it as a plain image
                        if (!empty($this->procOptions['plainImageMode'])) {
                            // If "plain image mode" has been configured
                            // Find the original dimensions of the image
                            $imageInfo = @getimagesize($filepath);
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                        // Let's try to find a file uid for this image
                        try {
                            $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                            if ($fileOrFolderObject instanceof Resource\FileInterface) {
                                $fileIdentifier = $fileOrFolderObject->getIdentifier();
                                /** @var Resource\AbstractFile $fileObject */
                                $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                                // @todo if the retrieved file is a processed file, get the original file...
                                $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // Nothing to be done if file/folder not found
                        }
                    }
                }
                // Remove width and height from style attribute
                $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                // Convert absolute to relative url
                if (isset($attribArray['src']) && GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
                    $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
            }
        }
    }
    return implode('', $imgSplit);
}
485
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Processes images from database content going into the RTE.
 *
 * Every <img> tag whose src does not start with "http" gets its src converted to an absolute
 * URL: a leading site path is stripped and the site URL is prepended. The trimmed src value is
 * used for both the check and the rewrite, so surrounding whitespace cannot prevent the site
 * path from being stripped. An alt attribute is added when missing.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        foreach ($imgSplit as $k => $v) {
            // Only odd indexes hold an image tag
            if ($k % 2 === 0) {
                continue;
            }
            // Get the attributes of the img tag
            list($attribArray) = $this->get_tag_attributes($v, true);
            $src = trim($attribArray['src'] ?? '');
            // Transform the src attribute into an absolute url, if it is not one already
            if (strtolower(substr($src, 0, 4)) !== 'http') {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $src = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $src);
                $attribArray['src'] = $siteUrl . $src;
            }
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
        }
    }
    // Return processed content:
    return implode('', $imgSplit);
}
524
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Rewrites the href of every anchor tag through the LinkService.
 *
 * The content is split into <a> blocks. For each block the href is resolved by the
 * LinkService. If the deprecated hook "modifyParams_LinksDb_PostProc" is registered, the
 * registered classes render the block; otherwise the block is rebuilt as an <a> tag with the
 * href in LinkService string syntax, and its inner content is processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
        // Even indexes hold the content between the anchor blocks
        if (!($index % 2)) {
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagAttributes['href'] ?? '');

        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] ?? null;
        if ($postProcessors === null) {
            // No hook registered: store the link as <a> tag with the link service syntax
            $tagAttributes['href'] = $linkService->asString($linkInformation);
            $openingTag = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>';
            $segments[$index] = $openingTag . $this->TS_links_db($this->removeFirstAndLastTag($segments[$index])) . '</a>';
            continue;
        }

        // Modify parameters, this hook should be deprecated
        trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksDb_PostProc" will be removed in TYPO3 v10, use LinkService syntax to modify links to be stored in the database.', E_USER_DEPRECATED);
        $parameters = [
            'currentBlock' => $segment,
            'linkInformation' => $linkInformation,
            'url' => $linkInformation['href'],
            'attributes' => $tagAttributes
        ];
        foreach ($postProcessors as $className) {
            $processor = GeneralUtility::makeInstance($className);
            $segments[$index] = $processor->modifyParamsLinksDb($parameters, $this);
        }
    }
    return implode('', $segments);
}
568
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converts TYPO3-specific legacy <link> tags to <a> tags.
 *
 * The content is first passed through TS_AtagToAbs(), then split by <link> blocks. Each block
 * is decoded as TypoLink data, resolved through the LinkService and rendered either by the
 * deprecated hook "modifyParams_LinksRte_PostProc" or as a plain <a> tag whose inner content
 * is processed recursively. A deprecation notice is triggered if any <link> tag was found.
 *
 * @param string $value Content input
 * @param bool $internallyCalledFromCore internal option for calls where the Core is still using this function, to suppress method deprecations
 * @return string Content output
 * @deprecated will be removed in TYPO3 v10, only ->TS_AtagToAbs() should be called directly, <link> syntax is deprecated
 */
public function TS_links_rte($value, $internallyCalledFromCore = null)
{
    if ($internallyCalledFromCore === null) {
        trigger_error('This method will be removed in TYPO3 v10, use TS_AtagToAbs() directly and do not use <link> syntax anymore', E_USER_DEPRECATED);
    }
    $legacyLinkFound = false;
    // Split content by the TYPO3 pseudo tag "<link>"
    $segments = $this->splitIntoBlock('link', $this->TS_AtagToAbs($value), true);
    foreach ($segments as $index => $segment) {
        // Even indexes hold the content between the <link> blocks
        if (!($index % 2)) {
            continue;
        }
        $legacyLinkFound = true;
        // Drop the closing bracket, then split away the first "<link " part
        $openingTagWithoutBracket = substr($this->getFirstTag($segment), 0, -1);
        $typoLinkData = explode(' ', $openingTagWithoutBracket, 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);

        try {
            $href = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            $href = '';
        }

        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? false;
        if (!is_array($postProcessors)) {
            // No hook registered: set the <a> tag
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $openingTag = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>';
            $segments[$index] = $openingTag
                . $this->TS_links_rte($this->removeFirstAndLastTag($segments[$index]), $internallyCalledFromCore)
                . '</a>';
            continue;
        }

        // Modify parameters by a hook
        trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksRte_PostProc" will be removed in TYPO3 v10, use the link service to properly use ', E_USER_DEPRECATED);
        // backwards-compatibility: show an error message if the page is not found
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            // Page does not exist
            if (!is_array($pageRecord)) {
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $segment,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        foreach ($postProcessors as $className) {
            $processor = GeneralUtility::makeInstance($className);
            $segments[$index] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    if ($legacyLinkFound) {
        trigger_error('Content with <link> syntax was found, update your content to use the t3:// syntax, and migrate your content via the upgrade wizard in the install tool', E_USER_DEPRECATED);
    }
    return implode('', $segments);
}
651
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the configured block elements. Container-like blocks (blockquote,
 * dd, div, header, section, footer, nav, article, aside) are processed recursively, <pre>
 * blocks are left untouched, and all other blocks get their line breaks replaced by spaces.
 * Content outside of blocks is normalized around <hr> tags and handed to divideIntoLines().
 *
 * Recursion is limited by $TS_transform_db_safecounter. The counter is restored on every exit
 * path, including the run-away brake, so hitting the brake does not permanently reduce the
 * depth available to later calls.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Undo the decrement before bailing out, otherwise the counter leaks by one per brake hit
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
721
/**
 * Wraps anchor tags that carry a style attribute into a span tag holding that style.
 *
 * This is not in use anymore. It was necessary while <a> tags were transformed into <link>
 * tags in the database, because <link> tags cannot handle style attributes. The approach is
 * considered bad as it leaves an ugly <span> tag in the database, if allowedTags=span with
 * style attributes are allowed.
 *
 * @param string $value Content input
 * @return string Content output
 * @deprecated since TYPO3 v9.0, will be removed in TYPO3 v10, see comment above, adding attribute "rteerror" is not necessary anymore.
 */
public function transformStyledATags($value)
{
    trigger_error('This method will be removed in TYPO3 v10. TYPO3 can handle style attribute in anchor tags properly since TYPO3 v8 LTS', E_USER_DEPRECATED);
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
        // Only odd indexes hold an A-tag block
        if (!($index % 2)) {
            continue;
        }
        list($anchorAttributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        // Only act if "style" attribute is set and rteerror is not set!
        if ($anchorAttributes['style'] && !$anchorAttributes['rteerror']) {
            // Move the style attribute from the anchor to the wrapping span
            $spanAttributes = ['style' => $anchorAttributes['style']];
            unset($anchorAttributes['style']);
            $openingTags = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>';
            $segments[$index] = $openingTags . $this->removeFirstAndLastTag($segments[$index]) . '</a></span>';
        }
    }
    return implode('', $segments);
}
753
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the configured block elements. Container-like blocks (blockquote,
 * dd, div, header, section, footer, nav, article, aside) are processed recursively; the line
 * break directly following any block is removed. Content outside of blocks has its
 * leading/trailing line breaks adjusted and is then passed to setDivTags().
 *
 * The segment following the current one is read defensively, because the last segment has
 * no successor.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Based on tagname, we do transformations:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
            }
            // Remove the line break directly following the block
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1] ?? '');
        } else {
            // NON-block:
            $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
                // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                if (!$onlyLineBreaks) {
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // If the line contains only linebreaks, remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    return implode(LF, $blockSplit);
}
812
813 /***************************************************************
814 *
815 * Generic RTE transformation, analysis and helper functions
816 *
817 **************************************************************/
818
/**
 * Cleans content that is on its way into the database.
 *
 * Delegates to HTMLcleaner() of the parent class, using the keep-tags
 * configuration compiled for the "db" direction. Unknown tags are removed
 * unless the processing option "dontRemoveUnknownTags_db" is set.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        (bool)$this->procOptions['dontRemoveUnknownTags_db']
    );
}
835
/**
 * Builds (and caches per direction) the tag configuration handed to HTMLcleaner().
 *
 * The list of kept tags is the default allowed tags plus the processing option
 * "allowTags" (or "allowTags." when that is an array), minus "denyTags". The result is
 * then run through HTMLparserConfig() with direction specific options.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // Already compiled for this direction: serve it from the in-object cache
    if (is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Additional allowed tags come either as array ("allowTags.") or as comma list ("allowTags")
    $configuredTags = is_array($this->procOptions['allowTags.'])
        ? implode(',', $this->procOptions['allowTags.'])
        : $this->procOptions['allowTags'];
    $allowedTagList = $this->defaultAllowedTagsList . ',' . strtolower($configuredTags);
    // Tag names become the array keys
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $allowedTagList, true));

    // Denied tags are taken out again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'], true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    switch ($direction) {
        case 'rte':
            // Convert the TypoScript style configuration (with dots) into the plain array HTMLcleaner expects
            list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
            break;
        case 'db':
            // span tags, if allowed, get a fixed attribute configuration
            if (isset($keepTags['span'])) {
                $spanConfiguration = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $spanConfiguration['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = $spanConfiguration;
            }
            // Parser options from the processing options, completed with defaults where unset
            $parserConfiguration = $this->procOptions['HTMLparser_db.'];
            if (!$parserConfiguration['globalNesting']) {
                $parserConfiguration['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (!$parserConfiguration['noAttrib']) {
                $parserConfiguration['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Convert from TypoScript array to regular array
            list($keepTags) = $this->HTMLparserConfig($parserConfiguration, $keepTags);
            break;
    }

    // Remember the result for subsequent calls on this object
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
902
/**
 * Resolves $value into lines based on its <p> sections, for content going into the database.
 * setDivTags() does the opposite.
 *
 * When $value contains no <p> section, or the recursion brake has reached zero, the
 * value is returned as a string (only passed through sanitizeLineBreaksForContentOnly()),
 * regardless of $returnArray. The recursive call relies on that to tell "no nested
 * paragraphs" (string) apart from "nested paragraphs found" (array).
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument makes splitIntoBlock() eliminate false end-tags
    $paragraphBlocks = $this->splitIntoBlock('p', $value, true);
    if (count($paragraphBlocks) <= 1 || $count <= 0) {
        // No paragraphs at all (or nesting too deep): return the content plainly
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    foreach ($paragraphBlocks as $index => $block) {
        if ($index % 2 === 0) {
            // Outside of a <p> section: keep only the tags allowed there
            $outside = trim(strip_tags($block, '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>'));
            $outside = $this->sanitizeLineBreaksForContentOnly($outside);
            if ((string)$outside === '') {
                // Nothing left, drop the position
                unset($paragraphBlocks[$index]);
            } else {
                // Wrap the remaining text (without its tags) into <p> tags
                $plainText = strip_tags($outside);
                $paragraphBlocks[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outside);
            }
            continue;
        }

        // Inside a <p> section: explode any further p nesting recursively
        $subLines = $this->divideIntoLines($this->removeFirstAndLastTag($block), $count - 1, true);
        if (is_array($subLines)) {
            // Nested paragraphs (considered an error) replace the content of this section directly
            $line = implode(LF, $subLines);
        } else {
            // A true single line: clean it and rebuild the surrounding <p> tag
            $line = $this->processContentWithinParagraph($subLines, $block);
        }

        // A line consisting only of &nbsp; is made pure blank, unless it contains an image
        // or a tag carrying one of the listed attributes, which are treated as possible content.
        $isBlank = trim(strip_tags($line)) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line));
        $paragraphBlocks[$index] = $isBlank ? '' : $line;
    }

    if ($returnArray) {
        return $paragraphBlocks;
    }
    return implode(LF, $paragraphBlocks);
}
959
/**
 * Turns every LF separated line of $value into a <p></p> section, for content going
 * FROM the database TO the RTE.
 *
 * Lines that contain an <hr> tag, or that are already completely wrapped in
 * <div>...</div> or <p>...</p>, are not wrapped again. Blank lines become &nbsp;.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for cleaning each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $processedLines = [];
    foreach (explode(LF, $value) as $line) {
        if (trim($line) === '') {
            $line = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $line = $this->HTMLcleaner($line, $keepTags, 'protect');
            // Turn double-encoded &nbsp; back into a regular &nbsp;
            $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
        }

        $containsHr = preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line);
        if (!$containsHr) {
            $normalized = strtolower(trim($line));
            $isDivWrapped = substr($normalized, 0, 4) === '<div' && substr($normalized, -6) === '</div>';
            $isParagraphWrapped = substr($normalized, 0, 2) === '<p' && substr($normalized, -4) === '</p>';
            // Only set p-tags if there is not already div or p tags
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $processedLines[] = $line;
    }
    return implode(LF, $processedLines);
}
1000
/**
 * Used for transformation from RTE to DB
 *
 * Works on a single line within a <p> tag when storing into the database.
 * The content is cleaned via HTMLcleaner_db(), line breaks are removed from it and it
 * is wrapped in a freshly compiled <p> tag that only carries attributes listed in
 * $this->allowedAttributesForParagraphTags. If $this->allowedClasses is not empty,
 * the class attribute is reduced to the classes listed there.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, and validate the attributes
    $fTag = $this->getFirstTag($fullContentWithTag);
    // Check which attributes of the <p> tag to keep
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($fTag);
        // Make sure the tag attributes only contain the ones that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // A <p> tag without class attribute must not trigger an undefined index access
        $classAttribute = $tagAttributes['class'] ?? '';
        // Only allow classes that are whitelisted in $this->allowedClasses
        if (trim($classAttribute) !== '' && !empty($this->allowedClasses) && !in_array($classAttribute, $this->allowedClasses, true)) {
            $classes = GeneralUtility::trimExplode(' ', $classAttribute, true);
            $classes = array_intersect($classes, $this->allowedClasses);
            if (!empty($classes)) {
                $tagAttributes['class'] = implode(' ', $classes);
            } else {
                unset($tagAttributes['class']);
            }
        }
    } else {
        $tagAttributes = [];
    }
    // Remove any line break
    $content = str_replace(LF, '', $content);
    // Compile the surrounding <p> tag; rtrim() drops the space after "p" when there are no attributes
    $content = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
    return $content;
}
1044
/**
 * Normalizes line breaks around <hr> tags, used when transforming from RTE to DB.
 *
 * Every <hr> tag is rewritten as a self-closing tag surrounded by LFs, each pair
 * of consecutive LFs is then replaced by a single LF (one pass), and finally a LF
 * at the start and at the end of the content is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $withIsolatedRulers = preg_replace(
        '/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i',
        LF . '<$1$2/>' . LF,
        $content
    );
    $withoutDoubleBreaks = str_replace(LF . LF, LF, $withIsolatedRulers);
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubleBreaks);
}
1058
/**
 * Finds width and height from attrib-array
 * Pixel values found in the "style" attribute ("width: NNpx" / "height: NNpx") take
 * precedence; when one is missing or zero, the "width" / "height" attribute is used.
 *
 * Missing attributes and non-matching styles are handled without reading
 * undefined array keys.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = trim($attribArray['style'] ?? '');
    $w = 0;
    $h = 0;
    if ($style) {
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = [];
        // Width ($reg stays empty when the pattern does not match)
        preg_match('/width' . $regex . '/i', $style, $reg);
        $w = (int)($reg[1] ?? 0);
        // Height
        preg_match('/height' . $regex . '/i', $style, $reg);
        $h = (int)($reg[1] ?? 0);
    }
    // Fall back to the plain attributes where the style gave no usable value
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1089
/**
 * Analyses the href of an <A>-tag and reports whether it is an email, external,
 * file, anchor or page link.
 * This functionality is not in use anymore
 *
 * The returned array always has the keys "url" and "type" ("email", "file", "ext",
 * "anchor" or "page"). For URLs on the current host it additionally contains
 * "relScriptPath" and "relUrl", and for page links "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);

    // Email link
    if (substr(strtolower($url), 0, 7) === 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }

    // File reference given as "?file:..." inside the URL
    $fileMarkerPosition = strpos($url, '?file:');
    if ($fileMarkerPosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $fileMarkerPosition + 1));
        return $info;
    }

    // Everything else is compared with the site URL, character by character,
    // to find the length of the common prefix
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $urlLength = strlen($url);
    $commonLength = 0;
    while ($commonLength < $urlLength && $url[$commonLength] == $siteUrl[$commonLength]) {
        $commonLength++;
    }
    $info['relScriptPath'] = substr($siteUrl, $commonLength);
    $info['relUrl'] = substr($url, $commonLength);
    $info['url'] = $url;
    $info['type'] = 'ext';

    $urlParts = parse_url($url);
    $siteUrlParts = parse_url($siteUrl);
    // Hosts should match, and the script path must be empty (FE-EDIT) or start with TYPO3_mainDir
    $pointsIntoThisSite = $urlParts['host'] == $siteUrlParts['host']
        && (
            !$info['relScriptPath']
            || (defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir)
        );
    if (!$pointsIntoThisSite) {
        // External URL: the relative information is meaningless
        unset($info['relScriptPath']);
        unset($info['relUrl']);
        return $info;
    }

    $relativeParts = parse_url($info['relUrl']);
    if ($info['relUrl'] === '#' . $urlParts['fragment']) {
        // Only a fragment is left: anchor on the current page
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relativeParts['path']) || $relativeParts['path'] === 'index.php') {
        // URL is a page (id parameter)
        $querySplit = preg_split('/^id=/', $relativeParts['query']);
        $querySplit[1] = preg_replace('/&id=[^&]*/', '', $querySplit[1]);
        $parameters = explode('&', $querySplit[1]);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $relativeParts['fragment'];
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1154
/**
 * Converts the href of <A>-tags to absolute URLs
 *
 * Every href that has content but no scheme gets the site URL (TYPO3_SITE_URL)
 * prepended. <a> tags without a href, or with an empty one (e.g. named anchors),
 * are left as they are. Nested <a> tags are processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_AtagToAbs($value)
{
    if (func_num_args() > 1) {
        trigger_error('Second argument of TS_AtagToAbs() is not in use and is removed, however the argument in the callers code can be removed without side-effects.', E_USER_DEPRECATED);
    }
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Block
        if ($k % 2) {
            list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
            // Checking if there is a scheme, and if not, prepend the current url.
            // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
            // A missing href counts as "no content" as well; otherwise the tag would get the site URL as href.
            if (($attribArray['href'] ?? '') !== '') {
                $uP = parse_url(strtolower($attribArray['href']));
                // parse_url() returns FALSE for malformed URLs and omits "scheme" when there is none
                if (empty($uP['scheme'])) {
                    $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
                }
            }
            $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
            $eTag = '</a>';
            $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
1186
/**
 * Adjusts the width/height attributes of an image tag according to the
 * processing option "plainImageMode".
 *
 * - lockDimensions: width and height are taken from $imageInfo[0] / $imageInfo[1]
 * - lockRatioWhenSmaller: width is capped at $imageInfo[0], height follows the image ratio
 * - lockRatio: height follows the image ratio for the given width
 *
 * With any other (or no) mode the attributes are returned unchanged.
 *
 * @param array $imageInfo info array of the image; index 0 is used as width, index 1 as height
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }

    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
    } elseif ($mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio') {
        // Only "lockRatioWhenSmaller" limits the width to the width of the image
        if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
            $attribArray['width'] = $imageInfo[0];
        }
        // Correct the height to the aspect ratio of the image (skipped for a zero width image)
        if ($imageInfo[0] > 0) {
            $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
        }
    }
    return $attribArray;
}
1221
/**
 * Called before any processing / transformation is made.
 * Strips every CR (char 13) from the content so that only LFs (char 10)
 * have to be dealt with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1236
/**
 * Called after any processing / transformation was made, just before the
 * content is returned by the RTE parser: all line breaks are unified to CRLF.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First strip any CR that has entered in the meantime, then turn every LF into CRLF
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
1254
/**
 * Content Transformation from DB to RTE
 * Checks all <a> tags whose href resolves to a page link and checks if the page is available.
 * If not, a "data-rte-error" attribute and some offensive styling is added.
 *
 * Links to the "current" page are never marked. Links whose type has no registered
 * link handler cannot be verified and are left unmarked instead of aborting the
 * whole transformation.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Even positions hold the content between the <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        try {
            $hrefInformation = $linkService->resolve($attributes['href']);
        } catch (UnknownLinkHandlerException $e) {
            // No handler registered for this link type: nothing to verify here
            $hrefInformation = [];
        }
        if (($hrefInformation['type'] ?? null) === LinkService::TYPE_PAGE && $hrefInformation['pageuid'] !== 'current') {
            $pageRecord = BackendUtility::getRecord('pages', $hrefInformation['pageuid']);
            if (!is_array($pageRecord)) {
                // Page does not exist
                $attributes['data-rte-error'] = 'Page with ID ' . $hrefInformation['pageuid'] . ' not found';
                $styling = 'background-color: yellow; border:2px red solid; color: black;';
                if (empty($attributes['style'])) {
                    $attributes['style'] = $styling;
                } else {
                    $attributes['style'] .= ' ' . $styling;
                }
            }
        }
        // Always rewrite the block to allow the nested calling even if a page is found
        $blocks[$position] =
            '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]))
            . '</a>';
    }
    return implode('', $blocks);
}
1297
/**
 * Content Transformation from RTE to DB
 * Strips the error information that is added to broken links from all <a> tags
 * having a href: the "data-rte-error" attribute and the marker styling inside the
 * "style" attribute. A style attribute that ends up empty is removed completely.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    foreach ($blocks as $position => $block) {
        // Only odd positions hold <a> blocks
        $isLinkBlock = $position % 2 !== 0;
        if (!$isLinkBlock) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($block), true);
        if (empty($attributes['href'])) {
            continue;
        }
        // Always remove the markers again (regardless of the page was found or not)
        // so the database does not contain ugly stuff
        unset($attributes['data-rte-error']);
        if (isset($attributes['style'])) {
            $cleanedStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
            if (empty($cleanedStyle)) {
                unset($attributes['style']);
            } else {
                $attributes['style'] = $cleanedStyle;
            }
        }
        // Rebuild the tag and process nested links recursively
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($block));
        $blocks[$position] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $blocks);
}
1332 }