90e3bff9439b9822eb42fc039ccfef5611ab48f2
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\LinkHandling\LinkService;
19 use TYPO3\CMS\Core\Log\LogManager;
20 use TYPO3\CMS\Core\Resource;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
23
24 /**
25 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
26 *
27 * Concerning line breaks:
28 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
29 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
30 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
31 */
32 class RteHtmlParser extends HtmlParser
33 {
34 /**
35 * List of elements that are not wrapped into a "p" tag while doing the transformation.
36 * @var string
37 */
38 public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
39
40 /**
41 * List of all tags that are allowed by default
42 * @var string
43 */
44 protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
45
46 /**
47 * Set this to the pid of the record manipulated by the class.
48 *
49 * @var int
50 */
51 public $recPid = 0;
52
53 /**
54 * Element reference [table]:[field], eg. "tt_content:bodytext"
55 *
56 * @var string
57 */
58 public $elRef = '';
59
60 /**
61 * Current Page TSConfig
62 *
63 * @var array
64 */
65 public $tsConfig = [];
66
67 /**
68 * Set to the TSconfig options coming from Page TSconfig
69 *
70 * @var array
71 */
72 public $procOptions = [];
73
74 /**
75 * Run-away brake for recursive calls.
76 *
77 * @var int
78 */
79 public $TS_transform_db_safecounter = 100;
80
81 /**
82 * Data caching for processing function
83 *
84 * @var array
85 */
86 public $getKeepTags_cache = [];
87
88 /**
89 * Storage of the allowed CSS class names in the RTE
90 *
91 * @var array
92 */
93 public $allowedClasses = [];
94
95 /**
96 * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling,
97 * they have a special place for configuration via 'proc.keepPDIVattribs'
98 *
99 * @var array
100 */
101 protected $allowedAttributesForParagraphTags = [
102 'class',
103 'align',
104 'id',
105 'title',
106 'dir',
107 'lang',
108 'xml:lang',
109 'itemscope',
110 'itemtype',
111 'itemprop'
112 ];
113
114 /**
115 * Any tags that are allowed outside of <p> sections - usually similar to the block elements
116 * plus some special tags like <hr> and <img> (if images are allowed).
117 * Completely overrideable via 'proc.allowTagsOutside'
118 *
119 * @var array
120 */
121 protected $allowedTagsOutsideOfParagraphs = [
122 'address',
123 'article',
124 'aside',
125 'blockquote',
126 'div',
127 'footer',
128 'header',
129 'hr',
130 'nav',
131 'section'
132 ];
133
/**
 * Initializes the parser with the element reference and the PID of the record being processed.
 *
 * @param string $elRef Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 * @param int $recPid PID of the record (page id)
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
145
146 /**********************************************
147 *
148 * Main function
149 *
150 **********************************************/
/**
 * Transform value for the RTE in the direction specified by $direction (rte/db).
 * This is the main function called from DataHandler and transfer data classes.
 *
 * The processing options are read from $thisConfig['proc.']. The content is passed through the optional
 * entry HTML parser, then through every resolved transformation mode (a user defined transformation
 * registered in $GLOBALS['TYPO3_CONF_VARS'] takes precedence over the built-in handler of the same key)
 * and finally through the optional exit HTML parser. Line breaks are streamlined before and after.
 *
 * @param string $value Input value
 * @param array $specConf deprecated old "defaultExtras" parsed as array
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $specConf = [], $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // "proc." may be missing entirely, e.g. when the method is called with the default $thisConfig
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called
    if ((string)($this->procOptions['overruleMode'] ?? '') !== '') {
        $modes = GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']);
    } elseif (!empty($this->procOptions['mode'])) {
        $modes = [$this->procOptions['mode']];
    } else {
        // Get parameters for rte_transformation:
        // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the else{} part can be removed in v9
        GeneralUtility::deprecationLog(
            'Argument 2 of RteHtmlParser::RTE_transform() is deprecated. Transformations should be given in $thisConfig[\'proc.\'][\'overruleMode\']'
        );
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters'] ?? null);
        $modes = GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode'] ?? '');
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // A user defined transformation registered for this key replaces the built-in handler
        $userTransformationClass = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if ($userTransformationClass) {
                $_procObj = GeneralUtility::getUserObj($userTransformationClass);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if ($userTransformationClass) {
                $_procObj = GeneralUtility::getUserObj($userTransformationClass);
                $_procObj->pObj = $this;
                // Set the key in this direction as well, so both directions hand over the same context
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
280
/**
 * Resolves which transformation modes are executed: shortcuts are expanded, every mode is kept
 * only once, and the order is reversed for the direction "rte".
 *
 * @param string $direction
 * @param array $modes
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    $allCustomModes = 'detectbrokenlinks,css_transform,ts_images,ts_links';

    // Both shortcuts, "default" and "ts_css", expand to all custom modes
    // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the "ts_css" shortcut can be removed in v9
    $expandedModeList = str_replace(['default', 'ts_css'], $allCustomModes, implode(',', $modes));

    // Drop empty entries and duplicates
    $uniqueModes = array_unique(GeneralUtility::trimExplode(',', $expandedModeList, true));

    // The direction "rte" runs the modes in reverse order
    return $direction === 'rte' ? array_reverse($uniqueModes) : $uniqueModes;
}
307
/**
 * Runs the HTML parser if it is configured.
 * Getting additional HTML cleaner configuration. These are applied either before or after the main transformation
 * is done and thus totally independent processing options you can set up.
 *
 * This is only possible via TSconfig (procOptions) currently.
 *
 * @param string $content
 * @param string $configurationDirective key looked up in the procOptions to check if the parser is enabled; the parser configuration is then fetched from the same key suffixed with a dot
 * @return string the processed content, or the unchanged content if the directive is not enabled
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    // Not configured (or disabled): nothing to do
    if (empty($this->procOptions[$configurationDirective])) {
        return $content;
    }
    // The directive may be enabled without any sub-configuration being present
    $parserConfiguration = $this->procOptions[$configurationDirective . '.'] ?? [];
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $this->HTMLparserConfig($parserConfiguration);
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
327
328 /************************************
329 *
330 * Specific RTE TRANSFORMATION functions
331 *
332 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL.
 * If it turns out that the URL is from another website than the current one, the image is read from that external URL
 * and stored on the local server (only in backend mode and unless "dontFetchExtPictures" is set).
 * Also "magic" images are processed here.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Only the odd entries are image tags
            if (!($k % 2)) {
                continue;
            }
            // Get attributes
            list($attribArray) = $this->get_tag_attributes($v, true);
            // It's always an absolute URL coming from the RTE into the Database.
            $absoluteUrl = trim($attribArray['src'] ?? '');
            // Make path absolute if it is relative and we have a site path.
            // The scheme has to be read with parse_url(); pathinfo() does not provide a "scheme" key.
            $scheme = parse_url($absoluteUrl, PHP_URL_SCHEME);
            if ($sitePath && !$scheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                $absoluteUrl = $siteUrl . $absoluteUrl;
            }
            // Image dimensions set in the img tag, if any
            $imgTagDimensions = $this->getWHFromAttribs($attribArray);
            if ($imgTagDimensions[0]) {
                $attribArray['width'] = $imgTagDimensions[0];
            }
            if ($imgTagDimensions[1]) {
                $attribArray['height'] = $imgTagDimensions[1];
            }
            $originalImageFile = null;
            if (!empty($attribArray['data-htmlarea-file-uid'])) {
                // An original image file uid is available
                try {
                    /** @var $originalImageFile Resource\File */
                    $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                    // Log the fact the file could not be retrieved.
                    $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                    $this->getLogger()->error($message);
                }
            }
            if ($originalImageFile instanceof Resource\File) {
                // Public url of local file is relative to the site url, absolute otherwise
                if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                    // This is a plain image, i.e. reference to the original image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // "plain image mode" is configured
                        // Find the dimensions of the original image
                        $imageInfo = [
                            $originalImageFile->getProperty('width'),
                            $originalImageFile->getProperty('height')
                        ];
                        if (!$imageInfo[0] || !$imageInfo[1]) {
                            $filePath = $originalImageFile->getForLocalProcessing(false);
                            $imageInfo = @getimagesize($filePath);
                        }
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                } else {
                    // Magic image case: get a processed file with the requested configuration
                    $imageConfiguration = [
                        'width' => $imgTagDimensions[0],
                        'height' => $imgTagDimensions[1]
                    ];
                    $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
                // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                // NOTE(review): the URL is taken from editor supplied content and fetched server side - confirm this is acceptable
                $externalFile = GeneralUtility::getUrl($absoluteUrl);
                if ($externalFile) {
                    // The URL may have no path, and the path may have no extension
                    $urlPath = (string)parse_url($absoluteUrl, PHP_URL_PATH);
                    $originalExtension = (string)pathinfo($urlPath, PATHINFO_EXTENSION);
                    $extension = strtolower($originalExtension);
                    if (in_array($extension, ['jpg', 'jpeg', 'gif', 'png'], true)) {
                        $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $originalExtension;
                        // We insert this image into the user default upload folder
                        // The element reference may be empty or lack the ":field" part
                        list($table, $field) = array_pad(explode(':', $this->elRef), 2, null);
                        /** @var Resource\Folder $folder */
                        $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                        // Skip the import when no folder object is available
                        // (presumably possible if the user has no upload folder - TODO confirm)
                        if ($folder instanceof Resource\Folder) {
                            /** @var Resource\File $fileObject */
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = [
                                'width' => $attribArray['width'] ?? null,
                                'height' => $attribArray['height'] ?? null
                            ];
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                }
            } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                // Finally, check image as local file (siteURL equals the one of the image)
                // Image has no data-htmlarea-file-uid attribute
                // Relative path, rawurldecoded for special characters.
                $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                // Absolute filepath, locked to relative path of this project
                $filepath = GeneralUtility::getFileAbsFileName($path);
                // Check file existence (in relative directory to this installation!)
                if ($filepath && @is_file($filepath)) {
                    // Treat it as a plain image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // If "plain image mode" has been configured
                        // Find the original dimensions of the image
                        $imageInfo = @getimagesize($filepath);
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                    // Let's try to find a file uid for this image
                    try {
                        $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                        if ($fileOrFolderObject instanceof Resource\FileInterface) {
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            /** @var Resource\AbstractFile $fileObject */
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            // @todo if the retrieved file is a processed file, get the original file...
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        }
                    } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // Nothing to be done if file/folder not found
                    }
                }
            }
            // Remove width and height from style attribute
            $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            // Convert absolute to relative url
            if (GeneralUtility::isFirstPartOfStr($attribArray['src'] ?? '', $siteUrl)) {
                $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
        }
    }
    return implode('', $imgSplit);
}
492
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares images of database content for the RTE: a src attribute not starting with "http" is turned
 * into an absolute URL based on the site URL, and every image gets an alt attribute.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // Odd entries of the split result are the <img> tags, even entries the content in between
    $segments = $this->splitTags('img', $value);
    if (count($segments) <= 1) {
        // No image tag found
        return implode('', $segments);
    }

    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';

    foreach ($segments as $index => $segment) {
        if (!($index % 2)) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($segment, true);
        $currentSource = trim($attributes['src']);
        // Transform the src attribute into an absolute url, if it is not one already
        if (strtolower(substr($currentSource, 0, 4)) !== 'http') {
            $attributes['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attributes['src']);
        }
        // Must have alt attribute
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $segments[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, 1, 1) . ' />';
    }

    return implode('', $segments);
}
531
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
 *
 * Splits content into <a> tag blocks and processes each tag. If hooks are registered under
 * 'modifyParams_LinksDb_PostProc' they render the result, otherwise the <a> tag is rebuilt with the
 * href returned by the LinkService and its content is processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only the odd entries are <a> blocks
        if (!($k % 2)) {
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagAttributes['href'] ?? '');

        // Modify parameters, this hook should be deprecated
        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] ?? null;
        if (is_array($postProcessors)) {
            $parameters = [
                'currentBlock' => $v,
                'linkInformation' => $linkInformation,
                // NOTE(review): no other code here reads an "href" key from the resolved link information,
                // so this is presumably always null - confirm which value the hooks expect as "url"
                'url' => $linkInformation['href'] ?? null,
                'attributes' => $tagAttributes
            ];
            foreach ($postProcessors as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
            }
        } else {
            // Otherwise store the link as <a> tag as default by TYPO3, with the new link service syntax
            $tagAttributes['href'] = $linkService->asString($linkInformation);
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
        }
    }
    return implode('', $blockSplit);
}
575
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting TYPO3-specific <link> tags to <a> tags
 *
 * This functionality is only used to convert legacy <link> tags to the new linking syntax using <a> tags, and will
 * not be converted back to <link> tags anymore.
 *
 * If hooks are registered under 'modifyParams_LinksRte_PostProc' they render the result, otherwise an <a> tag
 * with href, target, class and title is built and the content of the tag is processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_links_rte($value)
{
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>"
    $blockSplit = $this->splitIntoBlock('link', $value, true);
    foreach ($blockSplit as $k => $v) {
        // Only the odd entries are <link> blocks
        if (!($k % 2)) {
            continue;
        }
        // Split away the first "<link " part; a "<link>" tag without any parameter has no second part
        $typoLinkData = explode(' ', substr($this->getFirstTag($v), 0, -1), 2)[1] ?? '';
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);

        $href = $linkService->asString($linkInformation);

        // Modify parameters by a hook
        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? null;
        if (is_array($postProcessors)) {
            // backwards-compatibility: show an error message if the page is not found
            $error = '';
            if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
                $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
                // Page does not exist
                if (!is_array($pageRecord)) {
                    $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
                }
            }
            $parameters = [
                'currentBlock' => $v,
                'url' => $href,
                'tagCode' => $tagCode,
                'external' => $linkInformation['type'] === LinkService::TYPE_URL,
                'error' => $error
            ];
            foreach ($postProcessors as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
            }
        } else {
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];

            // Setting the <a> tag
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k]))
                . '</a>';
        }
    }
    return implode('', $blockSplit);
}
643
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the configured block elements. Container blocks (blockquote, dd, div, header,
 * section, footer, nav, article, aside) are processed recursively, <pre> blocks are left untouched, all other
 * blocks get their line breaks replaced by spaces. Content outside of blocks is handed to divideIntoLines().
 * The recursion depth is limited by $TS_transform_db_safecounter; when the limit is hit the value is returned
 * unprocessed.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    try {
        if ($this->TS_transform_db_safecounter < 0) {
            return $value;
        }
        // Split the content from RTE by the occurrence of these blocks:
        $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

        // Avoid superfluous linebreaks by transform_db after ending headListTag
        while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
            array_pop($blockSplit);
        }

        // Traverse the blocks
        foreach ($blockSplit as $k => $v) {
            if ($k % 2) {
                // Inside block:
                // Init:
                $tag = $this->getFirstTag($v);
                $tagName = strtolower($this->getFirstTagName($v));
                // Process based on the tag:
                switch ($tagName) {
                    case 'blockquote':
                    case 'dd':
                    case 'div':
                    case 'header':
                    case 'section':
                    case 'footer':
                    case 'nav':
                    case 'article':
                    case 'aside':
                        $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                        break;
                    case 'pre':
                        break;
                    default:
                        // usually <hx> tags and <table> tags where no other block elements are within the tags
                        // Eliminate true linebreaks inside block element tags
                        $blockSplit[$k] = preg_replace(('/[' . LF . ']+/'), ' ', $blockSplit[$k]);
                }
            } else {
                // NON-block:
                if (trim($blockSplit[$k]) !== '') {
                    $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                    // Remove linebreaks preceding hr tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                    // Remove linebreaks following hr tags
                    $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                    // Replace other linebreaks with space
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                    $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
                } else {
                    unset($blockSplit[$k]);
                }
            }
        }
        return implode(LF, $blockSplit);
    } finally {
        // Always give the level back - also when bailing out early or when an exception is thrown -
        // otherwise every overflow would permanently reduce the allowed recursion depth
        $this->TS_transform_db_safecounter++;
    }
}
713
/**
 * Wraps a-tags that contain a style attribute with a span-tag
 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
 * allowed.
 *
 * The style attribute is moved from the <a> tag to the wrapping <span>. Tags without a style attribute, or
 * with an "rteerror" attribute, are left as they are.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only the odd entries are <a> blocks
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // If "style" attribute is set and rteerror is not set!
        if (!empty($attribArray['style']) && empty($attribArray['rteerror'])) {
            $spanAttributes = ['style' => $attribArray['style']];
            unset($attribArray['style']);
            $bTag = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
            $eTag = '</a></span>';
            $blockSplit[$k] = $bTag . $this->removeFirstAndLastTag($blockSplit[$k]) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
743
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the configured block elements. Container blocks are processed recursively,
 * content outside of blocks has its surrounding line breaks adjusted and is passed to setDivTags().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Blocks whose content is transformed recursively
    $containerTagNames = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    $leadingLineBreakPattern = '/^[ ]*' . LF . '/';

    // Odd entries are the block elements, even entries the content in between
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTagNames, true)) {
                $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
            }
            // The line break directly following the block is dropped from the next entry
            $blockSplit[$k + 1] = preg_replace($leadingLineBreakPattern, '', $blockSplit[$k + 1]);
            continue;
        }

        // NON-block: read the current entry from the array, as the preceding block may have changed it
        $nextFTN = $this->getFirstTagName($blockSplit[$k + 1]);
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) === 1;
        // If the line is followed by a block or is the last line:
        if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
            if ($onlyLineBreaks) {
                // If the line contains only linebreaks, remove the leading linebreak
                $blockSplit[$k] = preg_replace($leadingLineBreakPattern, '', $blockSplit[$k]);
            } else {
                // Otherwise reduce the number of trailing linebreaks by 1
                $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
            }
        }
        // A blank entry is removed, unless it only contained linebreaks
        if (!$onlyLineBreaks && (string)$blockSplit[$k] === '') {
            unset($blockSplit[$k]);
        } else {
            $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
        }
    }
    return implode(LF, $blockSplit);
}
802
803 /***************************************************************
804 *
805 * Generic RTE transformation, analysis and helper functions
806 *
807 **************************************************************/
808
/**
 * Function for cleaning content going into the database.
 * Content is cleaned eg. by removing unallowed HTML and ds-HSC content
 * It calls HTMLcleaner() from the parent class with the tag configuration returned by getKeepTags('db').
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    $keepTags = $this->getKeepTags('db');
    // Default: remove unknown tags. The option is optional, so it is tested with empty()
    // to avoid an "undefined index" notice when it has not been configured.
    $keepUnknownTags = !empty($this->procOptions['dontRemoveUnknownTags_db']);
    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags);
}
825
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content
 * go TO or FROM the Rich Text Editor ($direction)
 *
 * The result is built once per direction and then served from $this->getKeepTags_cache.
 * All processing options read here are optional; missing ones are treated as empty.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // Serve the cached configuration if this direction has been resolved before
    if (isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }
    // Setting up allowed tags:
    // Default is to get allowed/denied tags from internal array of processing options:
    // Construct default list of tags to keep:
    if (isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.'])) {
        $keepTags = implode(',', $this->procOptions['allowTags.']);
    } else {
        $keepTags = $this->procOptions['allowTags'] ?? '';
    }
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . strtolower($keepTags), true));
    // For tags to deny, remove them from $keepTags array:
    $denyTags = GeneralUtility::trimExplode(',', $this->procOptions['denyTags'] ?? '', true);
    foreach ($denyTags as $dKe) {
        unset($keepTags[$dKe]);
    }
    // Based on the direction of content, set further options:
    switch ($direction) {
        case 'rte':
            // Transforming keepTags array so it can be understood by the HTMLcleaner function.
            // This basically converts the format of the array from TypoScript (having dots) to plain multi-dimensional array.
            // An unset option is still handed over as NULL, exactly as before.
            list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? null, $keepTags);
            break;
        case 'db':
            // Setting up span tags if they are allowed:
            if (isset($keepTags['span'])) {
                $keepTags['span'] = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
            }
            // Setting further options, getting them from the processing options
            $TSc = $this->procOptions['HTMLparser_db.'] ?? null;
            if (empty($TSc['globalNesting'])) {
                $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($TSc['noAttrib'])) {
                $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Transforming the array from TypoScript to regular array:
            list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
            break;
    }
    // Caching (internally, in object memory) the result
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $keepTags;
}
892
/**
 * Resolves $value into lines based on its <p> sections, so the HTML coming from the RTE
 * becomes 'human-readable' lines separated by LF. setDivTags() does the opposite.
 * This function processes content to go into the database.
 *
 * If the value contains no <p> section, or the recursion brake has reached zero, the value is
 * returned as a string that only went through sanitizeLineBreaksForContentOnly() - also when
 * $returnArray is set.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value, or the array of lines if $returnArray is set and <p> sections were found.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument asks splitIntoBlock() to eliminate false end-tags
    $segments = $this->splitIntoBlock('p', $value, true);
    if ($count <= 0 || count($segments) <= 1) {
        // Nothing to divide (or recursion exhausted): hand back the plain content
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    foreach ($segments as $index => $segment) {
        if ($index % 2 === 0) {
            // Content outside of any <p> section: keep only the tags allowed there
            $keptTags = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
            $outside = trim(strip_tags($segment, $keptTags));
            $outside = $this->sanitizeLineBreaksForContentOnly($outside);
            if ((string)$outside === '') {
                // Nothing left, the position is dropped entirely
                unset($segments[$index]);
                continue;
            }
            // Wrap the text part of the remaining content into <p> tags
            $plainText = strip_tags($outside);
            $segments[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outside);
            continue;
        }

        // Content of a <p> section: explode any nested <p> sections recursively
        $inner = $this->removeFirstAndLastTag($segment);
        $subLines = $this->divideIntoLines($inner, $count - 1, true);
        if (is_array($subLines)) {
            // Nested <p> sections were found (considered 'an error'); their lines replace this section
            $line = implode(LF, $subLines);
        } else {
            // No nesting: this is a true line, processed together with its original <p> tag
            $line = $this->processContentWithinParagraph($subLines, $segment);
        }
        // A line consisting of nothing but &nbsp; becomes pure blank, unless it holds an image
        // or a tag carrying one of the listed attributes - those may still be meaningful content.
        $isNbspOnly = trim(strip_tags($line)) === '&nbsp;';
        if (
            $isNbspOnly
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line))
        ) {
            $line = '';
        }
        $segments[$index] = $line;
    }

    if ($returnArray) {
        return $segments;
    }
    return implode(LF, $segments);
}
949
/**
 * Turns every LF separated line of $value into a <p></p> section, unless the line is already
 * wrapped in <p> or <div> tags or contains an <hr> tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner call that each non-blank line passes on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line content; "protect" is handed to HTMLcleaner() for the unknown tags
            $line = $this->HTMLcleaner($line, $keepTags, 'protect');
            // Convert double-encoded &nbsp; into regular &nbsp; (formerly switchable via "dontConvAmpInNBSP_rte")
            $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
        }
        // Lines containing an <hr> tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isWrappedInDiv = substr($probe, 0, 4) === '<div' && substr($probe, -6) === '</div>';
            $isWrappedInP = substr($probe, 0, 2) === '<p' && substr($probe, -4) === '</p>';
            if (!$isWrappedInDiv && !$isWrappedInP) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
990
/**
 * Used for transformation from RTE to DB
 *
 * Works on a single line within a <p> tag when storing into the database
 * This always adds <p> tags and validates the attributes,
 * additionally the content is cleaned up via the HTMLcleaner.
 *
 * Attributes of the <p> tag are kept only if they are listed in
 * $this->allowedAttributesForParagraphTags; if that list is empty, all attributes are dropped.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, and validate the attributes
    $fTag = $this->getFirstTag($fullContentWithTag);
    // Check which attributes of the <p> tag to keep
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($fTag);
        // Make sure the tag attributes only contain the ones that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Only allow classes that are whitelisted in $this->allowedClasses.
        // The isset() guard covers <p> tags without a class attribute, which would otherwise
        // raise an "undefined index" notice.
        if (
            isset($tagAttributes['class'])
            && trim($tagAttributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($tagAttributes['class'], $this->allowedClasses, true)
        ) {
            $classes = GeneralUtility::trimExplode(' ', $tagAttributes['class'], true);
            $classes = array_intersect($classes, $this->allowedClasses);
            if (!empty($classes)) {
                $tagAttributes['class'] = implode(' ', $classes);
            } else {
                unset($tagAttributes['class']);
            }
        }
    } else {
        $tagAttributes = [];
    }
    // Remove any line break
    $content = str_replace(LF, '', $content);
    // Compile the surrounding <p> tag; rtrim() removes the blank after "p" if no attribute is left
    $content = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
    return $content;
}
1034
/**
 * Puts every <hr> tag on a line of its own and tidies up the line breaks afterwards,
 * used when transforming from RTE to DB
 *
 * Doubled LFs are reduced in a single replacement pass, then an LF at the very
 * beginning or end of the content is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    // Rewrite each <hr> tag as self-closing tag surrounded by LFs
    $withIsolatedRules = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    // Replace pairs of LFs by a single LF
    $withoutDoubledBreaks = str_replace(LF . LF, LF, $withIsolatedRules);
    // Strip an LF at the start and at the end
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubledBreaks);
}
1048
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute, use that!
 *
 * Only the plain "width" and "height" style properties with a pixel value are considered.
 * Properties that merely end in these words (max-width, border-width, line-height, ...) are
 * skipped. If the style does not deliver a non-zero value, the "width" / "height" attribute
 * is used instead.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = trim($attribArray['style'] ?? '');
    $w = 0;
    $h = 0;
    if ($style !== '') {
        // The lookbehind rejects matches that are the tail of a longer property name
        $prefix = '/(?<![\\w-])';
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = [];
        // Width
        if (preg_match($prefix . 'width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match($prefix . 'height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes, which may be missing as well
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1079
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * This functionality is not in use anymore
 *
 * The returned array always contains "type" and "url" (except for an empty id, see below).
 * For URLs on the current site, "relScriptPath" and "relUrl" are set too, and for page links
 * additionally "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) === 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    $filePosition = strpos($url, '?file:');
    if ($filePosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $filePosition + 1));
        return $info;
    }

    $curURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    // Length of the common prefix of both URLs. The comparison stops at the end of the
    // shorter string, so no character beyond the end of $curURL is read.
    $commonLength = min(strlen($url), strlen($curURL));
    $a = 0;
    while ($a < $commonLength && $url[$a] === $curURL[$a]) {
        $a++;
    }
    $info['relScriptPath'] = substr($curURL, $a);
    $info['relUrl'] = substr($url, $a);
    $info['url'] = $url;
    $info['type'] = 'ext';
    // parse_url() omits components that are not present (and returns FALSE for malformed
    // URLs), hence every component is read with a fallback.
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);
    $hostsMatch = ($siteUrl_parts['host'] ?? null) === ($curUrl_parts['host'] ?? null);
    // The script path seems to match or is empty (FE-EDIT)
    $scriptPathMatches = !$info['relScriptPath']
        || (defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir);
    if (!$hostsMatch || !$scriptPathMatches) {
        // Not a URL of this site: it stays an external link
        unset($info['relScriptPath']);
        unset($info['relUrl']);
        return $info;
    }

    // New processing order 100502
    $uP = parse_url($info['relUrl']);
    $relPath = $uP['path'] ?? '';
    if ($info['relUrl'] === '#' . ($siteUrl_parts['fragment'] ?? '')) {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relPath) || $relPath === 'index.php') {
        // URL is a page (id parameter)
        $pp = preg_split('/^id=/', $uP['query'] ?? '');
        // $pp[1] only exists if the query string starts with "id="
        $idAndParameters = preg_replace('/&id=[^&]*/', '', $pp[1] ?? '');
        $parameters = explode('&', $idAndParameters);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $uP['fragment'] ?? null;
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1144
/**
 * Converting <A>-tags to absolute URLs
 *
 * Every href without a scheme gets the current site URL prepended. <a> tags without an href
 * or with an empty href (e.g. named anchors) keep their attributes untouched. Nested content
 * of each <a> block is processed recursively.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set. (not in use anymore)
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Block
        if ($k % 2) {
            list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
            // Checking if there is a scheme, and if not, prepend the current url.
            // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
            // A missing href attribute counts as "no content", so no href is invented for such tags.
            $href = $attribArray['href'] ?? '';
            if ($href !== '') {
                $uP = parse_url(strtolower($href));
                if (empty($uP['scheme'])) {
                    $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $href;
                }
            }
            $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
            $eTag = '</a>';
            $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
1174
/**
 * Adjusts the width / height attributes of an image according to the "plainImageMode"
 * processing option. Without that option the attributes are returned unchanged.
 *
 * Supported modes:
 * - lockDimensions: width and height are taken from the image itself
 * - lockRatioWhenSmaller: the width is capped at the image width, the height follows the image ratio
 * - lockRatio: the height follows the image ratio for the given width
 *
 * @param array $imageInfo info array of the image, width in key 0 and height in key 1
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];

    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }

    if ($mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio') {
        // Only "lockRatioWhenSmaller" limits the width to the one of the image
        if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
            $attribArray['width'] = $imageInfo[0];
        }
        // Derive the height from the aspect ratio, which needs a positive image width
        if ($imageInfo[0] > 0) {
            $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
        }
    }
    return $attribArray;
}
1209
/**
 * Called before any processing / transformation is made.
 * Strips every CR (char 13) from the content so that only LFs (char 10) are dealt with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the content without any CR
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1224
/**
 * Called after any processing / transformation was made.
 * Just before the content is returned by the RTE parser, all line breaks
 * are unified to "CRLF" again.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the content with CRLF line breaks
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // Remove any CR that has entered in the meantime, so that only LFs are left ...
    $lfOnlyContent = $this->streamlineLineBreaksForProcessing($content);
    // ... each of which is then expanded to CRLF
    return str_replace(LF, CRLF, $lfOnlyContent);
}
1242
/**
 * Content Transformation from DB to RTE
 * Inspects all <a> tags with an href: if the LinkService resolves the href to a page and no
 * record is found for that page, an error attribute and some offensive styling is added to the tag.
 *
 * Every <a> tag that has an href is rebuilt from its attributes, and its inner content is
 * processed recursively.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($segments as $index => $segment) {
        // The <a> blocks sit at the odd positions
        $isLinkBlock = $index % 2 !== 0;
        if (!$isLinkBlock) {
            continue;
        }
        list($linkAttributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        if (empty($linkAttributes['href'])) {
            continue;
        }
        $target = $linkService->resolve($linkAttributes['href']);
        // The page record is only looked up for links of type "page"
        $pointsToMissingPage = $target['type'] === LinkService::TYPE_PAGE
            && !is_array(BackendUtility::getRecord('pages', $target['pageuid']));
        if ($pointsToMissingPage) {
            $linkAttributes['data-rte-error'] = 'Page with ID ' . $target['pageuid'] . ' not found';
            $marker = 'background-color: yellow; border:2px red solid; color: black;';
            $linkAttributes['style'] = empty($linkAttributes['style'])
                ? $marker
                : $linkAttributes['style'] . ' ' . $marker;
        }
        // The block is rewritten in any case, so nested content is processed even if the page was found
        $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($segments[$index]));
        $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($linkAttributes, true, true) . '>'
            . $innerContent
            . '</a>';
    }
    return implode('', $segments);
}
1285
/**
 * Content Transformation from RTE to DB
 * Strips the error attribute and the error styling, which are added to broken links,
 * from all <a> tags that have an href.
 *
 * Every such <a> tag is rebuilt from its attributes, and its inner content is processed recursively.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    foreach ($segments as $index => $segment) {
        // The <a> blocks sit at the odd positions
        $isLinkBlock = $index % 2 !== 0;
        if (!$isLinkBlock) {
            continue;
        }
        list($linkAttributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        if (empty($linkAttributes['href'])) {
            continue;
        }
        // The markers are always removed (regardless of whether the page was found or not),
        // so the database does not contain ugly stuff
        unset($linkAttributes['data-rte-error']);
        if (isset($linkAttributes['style'])) {
            $cleanedStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $linkAttributes['style']));
            if (empty($cleanedStyle)) {
                unset($linkAttributes['style']);
            } else {
                $linkAttributes['style'] = $cleanedStyle;
            }
        }
        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($segments[$index]));
        $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($linkAttributes, true, true) . '>'
            . $innerContent
            . '</a>';
    }
    return implode('', $segments);
}
1320
/**
 * Fetches the logger for the concrete class of this object from the LogManager
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
}
1332 }