6560885f746a1ed4d05ad1974a3dea140817c0a7
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
19 use TYPO3\CMS\Core\LinkHandling\LinkService;
20 use TYPO3\CMS\Core\Log\LogManager;
21 use TYPO3\CMS\Core\Resource;
22 use TYPO3\CMS\Core\Utility\GeneralUtility;
23 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
24
25 /**
26 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
27 *
28 * Concerning line breaks:
29 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
30 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
31 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
32 */
33 class RteHtmlParser extends HtmlParser
34 {
/**
 * Comma-separated list of elements that are not wrapped into a "p" tag while doing the transformation.
 * RTE_transform() replaces this value with 'proc.blockElementList' when that option is set.
 *
 * @var string
 */
public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma-separated list of all tags that are allowed by default
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';

/**
 * Set this to the pid of the record manipulated by the class.
 * Assigned by init().
 *
 * @var int
 */
public $recPid = 0;

/**
 * Element reference [table]:[field], eg. "tt_content:bodytext"
 * Assigned by init(); TS_images_db() splits it on ":" to look up the default upload folder.
 *
 * @var string
 */
public $elRef = '';

/**
 * Current Page TSConfig, i.e. the $thisConfig array handed to RTE_transform()
 *
 * @var array
 */
public $tsConfig = [];

/**
 * Set to the TSconfig options coming from Page TSconfig.
 * RTE_transform() fills this with the 'proc.' sub-array of its $thisConfig argument.
 *
 * @var array
 */
public $procOptions = [];

/**
 * Run-away brake for recursive calls.
 * Counted down on entering TS_transform_db() and counted up again when it finishes.
 *
 * @var int
 */
public $TS_transform_db_safecounter = 100;

/**
 * Data caching for processing function getKeepTags(), keyed by transformation direction
 *
 * @var array
 */
public $getKeepTags_cache = [];

/**
 * Storage of the allowed CSS class names in the RTE.
 * Populated by RTE_transform() from 'proc.allowedClasses.' or the comma list 'proc.allowedClasses'.
 *
 * @var array
 */
public $allowedClasses = [];

/**
 * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling,
 * they have a special place for configuration via 'proc.allowAttributes.' or 'proc.keepPDIVattribs'
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop'
];

/**
 * Any tags that are allowed outside of <p> sections - usually similar to the block elements
 * plus some special tags like <hr> and <img> (if images are allowed).
 * Completely overrideable via 'proc.allowTagsOutside' (or 'proc.allowTagsOutside.' when both are set)
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'header',
    'hr',
    'nav',
    'section'
];
134
/**
 * Remembers which element is being processed and on which page its record lives.
 *
 * @param string $elRef Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 * @param int $recPid PID of the record (page id)
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
146
147 /**********************************************
148 *
149 * Main function
150 *
151 **********************************************/
/**
 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
 * This is the main function called from DataHandler and transfer data classes
 *
 * Processing order: read the 'proc.' options, resolve the list of transformation modes, normalise
 * line breaks, run the optional entry HTML parser, apply every mode (a registered user object takes
 * precedence over the built-in handler of the same name), run the optional exit HTML parser and
 * finally normalise line breaks again.
 *
 * @param string $value Input value
 * @param array $specConf deprecated old "defaultExtras" parsed as array
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $specConf = [], $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // All options below are optional, so every lookup is guarded against a missing key
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called
    if ((string)($this->procOptions['overruleMode'] ?? '') !== '') {
        $modes = GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']);
    } elseif (!empty($this->procOptions['mode'])) {
        $modes = [$this->procOptions['mode']];
    } else {
        // Get parameters for rte_transformation:
        // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the else{} part can be removed in v9
        GeneralUtility::deprecationLog(
            'Argument 2 of RteHtmlParser::RTE_transform() is deprecated. Transformations should be given in $thisConfig[\'proc.\'][\'overruleMode\']'
        );
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters'] ?? null);
        $modes = GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode'] ?? '');
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // A user defined transformation registered for this mode replaces the built-in one
        $userClassReference = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if ($userClassReference) {
                $userObject = GeneralUtility::getUserObj($userClassReference);
                $userObject->pObj = $this;
                $userObject->transformationKey = $cmd;
                $value = $userObject->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if ($userClassReference) {
                $userObject = GeneralUtility::getUserObj($userClassReference);
                $userObject->pObj = $this;
                // Expose the mode to the user object in this direction too, as the "db" branch does
                $userObject->transformationKey = $cmd;
                $value = $userObject->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
281
/**
 * Expands mode shortcuts, removes duplicate modes and puts them into the order needed for $direction.
 *
 * @param string $direction "rte" reverses the resolved order, any other value keeps it
 * @param array $modes the configured transformation modes, possibly containing shortcuts
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // The shortcut "default" stands for all custom modes. So does "ts_css", which is
    // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - drop 'ts_css' from the search list in v9
    $allCustomModes = 'detectbrokenlinks,css_transform,ts_images,ts_links';
    $expandedList = str_replace(['default', 'ts_css'], $allCustomModes, implode(',', $modes));

    // Every mode is applied only once
    $resolvedModes = array_unique(GeneralUtility::trimExplode(',', $expandedList, true));

    // Content going to the RTE is processed in the opposite order
    return $direction === 'rte' ? array_reverse($resolvedModes) : $resolvedModes;
}
308
/**
 * Passes the content through the HTML cleaner when the given directive is enabled.
 * Such additional cleaners are applied either before or after the main transformation
 * and are thus totally independent processing options you can set up.
 *
 * This is only possible via TSconfig (procOptions) currently.
 *
 * @param string $content
 * @param string $configurationDirective key looked up in the procOptions to see if the parser is enabled; the parser configuration is then read from the same key suffixed with "."
 * @return string the processed content
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    // Directive not switched on: hand the content back untouched
    if (!$this->procOptions[$configurationDirective]) {
        return $content;
    }
    $parserConfiguration = $this->HTMLparserConfig($this->procOptions[$configurationDirective . '.']);
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $parserConfiguration;
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
328
329 /************************************
330 *
331 * Specific RTE TRANSFORMATION functions
332 *
333 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL.
 * If it turns out that the URL is from another website than the current one, the image is read from that
 * external URL and stored in the backend user's default upload folder (backend mode only, unless
 * 'proc.dontFetchExtPictures' is set).
 * Also "magic" images are processed here.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags; the tags themselves end up on the odd indexes
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        // No image tag in the content
        return implode('', $imgSplit);
    }
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    /** @var $resourceFactory Resource\ResourceFactory */
    $resourceFactory = Resource\ResourceFactory::getInstance();
    /** @var $magicImageService Resource\Service\MagicImageService */
    $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
    $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            // Content between image tags stays as it is
            continue;
        }
        // Get attributes
        list($attribArray) = $this->get_tag_attributes($v, true);
        // It's always an absolute URL coming from the RTE into the Database.
        $absoluteUrl = trim($attribArray['src'] ?? '');
        // Make the URL absolute if it has no scheme and starts with the site path.
        // The scheme has to be read with parse_url(); pathinfo() does not provide one.
        if ($sitePath && !parse_url($absoluteUrl, PHP_URL_SCHEME) && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
            $absoluteUrl = $siteUrl . substr($absoluteUrl, strlen($sitePath));
        }
        // Image dimensions set in the img tag, if any
        $imgTagDimensions = $this->getWHFromAttribs($attribArray);
        if ($imgTagDimensions[0]) {
            $attribArray['width'] = $imgTagDimensions[0];
        }
        if ($imgTagDimensions[1]) {
            $attribArray['height'] = $imgTagDimensions[1];
        }
        $originalImageFile = null;
        if (!empty($attribArray['data-htmlarea-file-uid'])) {
            // An original image file uid is available
            try {
                /** @var $originalImageFile Resource\File */
                $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
            } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                // Log the fact the file could not be retrieved.
                $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                $this->getLogger()->error($message);
            }
        }
        if ($originalImageFile instanceof Resource\File) {
            // Public url of local file is relative to the site url, absolute otherwise
            $publicUrl = $originalImageFile->getPublicUrl();
            if ($absoluteUrl == $publicUrl || $absoluteUrl == $siteUrl . $publicUrl) {
                // This is a plain image, i.e. reference to the original image
                if (!empty($this->procOptions['plainImageMode'])) {
                    // "plain image mode" is configured
                    // Find the dimensions of the original image
                    $imageInfo = [
                        $originalImageFile->getProperty('width'),
                        $originalImageFile->getProperty('height')
                    ];
                    if (!$imageInfo[0] || !$imageInfo[1]) {
                        // Dimensions are not stored with the file, so measure the file itself
                        $filePath = $originalImageFile->getForLocalProcessing(false);
                        $imageInfo = @getimagesize($filePath);
                    }
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
            } else {
                // Magic image case: get a processed file with the requested configuration
                $imageConfiguration = [
                    'width' => $imgTagDimensions[0],
                    'height' => $imgTagDimensions[1]
                ];
                $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                $attribArray['width'] = $magicImage->getProperty('width');
                $attribArray['height'] = $magicImage->getProperty('height');
                $attribArray['src'] = $magicImage->getPublicUrl();
            }
        } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
            // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
            // Fetch the external image
            $externalFile = GeneralUtility::getUrl($absoluteUrl);
            if ($externalFile) {
                $pU = parse_url($absoluteUrl);
                $pI = pathinfo($pU['path'] ?? '');
                $extension = strtolower($pI['extension'] ?? '');
                if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                    $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                    // We insert this image into the user default upload folder
                    list($table, $field) = explode(':', $this->elRef);
                    /** @var Resource\Folder $folder */
                    $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                    /** @var Resource\File $fileObject */
                    $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                    $imageConfiguration = [
                        'width' => $attribArray['width'] ?? null,
                        'height' => $attribArray['height'] ?? null
                    ];
                    $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            }
        } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
            // Finally, check image as local file (siteURL equals the one of the image)
            // Image has no data-htmlarea-file-uid attribute
            // Relative path, rawurldecoded for special characters.
            $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
            // Absolute filepath, locked to relative path of this project
            $filepath = GeneralUtility::getFileAbsFileName($path);
            // Check file existence (in relative directory to this installation!)
            if ($filepath && @is_file($filepath)) {
                // Treat it as a plain image
                if (!empty($this->procOptions['plainImageMode'])) {
                    // If "plain image mode" has been configured
                    // Find the original dimensions of the image
                    $imageInfo = @getimagesize($filepath);
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
                // Let's try to find a file uid for this image
                try {
                    $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                    if ($fileOrFolderObject instanceof Resource\FileInterface) {
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        /** @var Resource\AbstractFile $fileObject */
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        // @todo if the retrieved file is a processed file, get the original file...
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    }
                } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                    // Nothing to be done if file/folder not found
                }
            }
        }
        // Remove width and height from style attribute
        $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        // Convert absolute to relative url
        if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
            $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
    }
    return implode('', $imgSplit);
}
493
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares images stored in the database for display in the RTE:
 * every src attribute that does not start with "http" is turned into an absolute URL
 * and every image gets an alt attribute.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // The img tags are found on the odd indexes of the split result
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        return implode('', $imgSplit);
    }
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    foreach ($imgSplit as $index => $fragment) {
        if (!($index % 2)) {
            // Not an image tag
            continue;
        }
        list($attributes) = $this->get_tag_attributes($fragment, true);
        $source = trim($attributes['src']);
        if (strtolower(substr($source, 0, 4)) !== 'http') {
            // A leading site path (eg. /~user_jim/) is cut off first, as $siteUrl already contains it
            $pathBelowSite = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $attributes['src']);
            $attributes['src'] = $siteUrl . $pathBelowSite;
        }
        // Images must carry an alt attribute
        $attributes['alt'] = $attributes['alt'] ?? '';
        $imgSplit[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, true, true) . ' />';
    }
    return implode('', $imgSplit);
}
532
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
 *
 * Splits content into <a> tag blocks and processes each tag, and allows hooks to actually render
 * the result. Without registered hooks the href is rewritten to the LinkService string syntax and
 * the tag content is processed recursively. If the LinkService knows no handler for a link, the
 * href is stored as it was submitted.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            // Content outside of <a> blocks is not touched
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagAttributes['href'] ?? '');

        // Modify parameters, this hook should be deprecated
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
            $parameters = [
                'currentBlock' => $v,
                'linkInformation' => $linkInformation,
                'url' => $linkInformation['href'] ?? null,
                'attributes' => $tagAttributes
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
            }
        } else {
            // Otherwise store the link as <a> tag as default by TYPO3, with the new link service syntax
            try {
                $tagAttributes['href'] = $linkService->asString($linkInformation);
            } catch (UnknownLinkHandlerException $e) {
                // No handler for this kind of link: leave the submitted href untouched
                // instead of aborting the whole transformation
            }
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
        }
    }
    return implode('', $blockSplit);
}
576
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting TYPO3-specific <link> tags to <a> tags
 *
 * This only serves the migration of legacy <link> tags to the new linking syntax using <a> tags;
 * the resulting <a> tags are not converted back to <link> tags anymore.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_links_rte($value)
{
    $value = $this->TS_AtagToAbs($value);
    // The TYPO3 pseudo tag "<link>" delimits the blocks, which sit on the odd indexes
    $blockSplit = $this->splitIntoBlock('link', $value, true);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            continue;
        }
        // Drop the closing bracket of the opening tag, then cut off the leading "<link " part
        $openingTag = substr($this->getFirstTag($v), 0, -1);
        $typoLinkData = explode(' ', $openingTag, 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);

        try {
            $href = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            // Links without a known handler end up with an empty href
            $href = '';
        }

        $hooksRegistered = isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']);
        if (!$hooksRegistered) {
            // Default rendering: build the <a> tag and process its content recursively
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $innerContent = $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k]));
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $innerContent
                . '</a>';
            continue;
        }

        // Hooks render the block themselves.
        // backwards-compatibility: show an error message if the page is not found
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            if (!is_array($pageRecord)) {
                // Page does not exist
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $v,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
            $processor = GeneralUtility::getUserObj($objRef);
            $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    return implode('', $blockSplit);
}
648
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container blocks (blockquote,
 * dd, div, header, section, footer, nav, article, aside) are processed recursively, <pre> blocks
 * are kept untouched and all other blocks get their line breaks replaced by spaces. Content
 * between blocks is handed to divideIntoLines().
 *
 * The recursion depth is limited by $this->TS_transform_db_safecounter; once the brake is used up
 * the value is returned unprocessed.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    // The brake is checked before it is counted down, so that bailing out here leaves the counter
    // balanced with the increment at the end of this method.
    if ($this->TS_transform_db_safecounter <= 0) {
        return $value;
    }
    $this->TS_transform_db_safecounter--;
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Containers: transform the inner content and re-wrap it with the original opening tag
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is stored as it is
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace(('/[' . LF . ']+/'), ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                // Whitespace-only content between blocks is dropped
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
718
/**
 * Moves the style attribute of a-tags onto a span-tag wrapped around the a-tag.
 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
 * allowed.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            // Only the odd indexes hold A-tag blocks
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Nothing to do without a "style" attribute, or when rteerror is set
        if (!$attribArray['style'] || $attribArray['rteerror']) {
            continue;
        }
        $attribArray_copy = ['style' => $attribArray['style']];
        unset($attribArray['style']);
        $spanOpeningTag = '<span ' . GeneralUtility::implodeAttributes($attribArray_copy, true) . '>';
        $anchorOpeningTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $blockSplit[$k] = $spanOpeningTag . $anchorOpeningTag . $this->removeFirstAndLastTag($blockSplit[$k]) . '</a></span>';
    }
    return implode('', $blockSplit);
}
748
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container blocks (blockquote,
 * dd, div, header, section, footer, nav, article, aside) are processed recursively, all other
 * blocks are kept as they are. Content between blocks has its surrounding line breaks reduced
 * and is then passed to setDivTags().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Based on tagname, we do transformations:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Containers: transform the inner content and re-wrap it with the original opening tag
                    $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
            }
            // Strip one leading line break (with preceding spaces) from the element following this block.
            // This changes an element the loop has not reached yet; the non-block branch below reads it
            // via $blockSplit[$k], not via $v, and therefore works on the stripped content.
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1]);
        } else {
            // NON-block:
            // Name of the first tag of the element that follows
            $nextFTN = $this->getFirstTagName($blockSplit[$k + 1]);
            // TRUE if the content consists of line breaks, optionally surrounded by spaces
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
                // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                if (!$onlyLineBreaks) {
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // If the line contains only linebreaks, remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    // The remaining parts are joined with a line break
    return implode(LF, $blockSplit);
}
807
808 /***************************************************************
809 *
810 * Generic RTE transformation, analysis and helper functions
811 *
812 **************************************************************/
813
/**
 * Cleans content on its way from the RTE into the database.
 *
 * Delegates to HTMLcleaner() using the tag configuration returned by getKeepTags('db').
 * Unknown tags are removed unless the processing option "dontRemoveUnknownTags_db" is set.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        // Default (option not set): remove unknown tags
        (bool)$this->procOptions['dontRemoveUnknownTags_db']
    );
}
830
/**
 * Builds the tag configuration array handed to HTMLcleaner(), depending on whether the
 * content travels TO or FROM the Rich Text Editor. The result is cached per direction
 * in $this->getKeepTags_cache.
 *
 * @param string $direction Direction of the content being processed: "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // Already resolved for this direction: serve the cached configuration
    if (is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Allowed tags come from the processing options, either as array ("allowTags.") or as list ("allowTags")
    $configuredTags = is_array($this->procOptions['allowTags.'])
        ? implode(',', $this->procOptions['allowTags.'])
        : $this->procOptions['allowTags'];
    // The default tags are always part of the list; the tag names become the array keys
    $tagList = $this->defaultAllowedTagsList . ',' . strtolower($configuredTags);
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $tagList, true));

    // Denied tags are removed again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'], true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Direction specific options
    switch ($direction) {
        case 'rte':
            // Converts the TypoScript style configuration (keys with dots) into the array
            // format expected by HTMLcleaner()
            list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
            break;
        case 'db':
            // Span tags, if allowed, get a restricted attribute set and are dropped when left without attributes
            if (isset($keepTags['span'])) {
                $spanConfiguration = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $spanConfiguration['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = $spanConfiguration;
            }
            // Further options are taken from the processing options, with defaults for nesting and attribute-less tags
            $parserConfiguration = $this->procOptions['HTMLparser_db.'];
            if (!$parserConfiguration['globalNesting']) {
                $parserConfiguration['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (!$parserConfiguration['noAttrib']) {
                $parserConfiguration['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Transforming the array from TypoScript to regular array:
            list($keepTags) = $this->HTMLparserConfig($parserConfiguration, $keepTags);
            break;
    }

    // Remember the result in object memory for subsequent calls
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
897
/**
 * Resolves $value into parts based on <p> sections and returns them as lines separated by LF.
 * The purpose is to turn the HTML returned from the RTE into ordinary, "human-readable" lines.
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value. A plain string is returned, regardless of $returnArray, when no <p> section was found or the recursion brake hit zero.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Passing TRUE as third parameter eliminates false end-tags
    $sections = $this->splitIntoBlock('p', $value, true);
    // No <p> sections at all (or recursion exhausted): hand back the content with cleaned line breaks only
    if ($count <= 0 || count($sections) <= 1) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    // Tags that survive strip_tags() for content located outside of paragraphs
    $tagsKeptOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';

    foreach ($sections as $index => $section) {
        if ($index % 2 === 0) {
            // Outside a paragraph: strip everything but the allowed tags; positions without content are removed
            $outside = trim(strip_tags($section, $tagsKeptOutsideParagraphs));
            $outside = $this->sanitizeLineBreaksForContentOnly($outside);
            if ((string)$outside === '') {
                unset($sections[$index]);
            } else {
                // Wrap the remaining plain text in <p> tags
                $plainText = strip_tags($outside);
                $sections[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outside);
            }
            continue;
        }

        // Inside a <p> section: fetch "sub-lines", which explodes any further p nesting recursively
        $innerContent = $this->removeFirstAndLastTag($section);
        $subLines = $this->divideIntoLines($innerContent, $count - 1, true);
        if (is_array($subLines)) {
            // Nested <p> sections (considered an error) directly replace the content of THIS section
            $line = implode(LF, $subLines);
        } else {
            // No nesting found: process it as a TRUE line without erroneous content
            $line = $this->processContentWithinParagraph($subLines, $sections[$index]);
        }
        // A line consisting of just a &nbsp; is made pure blank, unless it holds an image
        // or its tags still carry attributes, which must be considered as possible content.
        if (
            trim(strip_tags($line)) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line))
        ) {
            $line = '';
        }
        $sections[$index] = $line;
    }

    if ($returnArray) {
        return $sections;
    }
    return implode(LF, $sections);
}
954
/**
 * Turns every line of $value into a <p></p> section, unless the line is already wrapped
 * in a <p> or <div> tag or contains an <hr> tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner function, applied to each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $line = $this->HTMLcleaner($line, $keepTags, 'protect');
            // Convert double-encoded &nbsp; into regular &nbsp; (this could also be reversed via the exitHTMLparser).
            // This was previously an option to disable called "dontConvAmpInNBSP_rte"
            $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
        }

        $normalized = strtolower(trim($line));
        $containsHr = preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line);
        $wrappedInDiv = substr($normalized, 0, 4) === '<div' && substr($normalized, -6) === '</div>';
        $wrappedInParagraph = substr($normalized, 0, 2) === '<p' && substr($normalized, -4) === '</p>';
        // Only set p-tags if there is no hr tag and the line is not already wrapped in div or p tags
        if (!$containsHr && !$wrappedInDiv && !$wrappedInParagraph) {
            $line = '<p>' . $line . '</p>';
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
995
/**
 * Used for transformation from RTE to DB
 *
 * Works on a single line within a <p> tag when storing into the database.
 * The content is cleaned via HTMLcleaner_db() and always wrapped in a <p> tag again.
 * Only attributes listed in $this->allowedAttributesForParagraphTags are kept on that tag;
 * if $this->allowedClasses is set, the class attribute is reduced to the allowed classes.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, and validate the attributes
    $fTag = $this->getFirstTag($fullContentWithTag);
    // Without a configured list of allowed attributes, no attribute is kept at all
    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($fTag);
        // Make sure the tag attributes only contain the ones that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // A <p> tag regularly has no class attribute at all, so do not read the key unguarded
        $classAttribute = $tagAttributes['class'] ?? '';
        // Only allow classes that are whitelisted in $this->allowedClasses
        if (trim($classAttribute) !== '' && !empty($this->allowedClasses) && !in_array($classAttribute, $this->allowedClasses, true)) {
            $classes = GeneralUtility::trimExplode(' ', $classAttribute, true);
            $classes = array_intersect($classes, $this->allowedClasses);
            if (!empty($classes)) {
                $tagAttributes['class'] = implode(' ', $classes);
            } else {
                unset($tagAttributes['class']);
            }
        }
    }
    // Remove any line break
    $content = str_replace(LF, '', $content);
    // Compile the surrounding <p> tag; rtrim() removes the separator blank if there are no attributes
    $content = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
    return $content;
}
1039
/**
 * Puts every <hr> tag on a line of its own and tidies up line breaks afterwards,
 * used when transforming from RTE to DB.
 *
 * Steps: each <hr> tag is rewritten as LF<hr .../>LF, doubled LFs are replaced by a single LF
 * (one replacement pass) and a line break at the very start or end of the content is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $withIsolatedHrTags = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    $withoutDoubledBreaks = str_replace(LF . LF, LF, $withIsolatedHrTags);
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubledBreaks);
}
1053
/**
 * Finds width and height from attrib-array
 * Pixel values given in the style attribute ("width: 10px; height: 20px") take precedence;
 * if a dimension is not found there (or is zero), the "width" / "height" attribute is used.
 *
 * @param array $attribArray Array of attributes from tag in which to search. The keys "style", "width" and "height" are evaluated.
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = trim($attribArray['style'] ?? '');
    $w = 0;
    $h = 0;
    if ($style !== '') {
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        // The lookbehind makes sure only the plain "width" / "height" properties match,
        // not longer property names ending in them such as "border-width", "max-width" or "line-height".
        $propertyStart = '(?<![a-z0-9_-])';
        // Width
        $reg = [];
        if (preg_match('/' . $propertyStart . 'width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match('/' . $propertyStart . 'height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes for dimensions not found in the style
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1084
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * This functionality is not in use anymore
 *
 * Keys of the returned array depend on the detected type:
 * - "email": url, type
 * - "file": url, type (plus relScriptPath / relUrl when detected relative to the site URL)
 * - "anchor" / "page": url, type, relScriptPath, relUrl ("page" additionally: pageid, cElement, query)
 * - "ext": url, type
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) === 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
    } elseif (strpos($url, '?file:') !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, strpos($url, '?file:') + 1));
    } else {
        $curURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        // Determine the length of the common prefix of $url and the site URL.
        // The comparison is bound to the shorter string, so no offset beyond the end of either string is read.
        $comparableLength = min(strlen($url), strlen($curURL));
        $a = 0;
        for (; $a < $comparableLength; $a++) {
            if ($url[$a] != $curURL[$a]) {
                break;
            }
        }
        $info['relScriptPath'] = substr($curURL, $a);
        $info['relUrl'] = substr($url, $a);
        $info['url'] = $url;
        $info['type'] = 'ext';
        // parse_url() only returns the components that are present, so every key is read with a fallback
        $siteUrl_parts = parse_url($url);
        $curUrl_parts = parse_url($curURL);
        // Hosts should match
        if (($siteUrl_parts['host'] ?? null) == ($curUrl_parts['host'] ?? null) && (!$info['relScriptPath'] || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir)) {
            // If the script path seems to match or is empty (FE-EDIT)
            // New processing order 100502
            $uP = parse_url($info['relUrl']);
            $relativePath = $uP['path'] ?? '';
            if ($info['relUrl'] === '#' . ($siteUrl_parts['fragment'] ?? '')) {
                $info['url'] = $info['relUrl'];
                $info['type'] = 'anchor';
            } elseif (!trim($relativePath) || $relativePath === 'index.php') {
                // URL is a page (id parameter)
                $pp = preg_split('/^id=/', $uP['query'] ?? '');
                $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1] ?? '');
                $parameters = explode('&', $pp[1]);
                $id = array_shift($parameters);
                if ($id) {
                    $info['pageid'] = $id;
                    $info['cElement'] = $uP['fragment'] ?? null;
                    $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
                    $info['type'] = 'page';
                    // Remaining parameters (after the id) are kept as query string
                    $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
                }
            } else {
                $info['url'] = $info['relUrl'];
                $info['type'] = 'file';
            }
        } else {
            unset($info['relScriptPath']);
            unset($info['relUrl']);
        }
    }
    return $info;
}
1149
/**
 * Converting <A>-tags to absolute URLs
 *
 * Every <a> tag whose href has content but no scheme gets the site URL prepended.
 * <a> tags without href, or with an empty href (e.g. pure anchors), keep their attributes as they are.
 * Nested <a> tags are processed recursively.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set. (not in use anymore)
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only the odd positions hold <a> blocks
        if ($k % 2 === 0) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Checking if there is a scheme, and if not, prepend the current url.
        // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
        // A missing href must be treated like an empty one, otherwise the anchor would be turned into a link to the site URL.
        if (isset($attribArray['href']) && $attribArray['href'] !== '') {
            $uP = parse_url(strtolower($attribArray['href']));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
            }
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
    }
    return implode('', $blockSplit);
}
1179
/**
 * Adjusts the width/height attributes of an image tag according to the processing
 * option "plainImageMode":
 * - "lockDimensions": width and height are set to the dimensions from $imageInfo
 * - "lockRatioWhenSmaller": width is capped at the width from $imageInfo, height follows the ratio of $imageInfo
 * - "lockRatio": height follows the ratio of $imageInfo for the given width
 * Without the option (or with any other value) the attributes are returned unchanged.
 *
 * @param array $imageInfo info array of the image; key 0 is used as width, key 1 as height
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }

    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
    } elseif ($mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio') {
        // Only "lockRatioWhenSmaller" limits the width to the width of the image
        if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
            $attribArray['width'] = $imageInfo[0];
        }
        // Derive the height from the aspect ratio; skipped for a zero width to avoid a division by zero
        if ($imageInfo[0] > 0) {
            $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
        }
    }
    return $attribArray;
}
1214
/**
 * Called before any processing / transformation is made.
 * Strips every CR (char 13) so that only LFs (char 10) are dealt with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1229
/**
 * Called after any processing / transformation was made.
 * Just before the content is returned by the RTE parser, all line breaks
 * are unified to be "CRLF"s again.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First drop any CR that has entered in the meantime, then turn every LF into CRLF
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
1247
/**
 * Content Transformation from DB to RTE
 * Checks all <a> tags which reference a t3://page and checks if the page is available
 * If not, a "data-rte-error" attribute and some offensive styling is added.
 *
 * Links that cannot be resolved because their link handler is unknown are not checked
 * and are left unmarked. <a> tags without href are not touched at all.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Only the odd positions hold <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        try {
            $hrefInformation = $linkService->resolve($attributes['href']);
        } catch (UnknownLinkHandlerException $e) {
            // A single link with an unregistered handler must not abort the transformation of the whole content
            $hrefInformation = [];
        }
        $pageUid = $hrefInformation['pageuid'] ?? null;
        if (($hrefInformation['type'] ?? null) === LinkService::TYPE_PAGE && $pageUid !== 'current') {
            $pageRecord = BackendUtility::getRecord('pages', $pageUid);
            if (!is_array($pageRecord)) {
                // Page does not exist
                $attributes['data-rte-error'] = 'Page with ID ' . $pageUid . ' not found';
                $styling = 'background-color: yellow; border:2px red solid; color: black;';
                if (empty($attributes['style'])) {
                    $attributes['style'] = $styling;
                } else {
                    $attributes['style'] .= ' ' . $styling;
                }
            }
        }
        // Always rewrite the block to allow the nested calling even if a page is found
        $blocks[$position] =
            '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]))
            . '</a>';
    }
    return implode('', $blocks);
}
1290
/**
 * Content Transformation from RTE to DB
 * Strips the "data-rte-error" attribute and the error styling from <a> tags again,
 * i.e. the markers that are added to broken links.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    foreach ($segments as $index => $segment) {
        // Only the odd positions hold <a> blocks
        if ($index % 2 !== 0) {
            list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
            // <a> tags without href are left as they are
            if (!empty($attributes['href'])) {
                // Always remove the markers again (regardless of the page was found or not)
                // so the database does not contain ugly stuff
                unset($attributes['data-rte-error']);
                if (isset($attributes['style'])) {
                    $attributes['style'] = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
                    // Nothing but the error styling was in there: drop the attribute altogether
                    if (empty($attributes['style'])) {
                        unset($attributes['style']);
                    }
                }
                $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
                // Nested <a> tags are handled by the recursive call
                $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($segments[$index]));
                $segments[$index] = $openingTag . $innerContent . '</a>';
            }
        }
    }
    return implode('', $segments);
}
1325
/**
 * Fetches a logger named after the concrete class of this object from the LogManager
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
}
1337 }