793505bea36ea6008957a90ff73ca69191fe3701
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\LinkHandling\LinkService;
19 use TYPO3\CMS\Core\Log\LogManager;
20 use TYPO3\CMS\Core\Resource;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
23
24 /**
25 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
26 *
27 * Concerning line breaks:
28 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
29 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
30 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
31 */
32 class RteHtmlParser extends HtmlParser
33 {
/**
 * List of elements that are not wrapped into a "p" tag while doing the transformation.
 * Comma-separated tag names; replaced by 'proc.blockElementList' in RTE_transform() when that option is set.
 * Used as the split list for TS_transform_db() and TS_transform_rte().
 *
 * @var string
 */
public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * List of all tags that are allowed by default.
 * Comma-separated, lower-case tag names; getKeepTags() combines it with 'proc.allowTags'.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';

/**
 * Set this to the pid of the record manipulated by the class.
 * Assigned by init(); TS_images_db() passes it on when looking up the default upload folder.
 *
 * @var int
 */
public $recPid = 0;

/**
 * Element reference [table]:[field], eg. "tt_content:bodytext"
 * Assigned by init(); TS_images_db() splits it at ":" into table and field name.
 *
 * @var string
 */
public $elRef = '';

/**
 * Current Page TSConfig
 * Assigned from the $thisConfig argument of RTE_transform().
 *
 * @var array
 */
public $tsConfig = [];

/**
 * Set to the TSconfig options coming from Page TSconfig
 * This is the 'proc.' sub-array of the configuration handed to RTE_transform().
 *
 * @var array
 */
public $procOptions = [];

/**
 * Run-away brake for recursive calls.
 * Decremented when TS_transform_db() is entered and incremented again when it finishes.
 *
 * @var int
 */
public $TS_transform_db_safecounter = 100;

/**
 * Data caching for processing function
 * getKeepTags() stores its result here, keyed by direction.
 *
 * @var array
 */
public $getKeepTags_cache = [];

/**
 * Storage of the allowed CSS class names in the RTE
 * Filled in RTE_transform() from the comma-separated option 'proc.allowedClasses'.
 *
 * @var array
 */
public $allowedClasses = [];

/**
 * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling,
 * they have a special place for configuration via 'proc.keepPDIVattribs'
 * (when that option is set, RTE_transform() replaces this list with its lower-cased values).
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop'
];

/**
 * Any tags that are allowed outside of <p> sections - usually similar to the block elements
 * plus some special tags like <hr> and <img> (if images are allowed).
 * Completely overrideable via 'proc.allowTagsOutside'
 * (RTE_transform() replaces this list with the lower-cased values of that option).
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'header',
    'hr',
    'nav',
    'section'
];
133
/**
 * Remembers which element and which page the content being transformed belongs to.
 *
 * @param string $elRef Element reference in the form [table]:[field], e.g. "tt_content:bodytext"
 * @param int $recPid Page id (pid) of the record
 * @return void
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
146
147 /**********************************************
148 *
149 * Main function
150 *
151 **********************************************/
/**
 * Transform value for RTE based on the configured transformation modes in the direction
 * specified by $direction (rte/db).
 * This is the main function called from DataHandler and transfer data classes.
 *
 * Processing order:
 *  1. store the configuration and apply the "proc." overrides (block element list, allowed
 *     paragraph attributes, tags allowed outside of paragraphs)
 *  2. resolve the list of transformation modes ("proc.overruleMode", or - deprecated - $specConf)
 *  3. normalize line breaks and run the optional entry HTML parser
 *  4. run every mode, either through a registered user transformation or the built-in handler
 *  5. run the optional exit HTML parser and do the final line break clean up
 *
 * A registered user transformation object receives this parser in its "pObj" property and the
 * mode it was registered for in its "transformationKey" property, for both directions.
 * Any direction other than "db" or "rte" skips step 4 entirely.
 *
 * @param string $value Input value
 * @param array $specConf deprecated old "defaultExtras" parsed as array
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $specConf = [], $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // All optional keys are read defensively so that a sparse configuration does not raise notices
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
    }

    // Setting modes / transformations to be called
    $overruleMode = (string)($this->procOptions['overruleMode'] ?? '');
    if ($overruleMode !== '') {
        $modes = GeneralUtility::trimExplode(',', $overruleMode);
    } else {
        // Get parameters for rte_transformation:
        // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the else{} part can be removed in v9
        GeneralUtility::deprecationLog(
            'Argument 2 of RteHtmlParser::RTE_transform() is deprecated. Transformations should be given in $thisConfig[\'proc.\'][\'overruleMode\']'
        );
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters'] ?? null);
        $modes = GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode'] ?? '');
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes; unknown directions do not run any transformation
    if ($direction === 'db' || $direction === 'rte') {
        foreach ($modes as $cmd) {
            // Checking for user defined transformation:
            $classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
            if ($classRef) {
                $procObj = GeneralUtility::getUserObj($classRef);
                $procObj->pObj = $this;
                // Set for both directions, so the object knows which mode it is serving
                $procObj->transformationKey = $cmd;
                $value = $direction === 'db'
                    ? $procObj->transform_db($value, $this)
                    : $procObj->transform_rte($value, $this);
                continue;
            }
            // ... else use defaults:
            if ($direction === 'db') {
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            } else {
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
269
/**
 * Expands shortcut modes, removes duplicates and puts the modes into execution order.
 *
 * The shortcuts "default" and "ts_css" both expand to
 * "detectbrokenlinks,css_transform,ts_images,ts_links". For direction "rte" the resulting
 * list is returned in reverse order.
 *
 * @param string $direction
 * @param array $modes
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the "ts_css" shortcut can be removed in v9
    $expandedModeList = str_replace(
        ['default', 'ts_css'],
        'detectbrokenlinks,css_transform,ts_images,ts_links',
        implode(',', $modes)
    );

    // Every mode is executed only once
    $uniqueModes = array_unique(GeneralUtility::trimExplode(',', $expandedModeList, true));

    return $direction === 'rte' ? array_reverse($uniqueModes) : $uniqueModes;
}
296
/**
 * Passes the content through the HTML cleaner when the given directive is enabled.
 *
 * These additional cleaner runs are applied either before or after the main transformation
 * and are configured independently of it. This is only possible via TSconfig (procOptions)
 * currently.
 *
 * @param string $content
 * @param string $configurationDirective key in procOptions that enables the run; the parser configuration itself is read from the same key with a trailing dot
 * @return string the processed content
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    if (!$this->procOptions[$configurationDirective]) {
        // Directive is not enabled: hand the content back untouched
        return $content;
    }
    $parserConfiguration = $this->HTMLparserConfig($this->procOptions[$configurationDirective . '.']);
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $parserConfiguration;
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
316
317 /************************************
318 *
319 * Specific RTE TRANSFORMATION functions
320 *
321 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 *
 * Every <img> tag is handled in one of three ways:
 *  - it carries a "data-htmlarea-file-uid" that resolves to a file: either the tag points to the
 *    original file (then "plainImageMode" settings are applied if configured) or a "magic" image
 *    is created with the dimensions requested in the tag
 *  - its URL is not below the site URL: the image is fetched and stored in the backend user's
 *    default upload folder (only in backend mode, only jpg/jpeg/gif/png, and only if
 *    "dontFetchExtPictures" is not set), and a magic image is created from it
 *  - its URL is below the site URL and the file exists locally: it is treated as a plain image
 *    and a file uid is looked up for it
 * Afterwards width/height are removed from the style attribute, an alt attribute is ensured and
 * the src is made relative to the site URL.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Image found, do processing:
            if ($k % 2) {
                // Get attributes
                list($attribArray) = $this->get_tag_attributes($v, true);
                // It's always an absolute URL coming from the RTE into the Database.
                $absoluteUrl = trim($attribArray['src']);
                // Make path absolute if it is relative and we have a site path which is not '/'.
                // The scheme has to be read with parse_url(): pathinfo() has no "scheme" element.
                $urlScheme = parse_url($absoluteUrl, PHP_URL_SCHEME);
                if ($sitePath && !$urlScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                    $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                    $absoluteUrl = $siteUrl . $absoluteUrl;
                }
                // Image dimensions set in the img tag, if any
                $imgTagDimensions = $this->getWHFromAttribs($attribArray);
                if ($imgTagDimensions[0]) {
                    $attribArray['width'] = $imgTagDimensions[0];
                }
                if ($imgTagDimensions[1]) {
                    $attribArray['height'] = $imgTagDimensions[1];
                }
                $originalImageFile = null;
                if ($attribArray['data-htmlarea-file-uid']) {
                    // An original image file uid is available
                    try {
                        /** @var $originalImageFile Resource\File */
                        $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                    } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                        // Log the fact the file could not be retrieved.
                        $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                        $this->getLogger()->error($message);
                    }
                }
                if ($originalImageFile instanceof Resource\File) {
                    // Public url of local file is relative to the site url, absolute otherwise
                    if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                        // This is a plain image, i.e. reference to the original image
                        if ($this->procOptions['plainImageMode']) {
                            // "plain image mode" is configured
                            // Find the dimensions of the original image
                            $imageInfo = [
                                $originalImageFile->getProperty('width'),
                                $originalImageFile->getProperty('height')
                            ];
                            if (!$imageInfo[0] || !$imageInfo[1]) {
                                // Fall back to reading the dimensions from the file itself
                                $filePath = $originalImageFile->getForLocalProcessing(false);
                                $imageInfo = @getimagesize($filePath);
                            }
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                    } else {
                        // Magic image case: get a processed file with the requested configuration
                        $imageConfiguration = [
                            'width' => $imgTagDimensions[0],
                            'height' => $imgTagDimensions[1]
                        ];
                        $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && !$this->procOptions['dontFetchExtPictures'] && TYPO3_MODE === 'BE') {
                    // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                    // Fetch the external image
                    $externalFile = GeneralUtility::getUrl($absoluteUrl);
                    if ($externalFile) {
                        $pU = parse_url($absoluteUrl);
                        $pI = pathinfo($pU['path']);
                        $extension = strtolower($pI['extension']);
                        if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                            $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                            // We insert this image into the user default upload folder
                            list($table, $field) = explode(':', $this->elRef);
                            /** @var Resource\Folder $folder */
                            $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                            /** @var Resource\File $fileObject */
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = [
                                'width' => $attribArray['width'],
                                'height' => $attribArray['height']
                            ];
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                    // Finally, check image as local file (siteURL equals the one of the image)
                    // Image has no data-htmlarea-file-uid attribute
                    // Relative path, rawurldecoded for special characters.
                    $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                    // Absolute filepath, locked to relative path of this project
                    $filepath = GeneralUtility::getFileAbsFileName($path);
                    // Check file existence (in relative directory to this installation!)
                    if ($filepath && @is_file($filepath)) {
                        // Treat it as a plain image
                        if ($this->procOptions['plainImageMode']) {
                            // If "plain image mode" has been configured
                            // Find the original dimensions of the image
                            $imageInfo = @getimagesize($filepath);
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                        // Let's try to find a file uid for this image
                        try {
                            $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                            if ($fileOrFolderObject instanceof Resource\FileInterface) {
                                $fileIdentifier = $fileOrFolderObject->getIdentifier();
                                /** @var Resource\AbstractFile $fileObject */
                                $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                                // @todo if the retrieved file is a processed file, get the original file...
                                $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // Nothing to be done if file/folder not found
                        }
                    }
                }
                // Remove width and height from style attribute
                $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                // Convert absolute to relative url
                if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
                    $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
            }
        }
    }
    return implode('', $imgSplit);
}
481
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares <img> tags of database content for the RTE: a src that does not start with "http"
 * is prefixed with the site URL (after removing a leading site path), and a missing alt
 * attribute is added as an empty one.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // Odd indexes of the split result hold the <img> tags
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        // No image tag in the content
        return implode('', $imgSplit);
    }

    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';

    foreach ($imgSplit as $index => $imgTag) {
        if (!($index % 2)) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($imgTag, true);
        $trimmedSource = trim($attributes['src']);
        $startsWithHttp = strtolower(substr($trimmedSource, 0, 4)) === 'http';
        if (!$startsWithHttp) {
            // Transform the src attribute into an absolute url
            $attributes['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attributes['src']);
        }
        // Must have alt attribute
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $imgSplit[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, 1, 1) . ' />';
    }

    return implode('', $imgSplit);
}
520
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
 *
 * Splits content into <a> tag blocks and processes each tag. If hooks are registered under
 * 'modifyParams_LinksDb_PostProc' they render the block (the result of the last hook wins);
 * otherwise the href is rewritten with the LinkService and the inner content is processed
 * recursively.
 *
 * Anchors without an href attribute (e.g. <a name="...">) carry no link that could be resolved
 * and are passed through unchanged.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd indexes hold <a> blocks
        if (!($k % 2)) {
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Nothing to resolve for anchors without href, keep the block as it is
        if (!isset($tagAttributes['href'])) {
            continue;
        }
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagAttributes['href']);

        // Modify parameters, this hook should be deprecated
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
            $parameters = [
                'currentBlock' => $v,
                'linkInformation' => $linkInformation,
                // Not every resolved link type provides this key
                'url' => $linkInformation['href'] ?? null,
                'attributes' => $tagAttributes
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
            }
        } else {
            // Otherwise store the link as <a> tag as default by TYPO3, with the new link service syntax
            $tagAttributes['href'] = $linkService->asString($linkInformation);
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
        }
    }
    return implode('', $blockSplit);
}
564
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting TYPO3-specific <link> tags to <a> tags
 *
 * This functionality is only used to convert legacy <link> tags to the new linking syntax using <a> tags, and will
 * not be converted back to <link> tags anymore.
 *
 * If hooks are registered under 'modifyParams_LinksRte_PostProc' they render the block (the
 * result of the last hook wins); otherwise an <a> tag with href, target, class and title is
 * built and the inner content is processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_links_rte($value)
{
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>"; odd indexes hold the blocks
    $blockSplit = $this->splitIntoBlock('link', $value, true);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            continue;
        }
        // Drop the closing ">" and split away the first "<link " part
        $openingTagWithoutBracket = substr($this->getFirstTag($v), 0, -1);
        $typoLinkData = explode(' ', $openingTagWithoutBracket, 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);
        $href = $linkService->asString($linkInformation);

        $hooksAreRegistered = isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']);

        if (!$hooksAreRegistered) {
            // Setting the <a> tag
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k]))
                . '</a>';
            continue;
        }

        // Modify parameters by a hook
        // backwards-compatibility: show an error message if the page is not found
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            // Page does not exist
            if (!is_array($pageRecord)) {
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $v,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
            $processor = GeneralUtility::getUserObj($objRef);
            $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    return implode('', $blockSplit);
}
632
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container-like blocks
 * (blockquote, dd, div, header, section, footer, nav, article, aside) are processed recursively,
 * <pre> blocks are left untouched, and in all other blocks line breaks are replaced by spaces.
 * Non-block content has its line breaks normalized and is then handed to divideIntoLines();
 * empty non-block parts are dropped. The parts are joined with LF.
 *
 * The recursion depth is limited by $this->TS_transform_db_safecounter; when the limit is hit
 * the value is returned unprocessed. The counter is always left at the value it had on entry.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Give the level back before bailing out, otherwise every overflow would permanently
        // reduce the depth available to later calls
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Keep the opening tag, transform the inner content, re-add a plain closing tag
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is stored as it is
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace(('/[' . LF . ']+/'), ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
702
/**
 * Wraps a-tags that contain a style attribute with a span-tag
 *
 * For every <a> block that has a "style" attribute and no "rteerror" attribute, the style is
 * moved from the <a> tag to a <span> that is put around it.
 *
 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
 * allowed.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd indexes hold A-tag blocks
        if (!($k % 2)) {
            continue;
        }
        list($anchorAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Leave the block alone unless "style" is set and rteerror is not set
        if (!$anchorAttributes['style'] || $anchorAttributes['rteerror']) {
            continue;
        }
        $spanAttributes = ['style' => $anchorAttributes['style']];
        unset($anchorAttributes['style']);
        $blockSplit[$k] = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
            . $this->removeFirstAndLastTag($blockSplit[$k])
            . '</a></span>';
    }
    return implode('', $blockSplit);
}
732
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container-like blocks
 * (blockquote, dd, div, header, section, footer, nav, article, aside) are processed recursively,
 * all other blocks are kept as they are. Non-block content gets its surrounding line breaks
 * adjusted and is then handed to setDivTags(). The parts are joined with LF.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Traverse the blocks
    // NOTE: the non-block branch works on $blockSplit[$k] instead of $v, so it sees the change
    // the preceding block iteration made to the following element
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Based on tagname, we do transformations:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Keep the opening tag, transform the inner content, re-add a plain closing tag
                    $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
            }
            // Strip optional spaces and one line break from the start of the part following the block
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1]);
        } else {
            // NON-block:
            // Tag name of the following part, used to tell whether a block element comes next
            $nextFTN = $this->getFirstTagName($blockSplit[$k + 1]);
            // TRUE if the part consists of line breaks only, optionally surrounded by spaces
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
                // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                if (!$onlyLineBreaks) {
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // If the line contains only linebreaks, remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    return implode(LF, $blockSplit);
}
791
792 /***************************************************************
793 *
794 * Generic RTE transformation, analysis and helper functions
795 *
796 **************************************************************/
797
/**
 * Cleans content that travels from the RTE into the database.
 *
 * Delegates to HTMLcleaner() with the tag configuration returned by getKeepTags('db').
 * Unknown tags are only kept if the processing option "dontRemoveUnknownTags_db" is set.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    $keepTags = $this->getKeepTags('db');
    // Default: remove unknown tags. The option is optional, so it is read through empty()
    // to avoid an "undefined index" notice when it has not been configured.
    $keepUnknownTags = !empty($this->procOptions['dontRemoveUnknownTags_db']);
    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags);
}
814
/**
 * Creates an array of configuration for the HTMLcleaner function based on whether content
 * goes TO or FROM the Rich Text Editor ($direction).
 *
 * The list of tags to keep is built from the default allowed tags plus the processing option
 * "allowTags", minus the processing option "denyTags". The result is cached per direction
 * in object memory.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // The cache slot does not exist on the first call for a direction; guard the read.
    if (!is_array($this->getKeepTags_cache[$direction] ?? null)) {
        // Setting up allowed tags:
        // Default is to get allowed/denied tags from internal array of processing options.
        // All processing options used below are optional, hence the "??" fallbacks.
        $allowedTagsList = $this->defaultAllowedTagsList . ',' . strtolower($this->procOptions['allowTags'] ?? '');
        $keepTags = array_flip(GeneralUtility::trimExplode(',', $allowedTagsList, true));
        // For tags to deny, remove them from $keepTags array.
        // The keys of $keepTags are lower case, so the deny list is lower-cased as well;
        // otherwise an entry such as "SPAN" could never match.
        $denyTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['denyTags'] ?? ''), true);
        foreach ($denyTags as $deniedTag) {
            unset($keepTags[$deniedTag]);
        }
        // Based on the direction of content, set further options:
        switch ($direction) {
            case 'rte':
                // Transforming keepTags array so it can be understood by the HTMLcleaner function.
                // This basically converts the format of the array from TypoScript (having dots) to plain multi-dimensional array.
                // A missing configuration is handed over as NULL, exactly as before.
                list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? null, $keepTags);
                break;
            case 'db':
                // Setting up span tags if they are allowed:
                if (isset($keepTags['span'])) {
                    $keepTags['span'] = [
                        'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                        'fixAttrib' => [
                            'class' => [
                                'removeIfFalse' => 1
                            ]
                        ],
                        'rmTagIfNoAttrib' => 1
                    ];
                    if (!empty($this->allowedClasses)) {
                        $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                    }
                }
                // Setting further options, getting them from the processing options
                $TSc = $this->procOptions['HTMLparser_db.'] ?? [];
                if (empty($TSc['globalNesting'])) {
                    $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
                }
                if (empty($TSc['noAttrib'])) {
                    $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
                }
                // Transforming the array from TypoScript to regular array:
                list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
                break;
        }
        // Caching (internally, in object memory) the result
        $this->getKeepTags_cache[$direction] = $keepTags;
    }
    // Return result:
    return $this->getKeepTags_cache[$direction];
}
876
/**
 * Resolves $value into lines based on its <p> sections; used for content going into the database.
 * The HTML returned from the RTE is turned into ordinary, LF separated lines so it is "human-readable".
 * setDivTags() performs the opposite transformation.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake, decremented on each recursion; at zero the value is returned without splitting. Default is 5.
 * @param bool $returnArray If TRUE and <p> sections were found, the lines are returned as array instead of an imploded string.
 * @return string|array Processed input value; an array of lines only if $returnArray is TRUE and the value was split.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument makes splitIntoBlock() eliminate false end-tags.
    $sections = $this->splitIntoBlock('p', $value, true);
    // No <p> section at all (or recursion limit reached): hand back the plain content
    if (count($sections) <= 1 || $count <= 0) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    $imagePattern = '/\\<(img)(\\s[^>]*)?\\/?>/si';
    $attributePattern = '/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si';
    $tagsKeptOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';

    foreach ($sections as $index => $section) {
        if ($index % 2) {
            // Odd index: a <p> section. Nested <p> tags are exploded recursively.
            $lines = $this->divideIntoLines($this->removeFirstAndLastTag($section), $count - 1, true);
            if (is_array($lines)) {
                // Nested <p> sections were found (considered erroneous markup); their lines replace this section
                $line = implode(LF, $lines);
            } else {
                // A regular paragraph: clean the content and rebuild the surrounding <p> tag
                $line = $this->processContentWithinParagraph($lines, $section);
            }
            // A line consisting only of &nbsp; becomes a pure blank line, unless it carries an image
            // or a tag with one of the listed attributes, which must be treated as possible content.
            $isBlank = trim(strip_tags($line)) === '&nbsp;'
                && !preg_match($imagePattern, $line)
                && !preg_match($attributePattern, trim($line));
            $sections[$index] = $isBlank ? '' : $line;
            continue;
        }

        // Even index: content outside of <p> tags. Only the explicitly allowed tags survive.
        $outside = trim(strip_tags($section, $tagsKeptOutsideParagraphs));
        $outside = $this->sanitizeLineBreaksForContentOnly($outside);
        if ((string)$outside === '') {
            // Nothing left: drop the position entirely
            unset($sections[$index]);
            continue;
        }
        // Wrap the textual part of the remaining content into <p> tags
        $text = strip_tags($outside);
        $sections[$index] = str_replace($text, '<p>' . $text . '</p>', $outside);
    }

    if ($returnArray) {
        return $sections;
    }
    return implode(LF, $sections);
}
933
/**
 * Turns every LF separated line of $value into a <p></p> section, unless the line is already
 * wrapped in <p> or <div> tags or contains an <hr> tag.
 * Used for content going FROM the database TO the RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for HTMLcleaner(), applied to each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line, keeping unknown tags (as they can be removed in the entryHTMLparser),
            // then convert double-encoded &nbsp; back into regular &nbsp;
            // (this used to be switchable via "dontConvAmpInNBSP_rte").
            $line = str_replace('&amp;nbsp;', '&nbsp;', $this->HTMLcleaner($line, $keepTags, 'protect'));
        }
        // Lines containing an <hr> tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $wrappedInDiv = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $wrappedInParagraph = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only set p-tags if the line is not already enclosed by div or p tags
            if (!$wrappedInDiv && !$wrappedInParagraph) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
974
/**
 * Used for transformation from RTE to DB
 *
 * Works on a single line within a <p> tag when storing into the database.
 * The content is cleaned via HTMLcleaner_db(), the attributes of the <p> tag are reduced to
 * the allowed ones, and the result is always wrapped into a <p> tag again.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the content including the surrounding <p> tag
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, and validate the attributes
    $fTag = $this->getFirstTag($fullContentWithTag);
    $tagAttributes = [];
    // Without a list of allowed attributes, no attribute of the <p> tag is kept at all
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($fTag);
        // Make sure the tag attributes only contain the ones that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Only allow classes that are whitelisted in $this->allowedClasses.
        // Most paragraphs carry no class attribute at all, so the key must not be read unguarded.
        $classValue = $tagAttributes['class'] ?? '';
        if (trim($classValue) !== '' && !empty($this->allowedClasses) && !in_array($classValue, $this->allowedClasses, true)) {
            $classes = GeneralUtility::trimExplode(' ', $classValue, true);
            $classes = array_intersect($classes, $this->allowedClasses);
            if (!empty($classes)) {
                $tagAttributes['class'] = implode(' ', $classes);
            } else {
                unset($tagAttributes['class']);
            }
        }
    }
    // Remove any line break
    $content = str_replace(LF, '', $content);
    // Compile the surrounding <p> tag; rtrim() drops the separator blank if there are no attributes
    $content = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
    return $content;
}
1018
/**
 * Normalizes line breaks of content when transforming from RTE to DB:
 * every <hr> tag is rewritten as self-closing tag surrounded by LFs, each pair of LFs is
 * replaced by a single LF (one pass), and an LF at the very start or end is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $rulerPattern = '/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i';
    $edgeLineBreakPattern = '/(^' . LF . ')|(' . LF . '$)/i';

    // Put each <hr> on a line of its own
    $withIsolatedRulers = preg_replace($rulerPattern, LF . '<$1$2/>' . LF, $content);
    // Collapse LF pairs
    $collapsed = str_replace(LF . LF, LF, $withIsolatedRulers);
    // Strip the leading / trailing LF
    return preg_replace($edgeLineBreakPattern, '', $collapsed);
}
1032
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute, use that!
 * Otherwise the "width" / "height" attributes are used.
 *
 * Only the plain "width" / "height" style properties with a px value are considered;
 * properties that merely end in those words (e.g. "max-width", "border-width", "line-height")
 * are ignored.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    // All keys are optional; fall back to neutral values instead of reading undefined indexes
    $style = trim($attribArray['style'] ?? '');
    $w = 0;
    $h = 0;
    if ($style !== '') {
        // The negative lookbehind rejects matches inside longer property names such as "max-width"
        $propertyStart = '/(?<![a-z-])';
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = [];
        // Width
        if (preg_match($propertyStart . 'width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match($propertyStart . 'height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1063
/**
 * Analyses the href of an <A>-tag and classifies it as email, file, anchor, page or external URL.
 * This functionality is not in use anymore
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $url = trim($url);

    // mailto: links
    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        return [
            'url' => trim(substr($url, 7)),
            'type' => 'email',
        ];
    }

    // URLs carrying a "?file:" marker
    $fileMarkerPosition = strpos($url, '?file:');
    if ($fileMarkerPosition !== false) {
        return [
            'type' => 'file',
            'url' => rawurldecode(substr($url, $fileMarkerPosition + 1)),
        ];
    }

    // Everything else is compared against the site URL
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $urlLength = strlen($url);
    // Length of the prefix that $url and the site URL have in common
    $commonLength = 0;
    while ($commonLength < $urlLength && $url[$commonLength] == $siteUrl[$commonLength]) {
        $commonLength++;
    }
    $info = [
        'relScriptPath' => substr($siteUrl, $commonLength),
        'relUrl' => substr($url, $commonLength),
        'url' => $url,
        'type' => 'ext',
    ];
    $urlParts = parse_url($url);
    $siteUrlParts = parse_url($siteUrl);

    // Stays "external" unless the hosts match and the script path is empty (FE-EDIT)
    // or starts with TYPO3_mainDir
    if ($urlParts['host'] != $siteUrlParts['host'] || ($info['relScriptPath'] && !(defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir))) {
        unset($info['relScriptPath'], $info['relUrl']);
        return $info;
    }

    // New processing order 100502
    $relativeParts = parse_url($info['relUrl']);
    if ($info['relUrl'] === '#' . $urlParts['fragment']) {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relativeParts['path']) || $relativeParts['path'] === 'index.php') {
        // URL is a page (id parameter)
        $queryPieces = preg_split('/^id=/', $relativeParts['query']);
        $idAndParameters = preg_replace('/&id=[^&]*/', '', $queryPieces[1]);
        $parameters = explode('&', $idAndParameters);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $relativeParts['fragment'];
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1127
/**
 * Converts the href of <A>-tags to absolute URLs by prepending the site URL
 * to every non-empty href that has no scheme. Nested <a> tags are processed recursively.
 *
 * <a> tags without href attribute (or with an empty one) could be anchors and are
 * preserved without getting a href added.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP Not evaluated by this method (not in use anymore); kept for backwards compatibility.
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions hold <a> blocks
        if ($k % 2) {
            list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
            // Checking if there is a scheme, and if not, prepend the current url.
            // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
            // A missing href attribute must be treated like an empty one; comparing the
            // undefined index against '' would otherwise add a href to pure anchors.
            $href = (string)($attribArray['href'] ?? '');
            if ($href !== '') {
                // parse_url() returns FALSE on seriously malformed URLs; empty() copes with that
                $uP = parse_url(strtolower($href));
                if (empty($uP['scheme'])) {
                    $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $href;
                }
            }
            $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
            $eTag = '</a>';
            $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
1157
/**
 * Adjusts the width / height attributes of an image according to the processing option "plainImageMode":
 * - lockDimensions: both attributes are set to the dimensions from $imageInfo
 * - lockRatioWhenSmaller: the width is capped at the width from $imageInfo, the height follows the ratio of $imageInfo
 * - lockRatio: the height follows the ratio of $imageInfo
 *
 * @param array $imageInfo info array of the image; key 0 is read as width, key 1 as height
 * @param array $attribArray array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    // Nothing to do if the option is not set
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];

    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }

    // Cap the width at the width from $imageInfo
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    // Both ratio modes derive the height from the (possibly capped) width
    $keepsRatio = $mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio';
    if ($keepsRatio && $imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1192
/**
 * Called before any processing / transformation is made.
 * Strips every CR (char 13) from the content so that only LFs (char 10) are dealt with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the content without any CR
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1207
/**
 * Called after all processing / transformation was done, just before the RTE parser
 * returns the content: all line breaks are unified to CRLF again.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First drop any CR that has entered in the meantime, then turn every LF into CRLF
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
1225
/**
 * Content Transformation from DB to RTE
 * Resolves the href of every <a> tag through the LinkService; for links of type "page"
 * whose page record cannot be fetched, a "data-rte-error" attribute and a conspicuous
 * inline style are added to the tag.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($segments as $index => $segment) {
        // Only odd positions hold <a> blocks
        if ($index % 2 !== 0) {
            list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
            // Tags without href are left exactly as they are
            if (!empty($attributes['href'])) {
                $linkDetails = $linkService->resolve($attributes['href']);
                if ($linkDetails['type'] === LinkService::TYPE_PAGE
                    && !is_array(BackendUtility::getRecord('pages', $linkDetails['pageuid']))
                ) {
                    // Page does not exist
                    $attributes['data-rte-error'] = 'Page with ID ' . $linkDetails['pageuid'] . ' not found';
                    $styling = 'background-color: yellow; border:2px red solid; color: black;';
                    $attributes['style'] = empty($attributes['style'])
                        ? $styling
                        : $attributes['style'] . ' ' . $styling;
                }
                // Always rewrite the block to allow the nested calling even if a page is found
                $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
                $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($segment));
                $segments[$index] = $openingTag . $innerContent . '</a>';
            }
        }
    }
    return implode('', $segments);
}
1268
/**
 * Content Transformation from RTE to DB
 * Strips the "data-rte-error" attribute and the broken-link inline styling from <a> tags,
 * i.e. the markers that are added to broken links.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    foreach ($segments as $index => $segment) {
        // Only odd positions hold <a> blocks
        if ($index % 2 !== 0) {
            list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
            // Tags without href are left exactly as they are
            if (!empty($attributes['href'])) {
                // Always remove the styling again (regardless of the page was found or not)
                // so the database does not contain ugly stuff
                unset($attributes['data-rte-error']);
                if (isset($attributes['style'])) {
                    $remainingStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
                    if (empty($remainingStyle)) {
                        unset($attributes['style']);
                    } else {
                        $attributes['style'] = $remainingStyle;
                    }
                }
                $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
                $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($segment));
                $segments[$index] = $openingTag . $innerContent . '</a>';
            }
        }
    }
    return implode('', $segments);
}
1303
/**
 * Fetches a logger named after the concrete class of this object from the LogManager.
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
}
1315 }