d569085ed5c7a169f6a21da9005dbee7d13e0e61
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Psr\Log\LoggerAwareInterface;
18 use Psr\Log\LoggerAwareTrait;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
21 use TYPO3\CMS\Core\LinkHandling\LinkService;
22 use TYPO3\CMS\Core\Resource;
23 use TYPO3\CMS\Core\Type\File\ImageInfo;
24 use TYPO3\CMS\Core\Utility\GeneralUtility;
25 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
26
27 /**
28 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
29 *
30 * Concerning line breaks:
31 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
32 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
33 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
34 */
35 class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
36 {
37 use LoggerAwareTrait;
38
/**
 * Comma-separated list of block elements. Content inside these elements is not
 * wrapped into a "p" tag by the transformations; the list can be replaced via
 * the 'proc.blockElementList' option.
 *
 * @var string
 */
public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma-separated list of the tags that are allowed by default.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';

/**
 * The pid of the record manipulated by this class; set through init().
 *
 * @var int
 */
public $recPid = 0;

/**
 * Element reference in the form [table]:[field], eg. "tt_content:bodytext";
 * set through init().
 *
 * @var string
 */
public $elRef = '';

/**
 * The current Page TSconfig as handed over to RTE_transform().
 *
 * @var array
 */
public $tsConfig = [];

/**
 * The "proc." sub-array of the TSconfig options coming from Page TSconfig.
 *
 * @var array
 */
public $procOptions = [];

/**
 * Run-away brake for recursive calls of TS_transform_db().
 *
 * @var int
 */
public $TS_transform_db_safecounter = 100;

/**
 * Data caching for processing function
 *
 * @var array
 */
public $getKeepTags_cache = [];

/**
 * Storage of the CSS class names that are allowed in the RTE.
 *
 * @var array
 */
public $allowedClasses = [];

/**
 * The HTML attributes that are kept on <p> tags. As <p> tags currently receive a special
 * handling, they have their own configuration place ('proc.allowAttributes.' or
 * 'proc.keepPDIVattribs').
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop',
];

/**
 * The tags that may appear outside of <p> sections - usually similar to the block elements
 * plus some special tags like <hr> and <img> (if images are allowed).
 * Can be overridden completely via 'proc.allowTagsOutside'.
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'header',
    'hr',
    'nav',
    'section',
];
138
/**
 * Stores the element reference and the page id of the record that is about to be processed.
 *
 * @param string $elRef Element reference, eg. "tt_content:bodytext"
 * @param int $recPid PID of the record (page id)
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
150
151 /**********************************************
152 *
153 * Main function
154 *
155 **********************************************/
/**
 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
 * This is the main function called from DataHandler and transfer data classes
 *
 * Processing order: line breaks are streamlined, the optional "entryHTMLparser_[direction]" cleaner
 * is run, every resolved transformation mode is applied (a transformation class registered for the
 * mode in $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS'] takes precedence over the built-in handling),
 * the optional "exitHTMLparser_[direction]" cleaner is run and finally the line breaks are
 * post-processed. Any $direction other than "db" or "rte" skips the transformation modes.
 *
 * @param string $value Input value
 * @param null $_ unused
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $_ = null, $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // A configuration without a "proc." section is valid and must not raise an undefined index notice
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags.
    // Note: the array form ("allowTagsOutside.") is only evaluated if the string form is set as well.
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called ("overruleMode" wins over "mode")
    $overruleMode = (string)($this->procOptions['overruleMode'] ?? '');
    if ($overruleMode !== '') {
        $modes = GeneralUtility::trimExplode(',', $overruleMode);
    } else {
        $modes = [$this->procOptions['mode'] ?? ''];
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // Class name of a user defined transformation registered for this mode, if any
        $customTransformation = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if (!empty($customTransformation)) {
                $_procObj = GeneralUtility::makeInstance($customTransformation);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if (!empty($customTransformation)) {
                $_procObj = GeneralUtility::makeInstance($customTransformation);
                $_procObj->pObj = $this;
                // Hand over the mode key exactly like it is done for the "db" direction
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value, true);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
277
/**
 * Resolves which transformation modes have to be executed and makes sure every mode is listed only once.
 *
 * The shortcut "default" is expanded to "detectbrokenlinks,css_transform,ts_images,ts_links".
 * For the direction "rte" the modes are returned in reversed order.
 *
 * @param string $direction
 * @param array $modes
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // Expand the shortcut "default" inside the comma-separated mode list
    $expandedModeList = str_replace(
        'default',
        'detectbrokenlinks,css_transform,ts_images,ts_links',
        implode(',', $modes)
    );

    // Drop empty entries and duplicates
    $resolvedModes = array_unique(GeneralUtility::trimExplode(',', $expandedModeList, true));

    return $direction === 'rte' ? array_reverse($resolvedModes) : $resolvedModes;
}
301
/**
 * Passes the content through the HTML cleaner, if the given directive is enabled.
 *
 * These additional HTML cleaner configurations are applied either before or after the main
 * transformation and are thus totally independent processing options. They can currently
 * only be set up via TSconfig (procOptions).
 *
 * @param string $content
 * @param string $configurationDirective key in procOptions that enables the cleaner; the same key with a
 *                                       trailing dot holds the HTML parser configuration
 * @return string the processed content
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    if (empty($this->procOptions[$configurationDirective])) {
        // Cleaner is not enabled: hand back the content untouched
        return $content;
    }
    $parserConfiguration = $this->procOptions[$configurationDirective . '.'];
    list($tagsToKeep, $keepUnmatchedTags, $specialCharsMode, $extraConfiguration) = $this->HTMLparserConfig($parserConfiguration);
    return $this->HTMLcleaner($content, $tagsToKeep, $keepUnmatchedTags, $specialCharsMode, $extraConfiguration);
}
321
322 /************************************
323 *
324 * Specific RTE TRANSFORMATION functions
325 *
326 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 *
 * For every <img> tag one of the following cases applies:
 * - the tag carries a "data-htmlarea-file-uid" that resolves to a file: the tag is either a plain
 *   reference to the original file (optionally adjusted by "plainImageMode") or a "magic" image,
 *   for which a processed file with the requested dimensions is created
 * - the image URL does not start with the site URL: the image is fetched and stored in the default
 *   upload folder of the backend user (only in backend mode, only for jpg/jpeg/gif/png and unless
 *   "dontFetchExtPictures" is set)
 * - the image URL starts with the site URL and points to an existing local file: a file uid is
 *   looked up and added to the tag
 * Finally width/height are removed from the style attribute, an alt attribute is enforced and a src
 * starting with the site URL is made relative.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags; odd indexes hold the tags, even indexes the content in between
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var Resource\ResourceFactory $resourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var Resource\Service\MagicImageService $magicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            if ($k % 2 === 0) {
                // Not an image tag
                continue;
            }
            // Get attributes
            list($attribArray) = $this->get_tag_attributes($v, true);
            // It's usually an absolute URL coming from the RTE into the Database.
            $absoluteUrl = trim($attribArray['src'] ?? '');
            // Make the URL absolute if it has no scheme and starts with the site path.
            // The scheme has to be read with parse_url(): pathinfo() never provides a "scheme" key.
            if ($sitePath
                && empty(parse_url($absoluteUrl, PHP_URL_SCHEME))
                && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)
            ) {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $absoluteUrl = $siteUrl . substr($absoluteUrl, strlen($sitePath));
            }
            // Image dimensions set in the img tag, if any
            $imgTagDimensions = $this->getWHFromAttribs($attribArray);
            if ($imgTagDimensions[0]) {
                $attribArray['width'] = $imgTagDimensions[0];
            }
            if ($imgTagDimensions[1]) {
                $attribArray['height'] = $imgTagDimensions[1];
            }
            $originalImageFile = null;
            if (!empty($attribArray['data-htmlarea-file-uid'])) {
                // An original image file uid is available
                try {
                    /** @var Resource\File $originalImageFile */
                    $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                    // Log the fact the file could not be retrieved (a logger is only available if one was injected)
                    $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                    if ($this->logger !== null) {
                        $this->logger->error($message);
                    }
                }
            }
            if ($originalImageFile instanceof Resource\File) {
                // Public url of local file is relative to the site url, absolute otherwise
                $publicUrl = (string)$originalImageFile->getPublicUrl();
                if ($absoluteUrl === $publicUrl || $absoluteUrl === $siteUrl . $publicUrl) {
                    // This is a plain image, i.e. reference to the original image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // "plain image mode" is configured
                        // Find the dimensions of the original image
                        $imageInfo = [
                            $originalImageFile->getProperty('width'),
                            $originalImageFile->getProperty('height')
                        ];
                        if (!$imageInfo[0] || !$imageInfo[1]) {
                            // Dimensions are not known as file properties: read them from the file itself
                            $filePath = $originalImageFile->getForLocalProcessing(false);
                            $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filePath);
                            $imageInfo = [
                                $imageInfoObject->getWidth(),
                                $imageInfoObject->getHeight()
                            ];
                        }
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                } else {
                    // Magic image case: get a processed file with the requested configuration
                    $imageConfiguration = [
                        'width' => $imgTagDimensions[0],
                        'height' => $imgTagDimensions[1]
                    ];
                    $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)
                && empty($this->procOptions['dontFetchExtPictures'])
                && TYPO3_MODE === 'BE'
            ) {
                // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                $externalFile = GeneralUtility::getUrl($absoluteUrl);
                if ($externalFile) {
                    // The URL may have no path or no file extension at all
                    $pI = pathinfo((string)parse_url($absoluteUrl, PHP_URL_PATH));
                    $originalExtension = $pI['extension'] ?? '';
                    $extension = strtolower($originalExtension);
                    if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                        $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $originalExtension;
                        // We insert this image into the user default upload folder.
                        // The element reference may be empty or lack the ":field" part.
                        $referenceParts = explode(':', $this->elRef);
                        $table = $referenceParts[0];
                        $field = $referenceParts[1] ?? null;
                        /** @var Resource\Folder $folder */
                        $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                        /** @var Resource\File $fileObject */
                        $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                        $imageConfiguration = [
                            'width' => $attribArray['width'] ?? null,
                            'height' => $attribArray['height'] ?? null
                        ];
                        $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                }
            } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                // Finally, check image as local file (siteURL equals the one of the image)
                // Image has no data-htmlarea-file-uid attribute
                // Relative path, rawurldecoded for special characters.
                $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                // Absolute filepath, locked to relative path of this project
                $filepath = GeneralUtility::getFileAbsFileName($path);
                // Check file existence (in relative directory to this installation!)
                if ($filepath && @is_file($filepath)) {
                    // Treat it as a plain image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // If "plain image mode" has been configured
                        // Find the original dimensions of the image
                        $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filepath);
                        $imageInfo = [
                            $imageInfoObject->getWidth(),
                            $imageInfoObject->getHeight()
                        ];
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                    // Let's try to find a file uid for this image
                    try {
                        $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                        if ($fileOrFolderObject instanceof Resource\FileInterface) {
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            /** @var Resource\AbstractFile $fileObject */
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            // @todo if the retrieved file is a processed file, get the original file...
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        }
                    } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // Nothing to be done if file/folder not found
                    }
                }
            }
            // Remove width and height from style attribute (the attribute is always written, possibly empty)
            $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            // Convert absolute to relative url
            if (GeneralUtility::isFirstPartOfStr($attribArray['src'] ?? '', $siteUrl)) {
                $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
        }
    }
    return implode('', $imgSplit);
}
494
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares the images of database content for the RTE: every src attribute that does not
 * start with "http" is turned into an absolute URL and a missing alt attribute is added.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // Odd indexes of the split result hold the <img> tags
    $segments = $this->splitTags('img', $value);
    if (count($segments) <= 1) {
        // No image tag found, nothing to process
        return implode('', $segments);
    }

    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // Matches the site path at the very beginning of a src value
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';

    foreach ($segments as $index => $segment) {
        if ($index % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($segment, true);
        $trimmedSource = trim($attributes['src']);
        if (stripos($trimmedSource, 'http') !== 0) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed first,
            // because it is part of $siteUrl already
            $attributes['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attributes['src']);
        }
        // The alt attribute is mandatory
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $segments[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, true, true) . ' />';
    }

    return implode('', $segments);
}
533
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
 *
 * Splits content into <a> tag blocks and processes each tag, and allows hooks to actually render
 * the result. Nested content of an anchor is processed recursively.
 *
 * If the href cannot be handled by the LinkService (UnknownLinkHandlerException), the href is
 * stored as it was submitted instead of aborting the whole transformation.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
public function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if ($k % 2 === 0) {
            // Content outside of an anchor block stays untouched
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        try {
            $linkInformation = $linkService->resolve($tagAttributes['href'] ?? '');
        } catch (UnknownLinkHandlerException $e) {
            // The href uses a syntax no link handler is responsible for
            $linkInformation = null;
        }

        // Modify parameters, this hook should be deprecated
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
            trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksDb_PostProc" will be removed in TYPO3 v10, use LinkService syntax to modify links to be stored in the database.', E_USER_DEPRECATED);
            $parameters = [
                'currentBlock' => $v,
                'linkInformation' => $linkInformation,
                'url' => $linkInformation['href'] ?? null,
                'attributes' => $tagAttributes
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] ?? [] as $className) {
                $processor = GeneralUtility::makeInstance($className);
                $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
            }
        } else {
            // Otherwise store the link as <a> tag as default by TYPO3, with the new link service syntax
            if ($linkInformation !== null) {
                try {
                    $tagAttributes['href'] = $linkService->asString($linkInformation);
                } catch (UnknownLinkHandlerException $e) {
                    // Keep the submitted href if the link cannot be converted
                }
            }
            $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
        }
    }
    return implode('', $blockSplit);
}
577
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converts the TYPO3-specific <link> pseudo tags into <a> tags.
 *
 * This only exists to migrate legacy <link> tags to the linking syntax based on <a> tags; the result
 * is not converted back to <link> tags anymore. The content is passed through TS_AtagToAbs() first.
 *
 * @param string $value Content input
 * @param bool $internallyCalledFromCore internal option for calls where the Core is still using this function, to suppress method deprecations
 * @return string Content output
 * @deprecated will be removed in TYPO3 v10, only ->TS_AtagToAbs() should be called directly, <link> syntax is deprecated
 */
public function TS_links_rte($value, $internallyCalledFromCore = null)
{
    if ($internallyCalledFromCore === null) {
        trigger_error('This method will be removed in TYPO3 v10, use TS_AtagToAbs() directly and do not use <link> syntax anymore', E_USER_DEPRECATED);
    }
    $value = $this->TS_AtagToAbs($value);
    $legacyLinkFound = false;
    // Split content by the TYPO3 pseudo tag "<link>"; odd indexes hold the <link> blocks
    $segments = $this->splitIntoBlock('link', $value, true);
    foreach ($segments as $index => $segment) {
        if ($index % 2 === 0) {
            continue;
        }
        $legacyLinkFound = true;
        // Drop the closing ">" and split away the first "<link " part
        $openingTag = $this->getFirstTag($segment);
        $typoLinkData = explode(' ', substr($openingTag, 0, -1), 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);

        try {
            $href = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            $href = '';
        }

        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? false;
        if (!is_array($postProcessors)) {
            // Default: build the <a> tag and process the nested content
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $this->TS_links_rte($this->removeFirstAndLastTag($segments[$index]), $internallyCalledFromCore)
                . '</a>';
            continue;
        }

        // The deprecated hook is registered: let the hook classes render the block
        trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksRte_PostProc" will be removed in TYPO3 v10, use the link service to properly use ', E_USER_DEPRECATED);
        // backwards-compatibility: show an error message if the page is not found
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            if (!is_array($pageRecord)) {
                // Page does not exist
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $segment,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        foreach ($postProcessors as $className) {
            $processor = GeneralUtility::makeInstance($className);
            $segments[$index] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    if ($legacyLinkFound) {
        trigger_error('Content with <link> syntax was found, update your content to use the t3:// syntax, and migrate your content via the upgrade wizard in the install tool', E_USER_DEPRECATED);
    }
    return implode('', $segments);
}
660
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container blocks (blockquote,
 * dd, div, header, section, footer, nav, article, aside) are processed recursively, <pre> blocks
 * are left untouched and all other blocks only get their line breaks replaced by spaces. Content
 * outside of blocks is passed to divideIntoLines().
 *
 * The recursion depth is limited by $this->TS_transform_db_safecounter; once the limit is
 * reached, the value is returned unprocessed. The counter is always restored when a call ends.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    // Check before decrementing, so that hitting the brake does not use up a recursion level for good.
    if ($this->TS_transform_db_safecounter < 1) {
        return $value;
    }
    $this->TS_transform_db_safecounter--;
    try {
        // Split the content from RTE by the occurrence of these blocks:
        $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

        // Avoid superfluous linebreaks by transform_db after ending headListTag
        while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
            array_pop($blockSplit);
        }

        // Traverse the blocks
        foreach ($blockSplit as $k => $v) {
            if ($k % 2) {
                // Inside block:
                $tag = $this->getFirstTag($v);
                $tagName = strtolower($this->getFirstTagName($v));
                // Process based on the tag:
                switch ($tagName) {
                    case 'blockquote':
                    case 'dd':
                    case 'div':
                    case 'header':
                    case 'section':
                    case 'footer':
                    case 'nav':
                    case 'article':
                    case 'aside':
                        // Container elements: transform the inner content recursively
                        $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                        break;
                    case 'pre':
                        // Preformatted content is kept as it is
                        break;
                    default:
                        // usually <hx> tags and <table> tags where no other block elements are within the tags
                        // Eliminate true linebreaks inside block element tags
                        $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                }
            } else {
                // NON-block:
                if (trim($blockSplit[$k]) !== '') {
                    $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                    // Remove linebreaks preceding hr tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                    // Remove linebreaks following hr tags
                    $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                    // Replace other linebreaks with space
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                    $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
                } else {
                    // Whitespace-only content between blocks is dropped
                    unset($blockSplit[$k]);
                }
            }
        }
        return implode(LF, $blockSplit);
    } finally {
        // Give the recursion level back, also if the processing was aborted by an exception
        $this->TS_transform_db_safecounter++;
    }
}
730
/**
 * Wraps every <a> tag that has a style attribute (and no "rteerror" attribute) into a <span> tag
 * which takes over the style attribute.
 *
 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
 * allowed.
 *
 * @param string $value Content input
 * @return string Content output
 * @deprecated since TYPO3 v9.0, will be removed in TYPO3 v10, see comment above, adding attribute "rteerror" is not necessary anymore.
 */
public function transformStyledATags($value)
{
    trigger_error('This method will be removed in TYPO3 v10. TYPO3 can handle style attribute in anchor tags properly since TYPO3 v8 LTS', E_USER_DEPRECATED);
    // Odd indexes of the split result hold the <a> blocks
    $parts = $this->splitIntoBlock('A', $value);
    foreach ($parts as $index => $part) {
        if ($index % 2 === 0) {
            continue;
        }
        list($anchorAttributes) = $this->get_tag_attributes($this->getFirstTag($part), true);
        // Only anchors with a "style" attribute and without "rteerror" are wrapped
        if (!$anchorAttributes['style'] || $anchorAttributes['rteerror']) {
            continue;
        }
        // Move the style attribute from the anchor to the wrapping span
        $spanAttributes = ['style' => $anchorAttributes['style']];
        unset($anchorAttributes['style']);
        $openingTags = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>';
        $parts[$index] = $openingTags . $this->removeFirstAndLastTag($parts[$index]) . '</a></span>';
    }
    return implode('', $parts);
}
762
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the elements of $this->blockElementList. Container blocks (blockquote,
 * dd, div, header, section, footer, nav, article, aside) are processed recursively, the content
 * outside of blocks is passed to setDivTags().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Block elements whose inner content is transformed recursively
    $containerTagNames = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTagNames, true)) {
                $innerContent = $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]));
                $blockSplit[$k] = $tag . $innerContent . '</' . $tagName . '>';
            }
            // Strip the first line break (and spaces in front of it) of the part following the block
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1]);
            continue;
        }

        // NON-block:
        $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) === 1;
        $isFollowedByBlock = GeneralUtility::inList($this->blockElementList, $nextFTN);
        // If the line is followed by a block or is the last line:
        if ($isFollowedByBlock || !isset($blockSplit[$k + 1])) {
            // Only line breaks: remove the leading line break.
            // Otherwise: reduce the number of trailing line breaks by 1.
            $blockSplit[$k] = $onlyLineBreaks
                ? preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k])
                : preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
        }
        // A blank line is dropped, unless the line only contained line breaks
        if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
            unset($blockSplit[$k]);
        } else {
            $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
        }
    }
    return implode(LF, $blockSplit);
}
821
822 /***************************************************************
823 *
824 * Generic RTE transformation, analysis and helper functions
825 *
826 **************************************************************/
827
/**
 * Cleans content that is about to be stored in the database.
 *
 * Delegates to HTMLcleaner() with the tag configuration getKeepTags() builds for the "db"
 * direction. Unknown tags are removed by default; they are kept when the processing option
 * "dontRemoveUnknownTags_db" is set.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        (bool)($this->procOptions['dontRemoveUnknownTags_db'] ?? false)
    );
}
844
/**
 * Builds the tag configuration for the HTMLcleaner function, depending on whether content
 * goes TO or FROM the Rich Text Editor ($direction).
 *
 * The list of kept tags consists of $this->defaultAllowedTagsList plus the processing option
 * "allowTags" (or "allowTags."), minus the tags of the processing option "denyTags". The result
 * is stored in $this->getKeepTags_cache per direction and reused on later calls.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    $alreadyBuilt = isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction]);
    if ($alreadyBuilt) {
        return $this->getKeepTags_cache[$direction];
    }

    // Allowed tags: the defaults, extended by the configured ones
    $hasAllowTagsArray = isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.']);
    $configuredTags = $hasAllowTagsArray
        ? implode(',', $this->procOptions['allowTags.'])
        : ($this->procOptions['allowTags'] ?? '');
    $allTags = $this->defaultAllowedTagsList . ',' . strtolower($configuredTags);
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $allTags, true));

    // Denied tags are taken out of the list again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'] ?? '', true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Direction specific settings
    switch ($direction) {
        case 'rte':
            // Converts the TypoScript notation (with dots) into the plain multi-dimensional
            // array the HTMLcleaner function understands
            $keepTags = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags)[0];
            break;
        case 'db':
            // Span tags, if allowed, get a fixed attribute configuration
            if (isset($keepTags['span'])) {
                $classRules = ['removeIfFalse' => 1];
                if (!empty($this->allowedClasses)) {
                    $classRules['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => $classRules
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
            }
            // Further options come from the processing options, with defaults for
            // "globalNesting" and "noAttrib"
            $dbParserConfig = $this->procOptions['HTMLparser_db.'] ?? [];
            if (empty($dbParserConfig['globalNesting'])) {
                $dbParserConfig['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($dbParserConfig['noAttrib'])) {
                $dbParserConfig['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Transforming the array from TypoScript to regular array
            $keepTags = $this->HTMLparserConfig($dbParserConfig, $keepTags)[0];
            break;
    }

    // Remember the result in object memory
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
911
/**
 * Resolves $value into lines based on its <p> sections, for content going into the database.
 * The lines are joined by LF (or returned as an array), which turns the HTML returned from the
 * RTE into 'human-readable' lines. The function ->setDivTags does the opposite.
 *
 * If the value contains no <p> section, or the recursion brake is used up, the value is only
 * passed through sanitizeLineBreaksForContentOnly() and returned as a string, regardless of
 * $returnArray.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument makes splitIntoBlock() eliminate false end-tags
    $paragraphBlocks = $this->splitIntoBlock('p', $value, true);
    if ($count <= 0 || count($paragraphBlocks) <= 1) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    foreach ($paragraphBlocks as $index => $block) {
        if ($index % 2 === 0) {
            // Outside a <p> section: keep only the tags allowed there
            $allowedTags = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
            $outside = $this->sanitizeLineBreaksForContentOnly(trim(strip_tags($block, $allowedTags)));
            if ((string)$outside === '') {
                // Nothing left, the position is removed
                unset($paragraphBlocks[$index]);
            } else {
                // Wrap the text part of the remaining content into <p> tags
                $plainText = strip_tags($outside);
                $paragraphBlocks[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outside);
            }
            continue;
        }

        // Inside a <p> section: explode any further p nesting recursively
        $innerLines = $this->divideIntoLines($this->removeFirstAndLastTag($block), $count - 1, true);
        if (is_array($innerLines)) {
            // Nested <p> sections were found (considered 'an error'); their lines become the content
            $line = implode(LF, $innerLines);
        } else {
            // No nesting: process the content as a true line, using the attributes of the original <p> tag
            $line = $this->processContentWithinParagraph($innerLines, $block);
        }
        // A line that consists of nothing but &nbsp; is made purely blank, unless it holds an image
        // or a tag with one of the listed attributes, which is treated as possible content.
        $isBlankLine = trim(strip_tags($line)) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line));
        $paragraphBlocks[$index] = $isBlankLine ? '' : $line;
    }

    if ($returnArray) {
        return $paragraphBlocks;
    }
    return implode(LF, $paragraphBlocks);
}
968
/**
 * Turns every line of $value into a <p></p> section, for content going FROM the database TO
 * the RTE. Lines that are already wrapped in <p> or <div> tags, and lines containing an <hr>
 * tag, are not wrapped. Blank lines become "&nbsp;".
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner function, applied to every non-blank line
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            $line = '&nbsp;';
        } else {
            // Clean the line, protecting unknown tags, and turn double-encoded &nbsp; back into
            // regular &nbsp; (formerly controlled by the option "dontConvAmpInNBSP_rte")
            $line = str_replace('&amp;nbsp;', '&nbsp;', $this->HTMLcleaner($line, $keepTags, 'protect'));
        }
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isDivWrapped = strpos($probe, '<div') === 0 && substr($probe, -6) === '</div>';
            $isParagraphWrapped = strpos($probe, '<p') === 0 && substr($probe, -4) === '</p>';
            // Only set p-tags if there is not already div or p tags
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
1009
/**
 * Used for transformation from RTE to DB
 *
 * Handles a single line inside a <p> tag: the content is cleaned via HTMLcleaner_db(), line
 * breaks are removed from it, and it is wrapped into a newly compiled <p> tag. The attributes of
 * the original tag are reduced to $this->allowedAttributesForParagraphTags, and its classes to
 * $this->allowedClasses if that list is not empty.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    $cleanedContent = $this->HTMLcleaner_db($content);
    $openingTag = $this->getFirstTag($fullContentWithTag);

    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        [$parsedAttributes] = $this->get_tag_attributes($openingTag);
        // Drop every attribute that is not explicitly allowed
        $tagAttributes = array_intersect_key($parsedAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // The class attribute is filtered only if a whitelist exists and the attribute value
        // as a whole is not one of the whitelisted classes
        $mustFilterClasses = isset($tagAttributes['class'])
            && trim($tagAttributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($tagAttributes['class'], $this->allowedClasses, true);
        if ($mustFilterClasses) {
            $keptClasses = array_intersect(
                GeneralUtility::trimExplode(' ', $tagAttributes['class'], true),
                $this->allowedClasses
            );
            if (empty($keptClasses)) {
                unset($tagAttributes['class']);
            } else {
                $tagAttributes['class'] = implode(' ', $keptClasses);
            }
        }
    }

    // rtrim() removes the separating space again if there are no attributes to render
    $paragraphTag = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>';
    return $paragraphTag . str_replace(LF, '', $cleanedContent) . '</p>';
}
1053
/**
 * Puts every <hr> tag on a line of its own, used when transforming from RTE to DB.
 *
 * Each <hr> tag is rewritten in self-closing form and surrounded by LFs. Afterwards every pair
 * of LFs is replaced by a single LF (in one pass) and a LF at the very beginning and at the
 * very end of the content is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $withSeparatedRulers = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    $withoutDoubleBreaks = str_replace(LF . LF, LF, $withSeparatedRulers);
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubleBreaks);
}
1067
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute, use that!
 *
 * Only pixel values ("width: 100px") are read from the style attribute. A dimension that is
 * missing there, or zero, is taken from the "width" / "height" attribute instead. Missing
 * attributes are treated as not set and do not raise notices.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = trim((string)($attribArray['style'] ?? ''));
    $w = 0;
    $h = 0;
    if ($style) {
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = [];
        // $reg is only read after a successful match; it is empty otherwise
        if (preg_match('/width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        if (preg_match('/height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes for everything the style did not provide
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1098
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * This functionality is not in use anymore
 *
 * The returned array always holds "url" and "type" (email, file, ext, anchor or page). For URLs
 * on the host of TYPO3_SITE_URL it additionally holds "relScriptPath" and "relUrl", and for
 * page links "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyze.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);
    if (stripos($url, 'mailto:') === 0) {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    $filePosition = strpos($url, '?file:');
    if ($filePosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $filePosition + 1));
        return $info;
    }

    $curURL = (string)GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    // Length of the prefix shared by the URL and the site URL. The comparison is limited to the
    // shorter of both strings, so no offset beyond the end of the site URL is read.
    $comparableLength = min(strlen($url), strlen($curURL));
    $a = 0;
    while ($a < $comparableLength && $url[$a] === $curURL[$a]) {
        $a++;
    }
    $info['relScriptPath'] = substr($curURL, $a);
    $info['relUrl'] = substr($url, $a);
    $info['url'] = $url;
    $info['type'] = 'ext';

    // parse_url() returns FALSE for seriously malformed URLs and omits components not present
    $urlParts = parse_url($url) ?: [];
    $siteUrlParts = parse_url($curURL) ?: [];
    $hostsMatch = (string)($urlParts['host'] ?? '') === (string)($siteUrlParts['host'] ?? '');
    // The script path has to be empty (FE-EDIT) or start with the TYPO3 main directory
    $scriptPathMatches = !$info['relScriptPath']
        || (defined('TYPO3_mainDir') && strpos($info['relScriptPath'], TYPO3_mainDir) === 0);
    if (!$hostsMatch || !$scriptPathMatches) {
        // External URL
        unset($info['relScriptPath'], $info['relUrl']);
        return $info;
    }

    $uP = parse_url((string)$info['relUrl']) ?: [];
    $relativePath = (string)($uP['path'] ?? '');
    if ($info['relUrl'] === '#' . ($urlParts['fragment'] ?? '')) {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relativePath) || $relativePath === 'index.php') {
        // URL is a page (id parameter). The query has to start with "id="; further "&id="
        // parameters are dropped.
        $pp = preg_split('/^id=/', (string)($uP['query'] ?? ''));
        $parameters = explode('&', preg_replace('/&id=[^&]*/', '', $pp[1] ?? ''));
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $uP['fragment'] ?? null;
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1163
/**
 * Converting the href of <A>-tags to absolute URLs
 *
 * An href without a scheme gets TYPO3_SITE_URL prepended. Anchors without an href attribute, or
 * with an empty one, are left untouched, so plain anchors do not turn into links. Nested <a>
 * tags are processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_AtagToAbs($value)
{
    if (func_num_args() > 1) {
        trigger_error('Second argument of TS_AtagToAbs() is not in use and is removed, however the argument in the callers code can be removed without side-effects.', E_USER_DEPRECATED);
    }
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only the odd positions hold <a> blocks
        if ($k % 2 === 0) {
            continue;
        }
        [$attribArray] = $this->get_tag_attributes($this->getFirstTag($v), true);
        // ONLY touch the href if it has content - the <a> tag COULD be an anchor and if so, it
        // should be preserved. A missing href has to be treated like an empty one here: comparing
        // the missing key against '' directly would add an href to every plain anchor.
        $href = (string)($attribArray['href'] ?? '');
        if ($href !== '') {
            // parse_url() returns FALSE on seriously malformed URLs, which empty() copes with
            $uP = parse_url(strtolower($href));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $href;
            }
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($v)) . $eTag;
    }
    return implode('', $blockSplit);
}
1195
/**
 * Apply plain image settings to the dimensions of the image
 *
 * Depending on the processing option "plainImageMode" the attributes are changed as follows:
 * - lockDimensions: width and height are set to the dimensions of the image
 * - lockRatioWhenSmaller: the width is limited to the width of the image, the height follows
 *   the aspect ratio of the image
 * - lockRatio: the height follows the aspect ratio of the image
 * Without the option, or with any other value, the attributes are returned unchanged.
 *
 * @param array $imageInfo: info array of the image, width in key 0 and height in key 1
 * @param array $attribArray: array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    // The option is optional, a missing key must not raise a notice
    $plainImageMode = (string)($this->procOptions['plainImageMode'] ?? '');
    switch ($plainImageMode) {
        case 'lockDimensions':
            $attribArray['width'] = $imageInfo[0];
            $attribArray['height'] = $imageInfo[1];
            break;
        case 'lockRatioWhenSmaller':
            if (isset($attribArray['width']) && $attribArray['width'] > $imageInfo[0]) {
                $attribArray['width'] = $imageInfo[0];
            }
            // Intentionally no break: the height is derived in the same way as for "lockRatio"
        case 'lockRatio':
            // Without a known image width there is no ratio to apply
            if ($imageInfo[0] > 0) {
                $attribArray['height'] = round(($attribArray['width'] ?? 0) * ($imageInfo[1] / $imageInfo[0]));
            }
            break;
    }
    return $attribArray;
}
1230
/**
 * Called before any processing / transformation is made.
 * Strips every CR (char 13) from the content, so that only LFs (char 10) have to be dealt
 * with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1245
/**
 * Called after any processing / transformation was made.
 * Just before the content is returned by the RTE parser, all line breaks are unified to be
 * "CRLF"s again: CRs that entered in the meantime are removed first, then every LF is turned
 * into CRLF.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
1263
/**
 * Content Transformation from DB to RTE
 * Resolves the href of every <a> tag via the LinkService. If it points to a page other than
 * "current" and no record can be fetched for that page, the attribute "data-rte-error" is
 * added to the tag.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // <a> blocks are located at the odd positions
        if ($position % 2 !== 0) {
            [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
            if (!empty($attributes['href'])) {
                $linkDetails = $linkService->resolve($attributes['href']);
                $pointsToSpecificPage = $linkDetails['type'] === LinkService::TYPE_PAGE
                    && $linkDetails['pageuid'] !== 'current';
                if ($pointsToSpecificPage && !is_array(BackendUtility::getRecord('pages', $linkDetails['pageuid']))) {
                    // Page does not exist
                    $attributes['data-rte-error'] = 'Page with ID ' . $linkDetails['pageuid'] . ' not found';
                }
                // The block is rebuilt even if the page was found, so nested <a> tags get checked too
                $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]));
                $blocks[$position] = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
                    . $innerContent
                    . '</a>';
            }
        }
    }
    return implode('', $blocks);
}
1300
/**
 * Content Transformation from RTE to DB
 * Removes the error information from <a> tags that was added to mark broken links: the
 * attribute "data-rte-error" and the highlighting style declaration.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    foreach ($blocks as $position => $value) {
        // <a> blocks are located at the odd positions
        if ($position % 2 !== 0) {
            [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
            if (!empty($attributes['href'])) {
                // The markers are always removed (regardless of the page was found or not),
                // so the database does not contain ugly stuff
                unset($attributes['data-rte-error']);
                if (isset($attributes['style'])) {
                    $remainingStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
                    if (empty($remainingStyle)) {
                        unset($attributes['style']);
                    } else {
                        $attributes['style'] = $remainingStyle;
                    }
                }
                $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($blocks[$position]));
                $blocks[$position] = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
                    . $innerContent
                    . '</a>';
            }
        }
    }
    return implode('', $blocks);
}
1335 }