[TASK] Use ImageInfo class instead of direct PHP GD call
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
19 use TYPO3\CMS\Core\LinkHandling\LinkService;
20 use TYPO3\CMS\Core\Log\LogManager;
21 use TYPO3\CMS\Core\Resource;
22 use TYPO3\CMS\Core\Type\File\ImageInfo;
23 use TYPO3\CMS\Core\Utility\GeneralUtility;
24 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
25
26 /**
27 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
28 *
29 * Concerning line breaks:
30 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
31 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
32 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
33 */
34 class RteHtmlParser extends HtmlParser
35 {
36 /**
37 * List of elements that are not wrapped into a "p" tag while doing the transformation.
38 * @var string
39 */
40 public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
41
42 /**
43 * List of all tags that are allowed by default
44 * @var string
45 */
46 protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
47
48 /**
49 * Set this to the pid of the record manipulated by the class.
50 *
51 * @var int
52 */
53 public $recPid = 0;
54
55 /**
56 * Element reference [table]:[field], eg. "tt_content:bodytext"
57 *
58 * @var string
59 */
60 public $elRef = '';
61
62 /**
63 * Current Page TSConfig
64 *
65 * @var array
66 */
67 public $tsConfig = [];
68
69 /**
70 * Set to the TSconfig options coming from Page TSconfig
71 *
72 * @var array
73 */
74 public $procOptions = [];
75
76 /**
77 * Run-away brake for recursive calls.
78 *
79 * @var int
80 */
81 public $TS_transform_db_safecounter = 100;
82
83 /**
84 * Data caching for processing function
85 *
86 * @var array
87 */
88 public $getKeepTags_cache = [];
89
90 /**
91 * Storage of the allowed CSS class names in the RTE
92 *
93 * @var array
94 */
95 public $allowedClasses = [];
96
97 /**
98 * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling,
99 * they have a special place for configuration via 'proc.keepPDIVattribs'
100 *
101 * @var array
102 */
103 protected $allowedAttributesForParagraphTags = [
104 'class',
105 'align',
106 'id',
107 'title',
108 'dir',
109 'lang',
110 'xml:lang',
111 'itemscope',
112 'itemtype',
113 'itemprop'
114 ];
115
116 /**
117 * Any tags that are allowed outside of <p> sections - usually similar to the block elements
118 * plus some special tags like <hr> and <img> (if images are allowed).
119 * Completely overrideable via 'proc.allowTagsOutside'
120 *
121 * @var array
122 */
123 protected $allowedTagsOutsideOfParagraphs = [
124 'address',
125 'article',
126 'aside',
127 'blockquote',
128 'div',
129 'footer',
130 'header',
131 'hr',
132 'nav',
133 'section'
134 ];
135
136 /**
137 * Initialize, setting element reference and record PID
138 *
139 * @param string $elRef Element reference, eg. "tt_content:bodytext"
140 * @param int $recPid PID of the record (page id)
141 */
public function init($elRef = '', $recPid = 0)
{
    // Remember which "[table]:[field]" element is processed and on which page its record lives
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
147
148 /**********************************************
149 *
150 * Main function
151 *
152 **********************************************/
153 /**
154 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
155 * This is the main function called from DataHandler and transfer data classes
156 *
157 * @param string $value Input value
158 * @param array $specConf deprecated old "defaultExtras" parsed as array
159 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
160 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
161 * @return string Output value
162 */
public function RTE_transform($value, $specConf = [], $direction = 'rte', $thisConfig = [])
{
    // Store the configuration; "proc." may be missing, in which case an empty option set is used
    $this->tsConfig = $thisConfig;
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called
    if ((string)($this->procOptions['overruleMode'] ?? '') !== '') {
        $modes = GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']);
    } elseif (!empty($this->procOptions['mode'])) {
        $modes = [$this->procOptions['mode']];
    } else {
        // Get parameters for rte_transformation:
        // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the else{} part can be removed in v9
        GeneralUtility::deprecationLog(
            'Argument 2 of RteHtmlParser::RTE_transform() is deprecated. Transformations should be given in $thisConfig[\'proc.\'][\'overruleMode\']'
        );
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters'] ?? null);
        $modes = GeneralUtility::trimExplode('-', $specialFieldConfiguration['mode'] ?? '');
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        if ($direction !== 'db' && $direction !== 'rte') {
            // Only the two known directions trigger transformations
            continue;
        }
        // Checking for user defined transformation registered for this mode
        $_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($_classRef) {
            $_procObj = GeneralUtility::getUserObj($_classRef);
            $_procObj->pObj = $this;
            // The key is handed over in both directions, so an object registered
            // for several keys can tell which transformation is being requested.
            $_procObj->transformationKey = $cmd;
            $value = $direction === 'db'
                ? $_procObj->transform_db($value, $this)
                : $_procObj->transform_rte($value, $this);
        } elseif ($direction === 'db') {
            // ... else use defaults:
            switch ($cmd) {
                case 'detectbrokenlinks':
                    $value = $this->removeBrokenLinkMarkers($value);
                    break;
                case 'ts_images':
                    $value = $this->TS_images_db($value);
                    break;
                case 'ts_links':
                    $value = $this->TS_links_db($value);
                    break;
                case 'css_transform':
                    // Transform empty paragraphs into spacing paragraphs
                    $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                    // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                    $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                    $value = $this->TS_transform_db($value);
                    break;
                default:
                    // Do nothing
            }
        } else {
            // ... else use defaults:
            switch ($cmd) {
                case 'detectbrokenlinks':
                    $value = $this->markBrokenLinks($value);
                    break;
                case 'ts_images':
                    $value = $this->TS_images_rte($value);
                    break;
                case 'ts_links':
                    $value = $this->TS_links_rte($value);
                    break;
                case 'css_transform':
                    $value = $this->TS_transform_rte($value);
                    break;
                default:
                    // Do nothing
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
282
283 /**
284 * Ensures what transformation modes should be executed, and that they are only executed once.
285 *
286 * @param string $direction
287 * @param array $modes
288 * @return array the resolved transformation modes
289 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // The shortcut "default" expands to all standard modes; so does "ts_css".
    // @deprecated since TYPO3 v8, will be removed in TYPO3 v9 - the "ts_css" shortcut can be removed in v9
    $expandedShortcut = 'detectbrokenlinks,css_transform,ts_images,ts_links';
    $modeList = str_replace(['default', 'ts_css'], $expandedShortcut, implode(',', $modes));

    // Drop empty entries and make sure every mode is listed only once
    $uniqueModes = array_unique(GeneralUtility::trimExplode(',', $modeList, true));

    // Towards the RTE the modes are applied in reverse order
    return $direction === 'rte' ? array_reverse($uniqueModes) : $uniqueModes;
}
309
310 /**
311 * Runs the HTML parser if it is configured
312 * Getting additional HTML cleaner configuration. These are applied either before or after the main transformation
313 * is done and thus totally independent processing options you can set up.
314 *
315 * This is only possible via TSconfig (procOptions) currently.
316 *
317 * @param string $content
318 * @param string $configurationDirective used to look up in the procOptions if enabled, and then fetch the parser configuration from the same key with a trailing dot
319 * @return string the processed content
320 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    // Nothing to do unless the directive is switched on in the processing options
    if (!$this->procOptions[$configurationDirective]) {
        return $content;
    }
    // The parser setup lives under the same key with a trailing dot
    $parserSetup = $this->HTMLparserConfig($this->procOptions[$configurationDirective . '.']);
    // Entries 0..3: keep tags, keep non-matched tags, hsc mode, additional configuration
    return $this->HTMLcleaner($content, $parserSetup[0], $parserSetup[1], $parserSetup[2], $parserSetup[3]);
}
329
330 /************************************
331 *
332 * Specific RTE TRANSFORMATION functions
333 *
334 *************************************/
335 /**
336 * Transformation handler: 'ts_images' / direction: "db"
337 * Processing images inserted in the RTE.
338 * This is used when content goes from the RTE to the database.
339 * Images inserted in the RTE have an absolute URL applied to the src attribute. This URL is converted to a relative URL.
340 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
341 * Also "magic" images are processed here.
342 *
343 * @param string $value The content from RTE going to Database
344 * @return string Processed content
345 */
public function TS_images_db($value)
{
    // Split content by <img> tags; the odd-indexed parts of the result are the tags themselves
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            if (!($k % 2)) {
                // Content between two image tags is left untouched
                continue;
            }
            // Image found, do processing: get attributes
            list($attribArray) = $this->get_tag_attributes($v, true);
            // It's always an absolute URL coming from the RTE into the Database.
            $absoluteUrl = trim($attribArray['src'] ?? '');
            // Make path absolute if it is relative and we have a site path which is not '/'.
            // The scheme has to be read with parse_url(); pathinfo() never provides a "scheme" entry.
            $urlScheme = parse_url($absoluteUrl, PHP_URL_SCHEME);
            if ($sitePath && !$urlScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                $absoluteUrl = $siteUrl . $absoluteUrl;
            }
            // Image dimensions set in the img tag, if any
            $imgTagDimensions = $this->getWHFromAttribs($attribArray);
            if ($imgTagDimensions[0]) {
                $attribArray['width'] = $imgTagDimensions[0];
            }
            if ($imgTagDimensions[1]) {
                $attribArray['height'] = $imgTagDimensions[1];
            }
            $originalImageFile = null;
            if (!empty($attribArray['data-htmlarea-file-uid'])) {
                // An original image file uid is available
                try {
                    /** @var $originalImageFile Resource\File */
                    $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                    // Log the fact the file could not be retrieved.
                    $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                    $this->getLogger()->error($message);
                }
            }
            if ($originalImageFile instanceof Resource\File) {
                // Public url of local file is relative to the site url, absolute otherwise
                if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                    // This is a plain image, i.e. reference to the original image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // "plain image mode" is configured
                        // Find the dimensions of the original image
                        $imageInfo = [
                            $originalImageFile->getProperty('width'),
                            $originalImageFile->getProperty('height')
                        ];
                        if (!$imageInfo[0] || !$imageInfo[1]) {
                            // Dimensions are not stored as file properties: read them from the file itself
                            $filePath = $originalImageFile->getForLocalProcessing(false);
                            $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filePath);
                            $imageInfo = [
                                $imageInfoObject->getWidth(),
                                $imageInfoObject->getHeight()
                            ];
                        }
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                } else {
                    // Magic image case: get a processed file with the requested configuration
                    $imageConfiguration = [
                        'width' => $imgTagDimensions[0],
                        'height' => $imgTagDimensions[1]
                    ];
                    $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
                // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                // Fetch the external image
                $externalFile = GeneralUtility::getUrl($absoluteUrl);
                if ($externalFile) {
                    $pU = parse_url($absoluteUrl);
                    $pI = pathinfo($pU['path'] ?? '');
                    $extension = strtolower($pI['extension'] ?? '');
                    if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                        $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                        // We insert this image into the user default upload folder
                        list($table, $field) = explode(':', $this->elRef);
                        /** @var Resource\Folder $folder */
                        $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                        /** @var Resource\File $fileObject */
                        $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                        $imageConfiguration = [
                            'width' => $attribArray['width'] ?? null,
                            'height' => $attribArray['height'] ?? null
                        ];
                        $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                }
            } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                // Finally, check image as local file (siteURL equals the one of the image)
                // Image has no data-htmlarea-file-uid attribute
                // Relative path, rawurldecoded for special characters.
                $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                // Absolute filepath, locked to relative path of this project
                $filepath = GeneralUtility::getFileAbsFileName($path);
                // Check file existence (in relative directory to this installation!)
                if ($filepath && @is_file($filepath)) {
                    // Treat it as a plain image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // If "plain image mode" has been configured
                        // Find the original dimensions of the image
                        $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filepath);
                        $imageInfo = [
                            $imageInfoObject->getWidth(),
                            $imageInfoObject->getHeight()
                        ];
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                    // Let's try to find a file uid for this image
                    try {
                        $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                        if ($fileOrFolderObject instanceof Resource\FileInterface) {
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            /** @var Resource\AbstractFile $fileObject */
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            // @todo if the retrieved file is a processed file, get the original file...
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        }
                    } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // Nothing to be done if file/folder not found
                    }
                }
            }
            // Remove width and height from style attribute
            $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            // Convert absolute to relative url
            if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
                $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
        }
    }
    return implode('', $imgSplit);
}
502
503 /**
504 * Transformation handler: 'ts_images' / direction: "rte"
505 * Processing images from database content going into the RTE.
506 * Processing includes converting the src attribute to an absolute URL.
507 *
508 * @param string $value Content input
509 * @return string Content output
510 */
public function TS_images_rte($value)
{
    // Odd-indexed parts of the split result are the <img> tags, even-indexed parts the content in between
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        // No image tag in the content
        return implode('', $imgSplit);
    }
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // Matches the site path at the very beginning of a src value
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';
    foreach ($imgSplit as $index => $imageTag) {
        if ($index % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($imageTag, true);
        $trimmedSource = trim($attributes['src']);
        // Anything not starting with "http" is turned into an absolute URL
        if (strncasecmp($trimmedSource, 'http', 4) !== 0) {
            // If site is in a subpath (eg. /~user_jim/) this path is stripped first, because $siteUrl contains it already
            $attributes['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attributes['src']);
        }
        // The alt attribute is mandatory
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $imgSplit[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, true, true) . ' />';
    }
    // Glue the processed pieces together again
    return implode('', $imgSplit);
}
541
542 /**
543 * Transformation handler: 'ts_links' / direction: "db"
544 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
545 *
546 * Splits content into <a> tag blocks and processes each tag, and allows hooks to actually render
547 * the result.
548 *
549 * @param string $value Content input
550 * @return string Content output
551 * @see TS_links_rte()
552 */
public function TS_links_db($value)
{
    // Odd-indexed parts are complete <a>...</a> blocks
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $index => $anchorBlock) {
        if ($index % 2 === 0) {
            continue;
        }
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($anchorBlock), true);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagAttributes['href'] ?? '');

        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] ?? null;
        if (!is_array($postProcessors)) {
            // No hook registered: store the link as <a> tag as default by TYPO3, with the new link service syntax.
            // The tag content is processed recursively.
            $tagAttributes['href'] = $linkService->asString($linkInformation);
            $innerContent = $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$index]));
            $blockSplit[$index] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>' . $innerContent . '</a>';
            continue;
        }

        // Modify parameters, this hook should be deprecated
        $parameters = [
            'currentBlock' => $anchorBlock,
            'linkInformation' => $linkInformation,
            'url' => $linkInformation['href'],
            'attributes' => $tagAttributes
        ];
        // Each registered processor renders the block; the result of the last one is kept
        foreach ($postProcessors as $objRef) {
            $processor = GeneralUtility::getUserObj($objRef);
            $blockSplit[$index] = $processor->modifyParamsLinksDb($parameters, $this);
        }
    }
    return implode('', $blockSplit);
}
585
586 /**
587 * Transformation handler: 'ts_links' / direction: "rte"
588 * Converting TYPO3-specific <link> tags to <a> tags
589 *
590 * This functionality is only used to convert legacy <link> tags to the new linking syntax using <a> tags, and will
591 * not be converted back to <link> tags anymore.
592 *
593 * @param string $value Content input
594 * @return string Content output
595 */
public function TS_links_rte($value)
{
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>"; odd-indexed parts are the link blocks
    $blockSplit = $this->splitIntoBlock('link', $value, true);
    foreach ($blockSplit as $index => $linkBlock) {
        if ($index % 2 === 0) {
            continue;
        }
        // Cut the closing bracket off the opening tag, then split away the first "<link " part
        $openingTagWithoutBracket = substr($this->getFirstTag($linkBlock), 0, -1);
        $typoLinkData = explode(' ', $openingTagWithoutBracket, 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);

        try {
            $href = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            // An unknown link handler results in an empty href
            $href = '';
        }

        $postProcessors = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? null;
        if (!is_array($postProcessors)) {
            // No hook registered: set the <a> tag and process its content recursively
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $innerContent = $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$index]));
            $blockSplit[$index] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>' . $innerContent . '</a>';
            continue;
        }

        // Modify parameters by a hook
        // backwards-compatibility: show an error message if the page is not found
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            if (!is_array($pageRecord)) {
                // Page does not exist
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $linkBlock,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        // Each registered processor renders the block; the result of the last one is kept
        foreach ($postProcessors as $objRef) {
            $processor = GeneralUtility::getUserObj($objRef);
            $blockSplit[$index] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    return implode('', $blockSplit);
}
657
658 /**
659 * Transformation handler: 'css_transform' / direction: "db"
660 * Cleaning (->db) for standard content elements (ts)
661 *
662 * @param string $value Content input
663 * @return string Content output
664 * @see TS_transform_rte()
665 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Hand the level back before bailing out. Otherwise every bail-out would
        // permanently lower the depth available to later calls on this instance.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks: odd indexes are block elements, even indexes the content in between
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Container elements: transform their content recursively and re-wrap it
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is kept as it is
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace(('/[' . LF . ']+/'), ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                // Whitespace-only content between blocks is dropped
                unset($blockSplit[$k]);
            }
        }
    }
    // Leaving this recursion level: release the run-away brake again
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
727
728 /**
729 * Wraps a-tags that contain a style attribute with a span-tag
730 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
731 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
732 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
733 * allowed.
734 *
735 * @param string $value Content input
736 * @return string Content output
737 */
public function transformStyledATags($value)
{
    // Odd-indexed parts are complete <a>...</a> blocks
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $index => $anchorBlock) {
        if (!($index % 2)) {
            continue;
        }
        list($anchorAttributes) = $this->get_tag_attributes($this->getFirstTag($anchorBlock), true);
        // Only anchors with a "style" attribute and without "rteerror" are wrapped
        if (!$anchorAttributes['style'] || $anchorAttributes['rteerror']) {
            continue;
        }
        // Move the style attribute from the anchor to the wrapping span
        $spanAttributes = ['style' => $anchorAttributes['style']];
        unset($anchorAttributes['style']);
        $openingTags = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>';
        $blockSplit[$index] = $openingTags . $this->removeFirstAndLastTag($blockSplit[$index]) . '</a></span>';
    }
    return implode('', $blockSplit);
}
757
758 /**
759 * Transformation handler: css_transform / direction: "rte"
760 * Set (->rte) for standard content elements (ts)
761 *
762 * @param string $value Content input
763 * @return string Content output
764 * @see TS_transform_db()
765 */
public function TS_transform_rte($value)
{
    // Tags whose content is transformed recursively
    $containerTags = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Optional spaces followed by one linebreak at the very beginning
    $leadingLineBreakPattern = '/^[ ]*' . LF . '/';

    // Split the content from database by the occurrence of the block elements:
    // odd indexes hold the blocks, even indexes the content in between
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($blockSplit as $index => $part) {
        if ($index % 2) {
            // Inside one of the blocks
            $openingTag = $this->getFirstTag($part);
            $tagName = strtolower($this->getFirstTagName($part));
            if (in_array($tagName, $containerTags, true)) {
                $innerContent = $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$index]));
                $blockSplit[$index] = $openingTag . $innerContent . '</' . $tagName . '>';
            }
            // Strip the linebreak directly following the block from the next part
            $blockSplit[$index + 1] = preg_replace($leadingLineBreakPattern, '', $blockSplit[$index + 1]);
            continue;
        }

        // NON-block: always read from $blockSplit, the preceding block may have altered this part
        $nextTagName = $this->getFirstTagName($blockSplit[$index + 1]);
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$index]) === 1;
        // If the line is followed by a block or is the last line:
        if (GeneralUtility::inList($this->blockElementList, $nextTagName) || !isset($blockSplit[$index + 1])) {
            if ($onlyLineBreaks) {
                // If the line contains only linebreaks, remove the leading linebreak
                $blockSplit[$index] = preg_replace($leadingLineBreakPattern, '', $blockSplit[$index]);
            } else {
                // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                $blockSplit[$index] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$index]);
            }
        }
        // A blank part is dropped, unless it consisted of linebreaks only
        if ((string)$blockSplit[$index] === '' && !$onlyLineBreaks) {
            unset($blockSplit[$index]);
        } else {
            $blockSplit[$index] = $this->setDivTags($blockSplit[$index]);
        }
    }
    return implode(LF, $blockSplit);
}
816
817 /***************************************************************
818 *
819 * Generic RTE transformation, analysis and helper functions
820 *
821 **************************************************************/
822
/**
 * Cleans content that is about to be stored in the database.
 *
 * Delegates to HTMLcleaner() using the tag configuration of getKeepTags('db').
 * Unknown tags are only kept if the processing option "dontRemoveUnknownTags_db" is set.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        // Default: remove unknown tags
        (bool)$this->procOptions['dontRemoveUnknownTags_db']
    );
}
839
/**
 * Builds the tag configuration for the HTMLcleaner function, depending on whether
 * content goes TO or FROM the Rich Text Editor ($direction).
 * The result is cached per direction in $this->getKeepTags_cache.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // Reuse the result that was built earlier for this direction
    if (is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Allowed tags: the built-in defaults plus the tags from the processing options
    $configuredTags = is_array($this->procOptions['allowTags.'])
        ? implode(',', $this->procOptions['allowTags.'])
        : $this->procOptions['allowTags'];
    $allowedTagList = $this->defaultAllowedTagsList . ',' . strtolower($configuredTags);
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $allowedTagList, true));

    // Denied tags are taken out of the list again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'], true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Direction specific options
    switch ($direction) {
        case 'rte':
            // Converts the TypoScript style configuration (having dots) into the
            // array structure understood by the HTMLcleaner function
            list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
            break;
        case 'db':
            // Span tags get a dedicated configuration if they are allowed
            if (isset($keepTags['span'])) {
                $classFixes = ['removeIfFalse' => 1];
                if (!empty($this->allowedClasses)) {
                    $classFixes['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => $classFixes
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
            }
            // Further options come from the processing options, with fallbacks
            $parserConfiguration = $this->procOptions['HTMLparser_db.'];
            if (!$parserConfiguration['globalNesting']) {
                $parserConfiguration['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (!$parserConfiguration['noAttrib']) {
                $parserConfiguration['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Transforming the array from TypoScript to regular array
            list($keepTags) = $this->HTMLparserConfig($parserConfiguration, $keepTags);
            break;
    }

    // Cache the result in object memory and hand it out
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $keepTags;
}
906
/**
 * Resolves $value into lines based on its <p> sections, for content going into the database.
 * The counterpart for the opposite direction is setDivTags().
 *
 * If no <p> section is found (or the recursion brake hits zero), the value is only passed
 * through sanitizeLineBreaksForContentOnly() and returned as string, regardless of $returnArray.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument eliminates false end-tags
    $segments = $this->splitIntoBlock('p', $value, true);
    if ($count <= 0 || count($segments) <= 1) {
        // Nothing to divide: hand back the plain content
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    $tagsKeptOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
    foreach ($segments as $position => $segment) {
        if ($position % 2 === 0) {
            // Outside of a <p> section: strip all but the allowed tags and tidy up line breaks
            $outsideContent = trim(strip_tags($segments[$position], $tagsKeptOutsideParagraphs));
            $outsideContent = $this->sanitizeLineBreaksForContentOnly($outsideContent);
            if ((string)$outsideContent === '') {
                // Positions without content are removed
                unset($segments[$position]);
            } else {
                // Put <p> tags around the text part of the remaining content
                $textOnly = strip_tags($outsideContent);
                $segments[$position] = str_replace($textOnly, '<p>' . $textOnly . '</p>', $outsideContent);
            }
            continue;
        }

        // Inside a <p> section: nested <p> sections are resolved recursively
        $innerContent = $this->removeFirstAndLastTag($segment);
        $nestedLines = $this->divideIntoLines($innerContent, $count - 1, true);
        if (is_array($nestedLines)) {
            // Nested <p> sections were found (considered 'an error'); their lines replace this section
            $segments[$position] = implode(LF, $nestedLines);
        } else {
            // No nesting: process it as a regular line
            $segments[$position] = $this->processContentWithinParagraph($nestedLines, $segments[$position]);
        }

        // A line that only carries a &nbsp; becomes a pure blank line, unless it contains
        // an image or a tag with one of the listed attributes, which counts as possible content
        $line = $segments[$position];
        $isBlankLine = trim(strip_tags($line)) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line));
        if ($isBlankLine) {
            $segments[$position] = '';
        }
    }

    if ($returnArray) {
        return $segments;
    }
    return implode(LF, $segments);
}
963
/**
 * Turns every line of $value into a <p></p> section, unless the line is already
 * wrapped in <p> or <div> tags or contains an <hr> tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for the HTMLcleaner function which processes each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $line = $this->HTMLcleaner($line, $keepTags, 'protect');
            // Double-encoded &nbsp; becomes a regular &nbsp; again
            // (formerly this could be disabled via "dontConvAmpInNBSP_rte")
            $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $normalizedLine = strtolower(trim($line));
            $isWrappedInDiv = substr($normalizedLine, 0, 4) === '<div' && substr($normalizedLine, -6) === '</div>';
            $isWrappedInParagraph = substr($normalizedLine, 0, 2) === '<p' && substr($normalizedLine, -4) === '</p>';
            if (!$isWrappedInDiv && !$isWrappedInParagraph) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
1004
/**
 * Used for transformation from RTE to DB
 *
 * Handles a single line within a <p> tag: the content is cleaned via HTMLcleaner_db(),
 * the attributes of the <p> tag are reduced to the allowed ones, and the result
 * is always returned wrapped in a <p> tag.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    $cleanedContent = $this->HTMLcleaner_db($content);
    // The opening <p> tag, whose attributes are validated below
    $paragraphTag = $this->getFirstTag($fullContentWithTag);

    // Without a list of allowed attributes, the <p> tag keeps no attributes at all
    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        list($tagAttributes) = $this->get_tag_attributes($paragraphTag);
        // Keep only the attributes that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Only classes that are whitelisted in $this->allowedClasses survive
        $needsClassFiltering = trim($tagAttributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($tagAttributes['class'], $this->allowedClasses, true);
        if ($needsClassFiltering) {
            $permittedClasses = array_intersect(
                GeneralUtility::trimExplode(' ', $tagAttributes['class'], true),
                $this->allowedClasses
            );
            if (empty($permittedClasses)) {
                unset($tagAttributes['class']);
            } else {
                $tagAttributes['class'] = implode(' ', $permittedClasses);
            }
        }
    }

    // Line breaks are removed from the content before it is wrapped
    $cleanedContent = str_replace(LF, '', $cleanedContent);
    return '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $cleanedContent . '</p>';
}
1048
/**
 * Puts every <hr> tag on a line of its own, collapses doubled LFs and strips
 * one LF from the very beginning and the very end of the content.
 * Used when transforming from RTE to DB.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    // Surround each <hr> tag with LFs and write it as self-closing tag
    $withIsolatedRulers = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    // Replace each LF pair with a single LF
    $withoutDoubledBreaks = str_replace(LF . LF, LF, $withIsolatedRulers);
    // Remove the LF at the start and at the end
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubledBreaks);
}
1062
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute, use that!
 * Otherwise the "width" / "height" attributes are used as fallback.
 *
 * Only the CSS properties "width" and "height" themselves are evaluated; properties
 * that merely end with these names (e.g. "max-width", "line-height") are ignored.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = trim((string)($attribArray['style'] ?? ''));
    $w = 0;
    $h = 0;
    if ($style !== '') {
        // The lookbehind rejects matches inside longer property names like "min-width" or "line-height"
        $pattern = '/(?<![\\w-])%s[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px/i';
        $reg = [];
        // Width
        if (preg_match(sprintf($pattern, 'width'), $style, $reg) === 1) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match(sprintf($pattern, 'height'), $style, $reg) === 1) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes if the style did not provide a (non-zero) value
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1093
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * This functionality is not in use anymore
 *
 * The returned array always contains the keys "url" and "type" ("email", "file", "ext",
 * "anchor" or "page"); for URLs resolved against the current site further keys
 * ("relScriptPath", "relUrl", "pageid", "cElement", "query") may be present.
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $url = trim($url);

    // Email address
    if (substr(strtolower($url), 0, 7) === 'mailto:') {
        return [
            'url' => trim(substr($url, 7)),
            'type' => 'email'
        ];
    }

    // File reference, marked by "?file:"
    $fileMarkerPosition = strpos($url, '?file:');
    if ($fileMarkerPosition !== false) {
        return [
            'type' => 'file',
            'url' => rawurldecode(substr($url, $fileMarkerPosition + 1))
        ];
    }

    // Anything else is compared against the current site URL:
    // determine the length of the prefix that both URLs have in common
    $curURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $urlLength = strlen($url);
    $commonLength = 0;
    while ($commonLength < $urlLength && $url[$commonLength] == $curURL[$commonLength]) {
        $commonLength++;
    }
    $info = [
        'relScriptPath' => substr($curURL, $commonLength),
        'relUrl' => substr($url, $commonLength),
        'url' => $url,
        'type' => 'ext'
    ];
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);

    // Hosts should match, and the script path has to be empty (FE-EDIT) or start with TYPO3_mainDir
    $isWithinCurrentSite = $siteUrl_parts['host'] == $curUrl_parts['host']
        && (!$info['relScriptPath'] || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir);
    if (!$isWithinCurrentSite) {
        // Stays an external URL; the relative parts are not reported
        unset($info['relScriptPath'], $info['relUrl']);
        return $info;
    }

    $uP = parse_url($info['relUrl']);
    if ($info['relUrl'] === '#' . $siteUrl_parts['fragment']) {
        // Only a fragment is left over
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($uP['path']) || $uP['path'] === 'index.php') {
        // URL is a page (id parameter)
        $pp = preg_split('/^id=/', $uP['query']);
        $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1]);
        $parameters = explode('&', $pp[1]);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $uP['fragment'];
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1158
/**
 * Converting <A>-tags to absolute URLs
 *
 * Every <a> tag whose href has content but no scheme gets the current site URL prepended.
 * Tags without a href attribute, or with an empty one, keep their attributes as they are,
 * so that pure anchors are preserved. Nested <a> tags are processed recursively.
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set. (not in use anymore, the argument is not evaluated)
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions hold <a> blocks
        if ($k % 2 === 0) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Checking if there is a scheme, and if not, prepend the current url.
        // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved.
        // A missing href attribute is treated like an empty one; otherwise the site URL
        // would be added as href to tags that never had one.
        if (($attribArray['href'] ?? '') !== '') {
            $uP = parse_url(strtolower($attribArray['href']));
            // parse_url() may return FALSE or an array without "scheme"; both count as "no scheme"
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
            }
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
    }
    return implode('', $blockSplit);
}
1188
/**
 * Adjusts the width/height attributes of an image tag according to the
 * processing option "plainImageMode" ("lockDimensions", "lockRatioWhenSmaller" or "lockRatio").
 * Any other setting leaves the attributes untouched.
 *
 * @param array $imageInfo: info array of the image (width in key 0, height in key 1)
 * @param array $attribArray: array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];

    if ($mode === 'lockDimensions') {
        // Always use the dimensions of the image itself
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }

    // "lockRatioWhenSmaller" additionally caps the width at the width of the image
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    // Both ratio modes derive the height from the width, using the aspect ratio of the image
    $keepsRatio = $mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio';
    if ($keepsRatio && $imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1223
/**
 * Called before any processing / transformation is made
 * Strips every CR (char 13) from the content, so that only LFs (char 10)
 * are dealt with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1238
/**
 * Called after any processing / transformation was made
 * Just before the content is returned by the RTE parser, all line breaks
 * get unified to be "CRLF"s again.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First get rid of any CR that has entered in the meantime ...
    $lfOnlyContent = $this->streamlineLineBreaksForProcessing($content);
    // ... so that every remaining LF can be turned into exactly one CRLF
    return str_replace(LF, CRLF, $lfOnlyContent);
}
1256
/**
 * Content Transformation from DB to RTE
 * Resolves the href of every <a> tag and, if it references a page whose record
 * cannot be fetched, adds a "data-rte-error" attribute and some offensive styling.
 * <a> tags without href are left untouched.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    $errorStyling = 'background-color: yellow; border:2px red solid; color: black;';
    foreach ($blocks as $position => $value) {
        // Even positions hold the content between the <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        $hrefInformation = $linkService->resolve($attributes['href']);
        $referencesSpecificPage = $hrefInformation['type'] === LinkService::TYPE_PAGE
            && $hrefInformation['pageuid'] !== 'current';
        if ($referencesSpecificPage && !is_array(BackendUtility::getRecord('pages', $hrefInformation['pageuid']))) {
            // Page does not exist
            $attributes['data-rte-error'] = 'Page with ID ' . $hrefInformation['pageuid'] . ' not found';
            $attributes['style'] = empty($attributes['style'])
                ? $errorStyling
                : $attributes['style'] . ' ' . $errorStyling;
        }
        // Always rewrite the block to allow the nested calling even if a page is found
        $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]));
        $blocks[$position] = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $innerContent
            . '</a>';
    }
    return implode('', $blocks);
}
1299
/**
 * Content Transformation from RTE to DB
 * Removes the "data-rte-error" attribute and the error styling from <a> tags
 * that carry a href. <a> tags without href are left untouched.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $errorStyling = 'background-color: yellow; border:2px red solid; color: black;';
    $blocks = $this->splitIntoBlock('A', $content);
    foreach ($blocks as $position => $value) {
        // Even positions hold the content between the <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        // Always remove the markers again (regardless of the page was found or not)
        // so the database does not contain ugly stuff
        unset($attributes['data-rte-error']);
        if (isset($attributes['style'])) {
            $remainingStyle = trim(str_replace($errorStyling, '', $attributes['style']));
            if (empty($remainingStyle)) {
                unset($attributes['style']);
            } else {
                $attributes['style'] = $remainingStyle;
            }
        }
        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($blocks[$position]));
        $blocks[$position] = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $innerContent
            . '</a>';
    }
    return implode('', $blocks);
}
1334
/**
 * Fetches a logger for the class of this object from the LogManager
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
}
1346 }