[BUGFIX] Catch Exceptions in RTE with invalid links
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Psr\Log\LoggerAwareInterface;
18 use Psr\Log\LoggerAwareTrait;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait;
21 use TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait;
22 use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
23 use TYPO3\CMS\Core\LinkHandling\LinkService;
24 use TYPO3\CMS\Core\Resource;
25 use TYPO3\CMS\Core\Type\File\ImageInfo;
26 use TYPO3\CMS\Core\Utility\GeneralUtility;
27 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
28
29 /**
30 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
31 *
32 * Concerning line breaks:
33 * Regardless if LF (Unix-style) or CRLF (Windows) was put in, the HtmlParser works with LFs and migrates all
34 * line breaks to LFs internally, however when all transformations are done, all LFs are transformed to CRLFs.
35 * This means: RteHtmlParser always returns CRLFs to be maximum compatible with all formats.
36 */
37 class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
38 {
39 use LoggerAwareTrait;
40 use PublicPropertyDeprecationTrait;
41 use PublicMethodDeprecationTrait;
42
/**
 * Property names mapped to the deprecation message used for them.
 * NOTE(review): presumably evaluated by PublicPropertyDeprecationTrait when one of these
 * properties is accessed from outside of the class - confirm against the trait.
 *
 * @var string[]
 */
protected $deprecatedPublicProperties = [
    'blockElementList' => 'Using $blockElementList of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'recPid' => 'Using $recPid of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'elRef' => 'Using $elRef of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'tsConfig' => 'Using $tsConfig of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'procOptions' => 'Using $procOptions of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'TS_transform_db_safecounter' => 'Using $TS_transform_db_safecounter of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'getKeepTags_cache' => 'Using $getKeepTags_cache of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
    'allowedClasses' => 'Using $allowedClasses of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
];

/**
 * Method names mapped to the deprecation message used for them.
 * NOTE(review): presumably evaluated by PublicMethodDeprecationTrait when one of these
 * methods is called from outside of the class - confirm against the trait.
 *
 * @var string[]
 */
protected $deprecatedPublicMethods = [
    'TS_images_db' => 'Using TS_images_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'TS_links_db' => 'Using TS_links_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'TS_transform_db' => 'Using TS_transform_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'TS_transform_rte' => 'Using TS_transform_rte() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'HTMLcleaner_db' => 'Using HTMLcleaner_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'getKeepTags' => 'Using getKeepTags() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'divideIntoLines' => 'Using divideIntoLines() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'setDivTags' => 'Using setDivTags() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'getWHFromAttribs' => 'Using getWHFromAttribs() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
    'urlInfoForLinkTags' => 'Using urlInfoForLinkTags() of class RteHtmlParser from the outside is discouraged, as this method is not in use anymore and will be removed.',
    'TS_AtagToAbs' => 'Using TS_AtagToAbs() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
];

/**
 * Comma-separated list of block elements; these are not wrapped into a "p" tag during the transformation.
 * Can be replaced at runtime via the processing option "blockElementList".
 *
 * @var string
 */
protected $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma-separated list of all tags that are allowed by default
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span,abbr,acronym,dfn';

/**
 * PID of the record that is manipulated by this class, set via init().
 *
 * @var int
 */
protected $recPid = 0;

/**
 * Element reference in the form [table]:[field], e.g. "tt_content:bodytext", set via init().
 *
 * @var string
 */
protected $elRef = '';

/**
 * The configuration passed to RTE_transform() (current Page TSconfig)
 *
 * @var array
 */
protected $tsConfig = [];

/**
 * The "proc." section of the configuration passed to RTE_transform()
 *
 * @var array
 */
protected $procOptions = [];

/**
 * Run-away brake for the recursive calls of TS_transform_db().
 *
 * @var int
 */
protected $TS_transform_db_safecounter = 100;

/**
 * Data caching for processing function
 *
 * @var array
 */
protected $getKeepTags_cache = [];

/**
 * CSS class names that are allowed in the RTE, filled from the processing options in RTE_transform().
 *
 * @var array
 */
protected $allowedClasses = [];

/**
 * HTML attributes that are kept on <p> tags. As <p> tags currently receive a special handling,
 * they are configured separately (see 'proc.allowAttributes.' / 'proc.keepPDIVattribs').
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop',
];

/**
 * Tags that are allowed outside of <p> sections - usually similar to the block elements
 * plus some special tags like <hr> and <img> (if images are allowed).
 * Can be overridden completely via 'proc.allowTagsOutside'.
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'header',
    'hr',
    'nav',
    'section',
];
167
/**
 * Stores the context (element reference and page id) of the record that is processed.
 *
 * @param string $elRef Element reference in the form [table]:[field], e.g. "tt_content:bodytext"
 * @param int $recPid PID of the record (page id)
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
179
180 /**********************************************
181 *
182 * Main function
183 *
184 **********************************************/
/**
 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
 * This is the main function called from DataHandler and transfer data classes
 *
 * Processing order: line breaks are streamlined, the optional "entryHTMLparser_[direction]" is run,
 * every resolved transformation mode is applied (a user defined transformation registered for the
 * mode takes precedence over the built-in one), the optional "exitHTMLparser_[direction]" is run
 * and finally the line breaks are streamlined again.
 *
 * @param string $value Input value
 * @param null $_ unused
 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
 * @return string Output value
 */
public function RTE_transform($value, $_ = null, $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // "proc." is optional (the default configuration is an empty array), so it must not be read unguarded
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        trigger_error('HTML parsing option "keepPDIVattribs" will not be evaluated anymore in TYPO3 v10.0. Use "allowedAttributes" instead.', E_USER_DEPRECATED);
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called, "overruleMode" wins over "mode".
    // Both options are optional; a missing option results in an empty mode list.
    $overruleMode = $this->procOptions['overruleMode'] ?? '';
    if ((string)$overruleMode !== '') {
        $modes = GeneralUtility::trimExplode(',', $overruleMode);
    } else {
        $modes = [$this->procOptions['mode'] ?? ''];
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // A user defined transformation registered for this mode replaces the built-in one
        $customTransformation = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if (!empty($customTransformation)) {
                $_procObj = GeneralUtility::makeInstance($customTransformation);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if (!empty($customTransformation)) {
                $_procObj = GeneralUtility::makeInstance($customTransformation);
                $_procObj->pObj = $this;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value, true);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
307
/**
 * Builds the list of transformation modes to execute: the shortcut "default" is expanded to the
 * built-in modes, empty entries and duplicates are dropped, and the order is reversed for the
 * direction "rte".
 *
 * @param string $direction
 * @param array $modes
 * @return array the resolved transformation modes
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // Work on the joined list so that the "default" shortcut can be expanded in place
    $expandedModeList = str_replace(
        'default',
        'detectbrokenlinks,css_transform,ts_images,ts_links',
        implode(',', $modes)
    );

    // Every mode is only applied once
    $uniqueModes = array_unique(GeneralUtility::trimExplode(',', $expandedModeList, true));

    // The direction "rte" runs the modes in reverse order
    return $direction === 'rte' ? array_reverse($uniqueModes) : $uniqueModes;
}
331
/**
 * Runs the HTML parser if it is configured
 * These additional HTML cleaner configurations are applied either before or after the main
 * transformation and are thus totally independent processing options you can set up.
 *
 * This is only possible via TSconfig (procOptions) currently.
 *
 * @param string $content
 * @param string $configurationDirective key used to look up in the procOptions if the parser is enabled; the parser configuration is then fetched from the same key with a trailing dot
 * @return string the processed content
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    // Directive not enabled: hand back the content untouched
    if (empty($this->procOptions[$configurationDirective])) {
        return $content;
    }
    $parserSetup = $this->HTMLparserConfig($this->procOptions[$configurationDirective . '.']);
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $parserSetup;
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
351
352 /************************************
353 *
354 * Specific RTE TRANSFORMATION functions
355 *
356 *************************************/
/**
 * Transformation handler: 'ts_images' / direction: "db"
 * Processing images inserted in the RTE.
 * This is used when content goes from the RTE to the database.
 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
 * Also "magic" images are processed here.
 *
 * Optional tag attributes and processing options are read defensively, so <img> tags lacking
 * e.g. "src", "style" or "data-htmlarea-file-uid" do not raise undefined index notices.
 *
 * @param string $value The content from RTE going to Database
 * @return string Processed content
 */
protected function TS_images_db($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var Resource\ResourceFactory $resourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var Resource\Service\MagicImageService $magicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        $plainImageMode = !empty($this->procOptions['plainImageMode']);
        $fetchExternalImages = empty($this->procOptions['dontFetchExtPictures']);
        foreach ($imgSplit as $k => $v) {
            // Image found, do processing:
            if ($k % 2) {
                // Get attributes
                list($attribArray) = $this->get_tag_attributes($v, true);
                // It's always an absolute URL coming from the RTE into the Database.
                $absoluteUrl = trim($attribArray['src'] ?? '');
                // Make path absolute if it is relative and we have a site path which is not '/'.
                // The scheme has to be taken from parse_url(), pathinfo() does not provide it.
                $urlScheme = parse_url($absoluteUrl, PHP_URL_SCHEME);
                if ($sitePath && !$urlScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                    $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                    $absoluteUrl = $siteUrl . $absoluteUrl;
                }
                // Image dimensions set in the img tag, if any
                $imgTagDimensions = $this->getWHFromAttribs($attribArray);
                if ($imgTagDimensions[0]) {
                    $attribArray['width'] = $imgTagDimensions[0];
                }
                if ($imgTagDimensions[1]) {
                    $attribArray['height'] = $imgTagDimensions[1];
                }
                $originalImageFile = null;
                $originalFileUid = $attribArray['data-htmlarea-file-uid'] ?? null;
                if ($originalFileUid) {
                    // An original image file uid is available
                    try {
                        /** @var Resource\File $originalImageFile */
                        $originalImageFile = $resourceFactory->getFileObject((int)$originalFileUid);
                    } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                        // Log the fact the file could not be retrieved.
                        $message = sprintf('Could not find file with uid "%s"', $originalFileUid);
                        $this->logger->error($message);
                    }
                }
                if ($originalImageFile instanceof Resource\File) {
                    // Public url of local file is relative to the site url, absolute otherwise
                    if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                        // This is a plain image, i.e. reference to the original image
                        if ($plainImageMode) {
                            // "plain image mode" is configured
                            // Find the dimensions of the original image
                            $imageInfo = [
                                $originalImageFile->getProperty('width'),
                                $originalImageFile->getProperty('height')
                            ];
                            if (!$imageInfo[0] || !$imageInfo[1]) {
                                // Dimensions are not available as file properties, read them from the file itself
                                $filePath = $originalImageFile->getForLocalProcessing(false);
                                $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filePath);
                                $imageInfo = [
                                    $imageInfoObject->getWidth(),
                                    $imageInfoObject->getHeight()
                                ];
                            }
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                    } else {
                        // Magic image case: get a processed file with the requested configuration
                        $imageConfiguration = [
                            'width' => $imgTagDimensions[0],
                            'height' => $imgTagDimensions[1]
                        ];
                        $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && $fetchExternalImages && TYPO3_MODE === 'BE') {
                    // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                    // Fetch the external image
                    $externalFile = GeneralUtility::getUrl($absoluteUrl);
                    if ($externalFile) {
                        $pU = parse_url($absoluteUrl);
                        $pI = pathinfo($pU['path'] ?? '');
                        $extension = strtolower($pI['extension'] ?? '');
                        if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                            $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                            // We insert this image into the user default upload folder
                            list($table, $field) = array_pad(explode(':', $this->elRef), 2, null);
                            /** @var Resource\Folder $folder */
                            $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                            /** @var Resource\File $fileObject */
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = [
                                'width' => $attribArray['width'] ?? null,
                                'height' => $attribArray['height'] ?? null
                            ];
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                    // Finally, check image as local file (siteURL equals the one of the image)
                    // Image has no data-htmlarea-file-uid attribute
                    // Relative path, rawurldecoded for special characters.
                    $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                    // Absolute filepath, locked to relative path of this project
                    $filepath = GeneralUtility::getFileAbsFileName($path);
                    // Check file existence (in relative directory to this installation!)
                    if ($filepath && @is_file($filepath)) {
                        // Treat it as a plain image
                        if ($plainImageMode) {
                            // If "plain image mode" has been configured
                            // Find the original dimensions of the image
                            $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filepath);
                            $imageInfo = [
                                $imageInfoObject->getWidth(),
                                $imageInfoObject->getHeight()
                            ];
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                        // Let's try to find a file uid for this image
                        try {
                            $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                            if ($fileOrFolderObject instanceof Resource\FileInterface) {
                                $fileIdentifier = $fileOrFolderObject->getIdentifier();
                                /** @var Resource\AbstractFile $fileObject */
                                $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                                // @todo if the retrieved file is a processed file, get the original file...
                                $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // Nothing to be done if file/folder not found
                        }
                    }
                }
                // Remove width and height from style attribute.
                // The style attribute is always written (empty if there was none), as before.
                $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                // Convert absolute to relative url
                if (GeneralUtility::isFirstPartOfStr($attribArray['src'] ?? '', $siteUrl)) {
                    $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
            }
        }
    }
    return implode('', $imgSplit);
}
524
/**
 * Transformation handler: 'ts_images' / direction: "rte"
 * Prepares the <img> tags of database content for the RTE: a src attribute that does not
 * start with "http" is turned into an absolute URL, and an (empty) alt attribute is added
 * if the tag has none.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function TS_images_rte($value)
{
    // Odd entries of the split result are the <img> tags, even entries the content around them
    $segments = $this->splitTags('img', $value);
    if (count($segments) <= 1) {
        // No image tag found
        return implode('', $segments);
    }

    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';

    foreach ($segments as $index => $segment) {
        if (!($index % 2)) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($segment, true);
        $absoluteUrl = trim($attributes['src']);
        // Transform the src attribute into an absolute url, if it is not one already
        if (stripos($absoluteUrl, 'http') !== 0) {
            $attributes['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attributes['src']);
        }
        // Must have alt attribute
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $segments[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, true, true) . ' />';
    }

    return implode('', $segments);
}
563
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Processing anchor tags, and resolves them correctly again via the LinkService syntax
 *
 * Splits content into <a> tag blocks and processes each tag, and allows hooks to actually render
 * the result.
 *
 * A link that cannot be handled by the LinkService (UnknownLinkHandlerException while resolving
 * the href or while building the link string) does not abort the transformation: the href of
 * such a link is stored unchanged.
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_links_rte()
 */
protected function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v), true);
            $linkService = GeneralUtility::makeInstance(LinkService::class);
            // Resolving may fail as well, e.g. for a link with an unregistered link handler
            try {
                $linkInformation = $linkService->resolve($tagAttributes['href'] ?? '');
                $linkIsResolved = true;
            } catch (UnknownLinkHandlerException $e) {
                $linkInformation = [];
                $linkIsResolved = false;
            }

            // Modify parameters, this hook should be deprecated
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
                trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksDb_PostProc" will be removed in TYPO3 v10.0, use LinkService syntax to modify links to be stored in the database.', E_USER_DEPRECATED);
                $parameters = [
                    'currentBlock' => $v,
                    'linkInformation' => $linkInformation,
                    'url' => $linkInformation['href'] ?? null,
                    'attributes' => $tagAttributes
                ];
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] ?? [] as $className) {
                    $processor = GeneralUtility::makeInstance($className);
                    $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
                }
            } else {
                // Otherwise store the link as <a> tag as default by TYPO3, with the new link service syntax.
                // An unresolvable link keeps the href it came with.
                if ($linkIsResolved) {
                    try {
                        $tagAttributes['href'] = $linkService->asString($linkInformation);
                    } catch (UnknownLinkHandlerException $e) {
                        $tagAttributes['href'] = $linkInformation['href'] ?? $tagAttributes['href'] ?? null;
                    }
                }

                // The content of the anchor is processed recursively
                $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                    . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
            }
        }
    }
    return implode('', $blockSplit);
}
612
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting TYPO3-specific <link> tags to <a> tags
 *
 * This functionality is only used to convert legacy <link> tags to the new linking syntax using <a> tags, and will
 * not be converted back to <link> tags anymore.
 *
 * A <link> tag whose target cannot be handled by the LinkService (UnknownLinkHandlerException)
 * is converted into an <a> tag with an empty href instead of aborting the transformation.
 *
 * @param string $value Content input
 * @param bool $internallyCalledFromCore internal option for calls where the Core is still using this function, to supress method deprecations
 * @return string Content output
 * @deprecated will be removed in TYPO3 v10.0, only ->TS_AtagToAbs() should be called directly, <link> syntax is deprecated
 */
public function TS_links_rte($value, $internallyCalledFromCore = null)
{
    if ($internallyCalledFromCore === null) {
        trigger_error('RteHtmlParser->TS_links_rte() will be removed in TYPO3 v10.0, use TS_AtagToAbs() directly and do not use <link> syntax anymore.', E_USER_DEPRECATED);
    }
    $hasLinkTags = false;
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>"
    $blockSplit = $this->splitIntoBlock('link', $value, true);
    foreach ($blockSplit as $k => $v) {
        // Block
        if ($k % 2) {
            $hasLinkTags = true;
            // Split away the first "<link " part, a <link> tag without any parameter yields an empty string
            $typoLinkData = explode(' ', substr($this->getFirstTag($v), 0, -1), 2)[1] ?? '';
            $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

            // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
            $linkService = GeneralUtility::makeInstance(LinkService::class);
            // Reset per tag, so that a failing link never picks up the data of the previous one
            $linkInformation = [];
            try {
                $linkInformation = $linkService->resolve($tagCode['url']);
                $href = $linkService->asString($linkInformation);
            } catch (UnknownLinkHandlerException $e) {
                $href = '';
            }
            $linkType = $linkInformation['type'] ?? null;

            // Modify parameters by a hook
            if (is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? false)) {
                trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksRte_PostProc" will be removed in TYPO3 v10.0, use the link service to properly use .', E_USER_DEPRECATED);
                // backwards-compatibility: show an error message if the page is not found
                $error = '';
                if ($linkType === LinkService::TYPE_PAGE) {
                    $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
                    // Page does not exist
                    if (!is_array($pageRecord)) {
                        $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
                    }
                }
                $parameters = [
                    'currentBlock' => $v,
                    'url' => $href,
                    'tagCode' => $tagCode,
                    'external' => $linkType === LinkService::TYPE_URL,
                    'error' => $error
                ];
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $className) {
                    $processor = GeneralUtility::makeInstance($className);
                    $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
                }
            } else {
                $anchorAttributes = [
                    'href' => $href,
                    'target' => $tagCode['target'],
                    'class' => $tagCode['class'],
                    'title' => $tagCode['title']
                ];

                // Setting the <a> tag, the content of the link is processed recursively
                $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                    . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k]), $internallyCalledFromCore)
                    . '</a>';
            }
        }
    }
    if ($hasLinkTags) {
        trigger_error('Content with <link> syntax was found, update your content to use the t3:// syntax, and migrate your content via the upgrade wizard in the install tool.', E_USER_DEPRECATED);
    }
    return implode('', $blockSplit);
}
695
/**
 * Transformation handler: 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by the configured block elements. Container-like blocks are processed
 * recursively, <pre> blocks are kept untouched, all other blocks get their line breaks replaced
 * by spaces. Content outside of blocks is passed to divideIntoLines().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_rte()
 */
protected function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Restore the counter before bailing out, otherwise every overflow would
        // permanently reduce the recursion budget of this instance
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Containers: transform the content between the opening and the closing tag recursively
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is kept as it is
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                // Whitespace-only content between blocks is dropped
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
765
/**
 * Wraps a-tags that contain a style attribute with a span-tag
 * This is not in use anymore, but was necessary before because <a> tags are transformed into <link> tags
 * in the database, but <link> tags cannot handle style attributes. However, this is considered a
 * bad approach as it leaves an ugly <span> tag in the database, if allowedTags=span with style attributes are
 * allowed.
 *
 * The style attribute is moved from the <a> tag to a newly created surrounding <span> tag.
 * Links without a style attribute, or carrying an "rteerror" attribute, are left untouched.
 *
 * @param string $value Content input
 * @return string Content output
 * @deprecated since TYPO3 v9.0, will be removed in TYPO3 v10.0, see comment above, adding attribute "rteerror" is not necessary anymore.
 */
public function transformStyledATags($value)
{
    trigger_error('RteHtmlParser->transformStyledATags() will be removed in TYPO3 v10.0. TYPO3 can handle style attribute in anchor tags properly since TYPO3 v8 LTS.', E_USER_DEPRECATED);
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions contain an A-tag
        if ($k % 2 === 0) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Only act if "style" is set and "rteerror" is not set. empty() keeps the original
        // truthiness semantics without raising notices for links lacking these attributes.
        if (empty($attribArray['style']) || !empty($attribArray['rteerror'])) {
            continue;
        }
        $spanAttributes = ['style' => $attribArray['style']];
        unset($attribArray['style']);
        $bTag = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a></span>';
        $blockSplit[$k] = $bTag . $this->removeFirstAndLastTag($blockSplit[$k]) . $eTag;
    }
    return implode('', $blockSplit);
}
797
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * Splits the content by the configured block elements. Container-like blocks are
 * processed recursively; the content between blocks gets its line breaks adjusted
 * and is then passed to setDivTags().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
protected function TS_transform_rte($value)
{
    // Block elements whose inner content may contain further block elements
    $recursiveContainers = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $recursiveContainers, true)) {
                $innerContent = $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]));
                $blockSplit[$k] = $tag . $innerContent . '</' . $tagName . '>';
            }
            // Drop the line break directly following the block. The following non-block
            // iteration reads $blockSplit[$k] from the array and thus sees this change.
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1]);
            continue;
        }

        // NON-block:
        $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1;
        // If the line is followed by a block or is the last line:
        if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
            if ($onlyLineBreaks) {
                // If the line contains only linebreaks, remove the leading linebreak
                $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
            } else {
                // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
            }
        }
        // A blank line is removed, unless the line only contained linebreaks
        $isBlank = (string)$blockSplit[$k] === '';
        if ($isBlank && !$onlyLineBreaks) {
            unset($blockSplit[$k]);
        } else {
            $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
        }
    }
    return implode(LF, $blockSplit);
}
856
857 /***************************************************************
858 *
859 * Generic RTE transformation, analysis and helper functions
860 *
861 **************************************************************/
862
/**
 * Cleans content on its way from the RTE into the database.
 * Calls HTMLcleaner() with the keep-tags configuration for direction "db". Unknown tags are
 * removed by default; they are only kept if the (deprecated) processing option
 * "dontRemoveUnknownTags_db" is set to a true value.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
protected function HTMLcleaner_db($content)
{
    $tagConfiguration = $this->getKeepTags('db');
    // Default: remove unknown tags.
    $keepUnknownTags = false;
    if (isset($this->procOptions['dontRemoveUnknownTags_db'])) {
        trigger_error('HTMLParser option "dontRemoveUnknownTags_db" will not be evaluted anymore in TYPO3 v10.0. Remove its usages.', E_USER_DEPRECATED);
        $keepUnknownTags = (bool)$this->procOptions['dontRemoveUnknownTags_db'];
    }
    return $this->HTMLcleaner($content, $tagConfiguration, $keepUnknownTags);
}
882
/**
 * Builds the tag configuration handed to the HTMLcleaner function, depending on whether
 * content goes TO or FROM the Rich Text Editor ($direction).
 * The result is cached per direction in $this->getKeepTags_cache.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
protected function getKeepTags($direction = 'rte')
{
    // Serve from the in-memory cache if this direction was already resolved
    if (isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Allowed tags come from the processing options, either as array ("allowTags.") or as list ("allowTags")
    $hasAllowTagsArray = isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.']);
    $configuredTags = $hasAllowTagsArray
        ? implode(',', $this->procOptions['allowTags.'])
        : ($this->procOptions['allowTags'] ?? '');
    // The default list plus the configured tags, with the tag names as array keys
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . strtolower($configuredTags), true));
    // Denied tags are taken out again
    $denyTags = GeneralUtility::trimExplode(',', $this->procOptions['denyTags'] ?? '', true);
    $keepTags = array_diff_key($keepTags, array_flip($denyTags));

    // Based on the direction of content, set further options:
    if ($direction == 'rte') {
        // Converts the TypoScript-style configuration (keys with dots) into the plain
        // multi-dimensional array the HTMLcleaner function works with
        list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags);
    } elseif ($direction == 'db') {
        // Setting up span tags if they are allowed:
        if (isset($keepTags['span'])) {
            $classFix = ['removeIfFalse' => 1];
            if (!empty($this->allowedClasses)) {
                $classFix['list'] = $this->allowedClasses;
            }
            $keepTags['span'] = [
                'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                'fixAttrib' => [
                    'class' => $classFix
                ],
                'rmTagIfNoAttrib' => 1
            ];
        }
        // Further options are taken from the processing options, with defaults for nesting and attribute-less tags
        $TSc = $this->procOptions['HTMLparser_db.'] ?? [];
        if (empty($TSc['globalNesting'])) {
            $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
        }
        if (empty($TSc['noAttrib'])) {
            $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
        }
        // Transforming the array from TypoScript to regular array:
        list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
    }

    // Caching (internally, in object memory) the result
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $keepTags;
}
949
/**
 * Resolves $value into lines based on its <p> sections, for content going into the database.
 * The function ->setDivTags does the opposite.
 *
 * If $value contains no <p> section, or the recursion brake is used up, the value is only run
 * through sanitizeLineBreaksForContentOnly() and returned as string (also if $returnArray is set).
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value.
 * @see setDivTags()
 */
protected function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Setting the third param will eliminate false end-tags. Maybe this is a good thing to do...?
    $paragraphBlocks = $this->splitIntoBlock('p', $value, true);
    $hasNoParagraphs = count($paragraphBlocks) <= 1;
    if ($hasNoParagraphs || $count <= 0) {
        // Plain content without any p sections
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    $tagsAllowedOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
    foreach ($paragraphBlocks as $k => $v) {
        if (!($k % 2)) {
            // Outside a paragraph: strip all but the allowed tags and clean up the line breaks
            $outerContent = trim(strip_tags($paragraphBlocks[$k], $tagsAllowedOutsideParagraphs));
            $outerContent = $this->sanitizeLineBreaksForContentOnly($outerContent);
            if ((string)$outerContent === '') {
                // Nothing left, so remove the position
                unset($paragraphBlocks[$k]);
            } else {
                // add <p> tags around the text content
                $textOnly = strip_tags($outerContent);
                $paragraphBlocks[$k] = str_replace($textOnly, '<p>' . $textOnly . '</p>', $outerContent);
            }
            continue;
        }

        // Inside a <p> section: fetch 'sub-lines', which explodes any further p nesting recursively
        $innerContent = $this->removeFirstAndLastTag($v);
        $subLines = $this->divideIntoLines($innerContent, $count - 1, true);
        if (is_array($subLines)) {
            // Nested p sections were found (this would be considered 'an error'),
            // their lines become the new content of THIS section
            $paragraphBlocks[$k] = implode(LF, $subLines);
        } else {
            // No sub-section was found, so this is processed as a TRUE line
            $paragraphBlocks[$k] = $this->processContentWithinParagraph($subLines, $paragraphBlocks[$k]);
        }
        // A line whose text is only a &nbsp; is made pure blank - unless it holds an image or its tags
        // carry attributes. Those attributes should have been filtered before; if they are still there
        // they must be considered as possible content.
        $line = $paragraphBlocks[$k];
        $isOnlyNbsp = trim(strip_tags($line)) === '&nbsp;';
        if (
            $isOnlyNbsp
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line))
        ) {
            $paragraphBlocks[$k] = '';
        }
    }
    if ($returnArray) {
        return $paragraphBlocks;
    }
    return implode(LF, $paragraphBlocks);
}
1006
/**
 * Turns every line of $value into a <p></p> section, unless the line contains an <hr> tag
 * or is already wrapped in a <div> or <p> tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
protected function setDivTags($value)
{
    // Configuration for the HTMLcleaner function, which processes each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    $parts = explode(LF, $value);
    foreach ($parts as $k => $line) {
        if (trim($line) === '') {
            // A blank line becomes a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $line = $this->HTMLcleaner($line, $keepTags, 'protect');
            // convert double-encoded &nbsp; into regular &nbsp; however this could also be reversed via the exitHTMLparser
            // This was previously an option to disable called "dontConvAmpInNBSP_rte"
            $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $testStr = strtolower(trim($line));
            $isWrappedInDiv = strpos($testStr, '<div') === 0 && substr($testStr, -6) === '</div>';
            $isWrappedInParagraph = strpos($testStr, '<p') === 0 && substr($testStr, -4) === '</p>';
            // Only set p-tags if there is not already div or p tags:
            if (!$isWrappedInDiv && !$isWrappedInParagraph) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $parts[$k] = $line;
    }
    return implode(LF, $parts);
}
1047
/**
 * Used for transformation from RTE to DB
 *
 * Handles the content of a single <p> tag: the content is cleaned via HTMLcleaner_db(),
 * line breaks are removed from it, and it is wrapped in a freshly compiled <p> tag.
 * From the original <p> tag only the attributes listed in
 * $this->allowedAttributesForParagraphTags survive; if $this->allowedClasses is filled,
 * the class attribute is additionally reduced to those classes.
 *
 * @param string $content the content within the <p> tag
 * @param string $fullContentWithTag the whole <p> tag surrounded as well
 *
 * @return string the full <p> tag with cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // clean up the content
    $content = $this->HTMLcleaner_db($content);

    // Without allowed attributes configured, the <p> tag is written without any attributes
    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        // Read the attributes of the <p> tag and keep only the allowed ones
        list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($fullContentWithTag));
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Only allow classes that are whitelisted in $this->allowedClasses
        $hasClassValue = isset($tagAttributes['class']) && trim($tagAttributes['class']) !== '';
        if ($hasClassValue && !empty($this->allowedClasses) && !in_array($tagAttributes['class'], $this->allowedClasses, true)) {
            $givenClasses = GeneralUtility::trimExplode(' ', $tagAttributes['class'], true);
            $classes = array_intersect($givenClasses, $this->allowedClasses);
            if (empty($classes)) {
                unset($tagAttributes['class']);
            } else {
                $tagAttributes['class'] = implode(' ', $classes);
            }
        }
    }

    // Remove any line break from the content and compile the surrounding <p> tag
    $openingTag = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>';
    return $openingTag . str_replace(LF, '', $content) . '</p>';
}
1091
/**
 * Puts every <hr> tag on a line of its own, used when transforming from RTE to DB.
 * Each <hr> tag is rewritten with a LF before and after it, then pairs of LFs are
 * replaced by a single LF and a LF at the start or end of the content is removed.
 *
 * @param string $content
 * @return string the modified content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    // Surround every hr tag with line breaks
    $withSeparatedRulers = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    // Collapse double line breaks
    $withoutDoubleBreaks = str_replace(LF . LF, LF, $withSeparatedRulers);
    // Strip a leading / trailing line break
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubleBreaks);
}
1105
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute ("width: NNpx" / "height: NNpx"), use that!
 * Otherwise the "width" / "height" attributes are used as fallback.
 *
 * Missing "style", "width" or "height" keys are treated as not set and do not raise notices.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
protected function getWHFromAttribs($attribArray)
{
    $style = trim((string)($attribArray['style'] ?? ''));
    $w = 0;
    $h = 0;
    if ($style !== '') {
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = [];
        // Width - only read the capture group if the pattern actually matched
        if (preg_match('/width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match('/height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes if the style did not provide a (non-zero) value
    if (!$w) {
        $w = $attribArray['width'] ?? 0;
    }
    if (!$h) {
        $h = $attribArray['height'] ?? 0;
    }
    return [(int)$w, (int)$h];
}
1136
/**
 * Parse <A>-tag href and return status of email,external,file or page
 * This functionality is not in use anymore
 *
 * The returned array always contains the keys "url" and "type" ("email", "file", "ext",
 * "anchor" or "page"). For URLs on the current site it additionally contains "relScriptPath"
 * and "relUrl", and for page links "pageid", "cElement" and "query".
 *
 * URL parts that are not present (host, path, query, fragment) are treated as empty
 * instead of being read unguarded.
 *
 * @param string $url URL to analyze.
 * @return array Information in an array about the URL
 * @deprecated will be removed in TYPO3 v10.0. Not in use anymore.
 */
protected function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);
    if (strpos(strtolower($url), 'mailto:') === 0) {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    $filePosition = strpos($url, '?file:');
    if ($filePosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $filePosition + 1));
        return $info;
    }

    $curURL = (string)GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    // Length of the common prefix of the given URL and the site URL. The comparison is bounded
    // by the shorter string, so no offset beyond the end of $curURL is read.
    $comparableLength = min(strlen($url), strlen($curURL));
    $a = 0;
    while ($a < $comparableLength && $url[$a] === $curURL[$a]) {
        $a++;
    }
    $info['relScriptPath'] = substr($curURL, $a);
    $info['relUrl'] = substr($url, $a);
    $info['url'] = $url;
    $info['type'] = 'ext';
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);

    // Hosts should match, and the script path should be empty (FE-EDIT) or point into the TYPO3 main directory
    $hostsMatch = ($siteUrl_parts['host'] ?? null) == ($curUrl_parts['host'] ?? null);
    $scriptPathMatches = !$info['relScriptPath']
        || (defined('TYPO3_mainDir') && strpos($info['relScriptPath'], TYPO3_mainDir) === 0);
    if (!$hostsMatch || !$scriptPathMatches) {
        // External URL: the relative parts carry no meaning
        unset($info['relScriptPath']);
        unset($info['relUrl']);
        return $info;
    }

    // New processing order 100502
    $uP = parse_url($info['relUrl']);
    $path = $uP['path'] ?? '';
    if ($info['relUrl'] === '#' . ($siteUrl_parts['fragment'] ?? '')) {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($path) || $path === 'index.php') {
        // URL is a page (id parameter)
        $pp = preg_split('/^id=/', $uP['query'] ?? '');
        $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1] ?? '');
        $parameters = explode('&', $pp[1]);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $uP['fragment'] ?? null;
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1202
/**
 * Converting <A>-tags to absolute URLs
 *
 * Every <a> tag whose href has content but no scheme gets the site URL (TYPO3_SITE_URL)
 * prepended. <a> tags without an href attribute, or with an empty one, keep their
 * attributes unchanged. Nested <a> tags are processed recursively.
 *
 * @param string $value Content input
 * @return string Content output
 */
protected function TS_AtagToAbs($value)
{
    if (func_num_args() > 1) {
        trigger_error('Second argument of RteHtmlParser->TS_AtagToAbs() is not in use and will be removed in TYPO3 v10.0, however the argument in the callers code can be removed without side-effects.', E_USER_DEPRECATED);
    }
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions contain an <a> block
        if ($k % 2 === 0) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Checking if there is a scheme, and if not, prepend the current url.
        // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
        // A missing href must be treated like an empty one here: comparing the unset value directly
        // against '' would be true and add an href to a pure anchor.
        if (($attribArray['href'] ?? '') !== '') {
            $uP = parse_url(strtolower($attribArray['href']));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
            }
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
    }
    return implode('', $blockSplit);
}
1234
/**
 * Apply plain image settings to the dimensions of the image
 *
 * Depending on the processing option "plainImageMode" the width/height attributes are corrected:
 * - lockDimensions: width and height are set to the values of $imageInfo
 * - lockRatioWhenSmaller: the width is limited to the width of $imageInfo, the height follows the ratio of $imageInfo
 * - lockRatio: the height follows the ratio of $imageInfo
 * If the option is not set, or holds any other value, the attributes are returned unchanged.
 *
 * @param array $imageInfo: info array of the image, width in key 0 and height in key 1
 * @param array $attribArray: array of attributes of an image tag
 *
 * @return array a modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    // The option is optional, so it is read with a fallback like the other processing options in this class
    $plainImageMode = (string)($this->procOptions['plainImageMode'] ?? '');
    // Perform corrections to aspect ratio based on configuration
    switch ($plainImageMode) {
        case 'lockDimensions':
            $attribArray['width'] = $imageInfo[0];
            $attribArray['height'] = $imageInfo[1];
            break;
        case 'lockRatioWhenSmaller':
            if ($attribArray['width'] > $imageInfo[0]) {
                $attribArray['width'] = $imageInfo[0];
            }
            // The ratio can only be computed for a positive image width
            if ($imageInfo[0] > 0) {
                $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
            }
            break;
        case 'lockRatio':
            if ($imageInfo[0] > 0) {
                $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
            }
            break;
    }
    return $attribArray;
}
1269
/**
 * Called before any processing / transformation is made
 * Strips all CRs (char 13) from the content, so only LFs (char 10) are dealt with internally.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
1284
/**
 * Called after any processing / transformation was made
 * Just before the content is returned by the RTE parser, all line breaks
 * are unified to "CRLF" again.
 *
 * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First drop any CR that has entered in the meantime, then turn every LF into CRLF
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
1302
/**
 * Content Transformation from DB to RTE
 * Checks all <a> tags which reference a page and checks if the page record is available.
 * If not, the attribute "data-rte-error" is added to the tag.
 *
 * A link whose href cannot be resolved because no link handler is registered for it
 * is marked with "data-rte-error" as well, instead of aborting the whole transformation.
 *
 * @param string $content
 * @return string the modified content
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Only odd positions contain an <a> block
        if ($position % 2 === 0) {
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }
        try {
            $hrefInformation = $linkService->resolve($attributes['href']);
            if (($hrefInformation['type'] ?? null) === LinkService::TYPE_PAGE && $hrefInformation['pageuid'] !== 'current') {
                $pageRecord = BackendUtility::getRecord('pages', $hrefInformation['pageuid']);
                if (!is_array($pageRecord)) {
                    // Page does not exist
                    $attributes['data-rte-error'] = 'Page with ID ' . $hrefInformation['pageuid'] . ' not found';
                }
            }
        } catch (UnknownLinkHandlerException $e) {
            // The href uses a link handler that is not registered: one invalid link must not
            // break the rendering of the whole RTE content, so it is only flagged as broken.
            $attributes['data-rte-error'] = $e->getMessage();
        }
        // Always rewrite the block to allow the nested calling even if a page is found
        $blocks[$position] =
            '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]))
            . '</a>';
    }
    return implode('', $blocks);
}
1339
/**
 * Content Transformation from RTE to DB
 * Strips the error information from <a> tags that was added to mark broken links:
 * the "data-rte-error" attribute and the highlighting style declaration.
 * <a> tags without a href are left untouched. Nested <a> tags are processed recursively.
 *
 * @param string $content the content to process
 * @return string the modified content
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $brokenLinkStyle = 'background-color: yellow; border:2px red solid; color: black;';
    $blocks = $this->splitIntoBlock('A', $content);
    foreach ($blocks as $position => $value) {
        // Only odd positions contain an <a> block
        if ($position % 2 !== 0) {
            list($attributes) = $this->get_tag_attributes($this->getFirstTag($value), true);
            if (!empty($attributes['href'])) {
                // Always remove the styling again (regardless of the page was found or not)
                // so the database does not contain ugly stuff
                unset($attributes['data-rte-error']);
                if (isset($attributes['style'])) {
                    $remainingStyle = trim(str_replace($brokenLinkStyle, '', $attributes['style']));
                    if (empty($remainingStyle)) {
                        // Nothing but the marker styling was set
                        unset($attributes['style']);
                    } else {
                        $attributes['style'] = $remainingStyle;
                    }
                }
                $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
                $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($blocks[$position]));
                $blocks[$position] = $openingTag . $innerContent . '</a>';
            }
        }
    }
    return implode('', $blocks);
}
1374 }