e2316f8a1e5579ee0e2a8bdc6c44fd109958b1b1
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Html / RteHtmlParser.php
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Log\LogManager;
19 use TYPO3\CMS\Core\Resource;
20 use TYPO3\CMS\Core\Utility\GeneralUtility;
21 use TYPO3\CMS\Core\Utility\MathUtility;
22 use TYPO3\CMS\Core\Utility\StringUtility;
23 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
24
25 /**
26 * Class for parsing HTML for the Rich Text Editor. (also called transformations)
27 */
28 class RteHtmlParser extends HtmlParser
29 {
/**
 * Comma separated list of block level tag names. Content found inside these
 * elements is not wrapped into a "p" tag while doing the transformation.
 *
 * @var string
 */
public $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma separated list of all tags that are allowed by default
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';

/**
 * Pid (page id) of the record manipulated by the class.
 *
 * @var int
 */
public $recPid = 0;

/**
 * Element reference in the form [table]:[field], eg. "tt_content:bodytext"
 *
 * @var string
 */
public $elRef = '';

/**
 * Current Page TSConfig
 *
 * @var array
 */
public $tsConfig = [];

/**
 * The TSconfig processing options coming from Page TSconfig
 *
 * @var array
 */
public $procOptions = [];

/**
 * Run-away brake for recursive calls: remaining number of nested calls that are allowed.
 *
 * @var int
 */
public $TS_transform_db_safecounter = 100;

/**
 * Data caching for processing function
 *
 * @var array
 */
public $getKeepTags_cache = [];

/**
 * The CSS class names that are allowed in the RTE
 *
 * @var array
 */
public $allowedClasses = [];
90
91 /**
92 * Initialize, setting element reference and record PID
93 *
94 * @param string $elRef Element reference, eg "tt_content:bodytext"
95 * @param int $recPid PID of the record (page id)
96 * @return void
97 */
public function init($elRef = '', $recPid = 0)
{
    // Remember which "[table]:[field]" element is processed ...
    $this->elRef = $elRef;
    // ... and on which page the record is stored
    $this->recPid = $recPid;
}
103
104 /**********************************************
105 *
106 * Main function
107 *
108 **********************************************/
109 /**
110 * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
111 * This is the main function called from tcemain and transfer data classes
112 *
113 * @param string $value Input value
114 * @param array $specConf Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can set up features for the field rendering and in particular the RTE takes all its major configuration options from there!
115 * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
116 * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
117 * @return string Output value
118 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = array())
{
    // Init: keep the full configuration and its "proc." section (empty array if it is not configured)
    $this->tsConfig = $thisConfig;
    $this->procOptions = isset($thisConfig['proc.']) ? (array)$thisConfig['proc.'] : [];
    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }
    // Setting modes:
    if (isset($this->procOptions['overruleMode']) && (string)$this->procOptions['overruleMode'] !== '') {
        // "overruleMode" is a comma separated list and takes precedence over the TCA configuration
        $modes = array_unique(GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']));
    } else {
        // Get parameters for rte_transformation; here the modes are separated by "-"
        $rteTransformParameters = isset($specConf['rte_transform']['parameters'])
            ? $specConf['rte_transform']['parameters']
            : null;
        $specialFieldConfiguration = BackendUtility::getSpecConfParametersFromArray($rteTransformParameters);
        $configuredMode = isset($specialFieldConfiguration['mode']) ? $specialFieldConfiguration['mode'] : '';
        $modes = array_unique(GeneralUtility::trimExplode('-', $configuredMode));
    }
    $revmodes = array_flip($modes);
    // Find special modes and extract them: "ts_css" is a shortcut for three transformations
    if (isset($revmodes['ts_css'])) {
        $modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
    }
    // Make list unique (the implode/explode round trip splits the expanded shortcut into single modes)
    $modes = array_unique(GeneralUtility::trimExplode(',', implode(',', $modes), true));
    // Reverse order if direction is "rte"
    if ($direction === 'rte') {
        $modes = array_reverse($modes);
    }
    // Getting additional HTML cleaner configuration. These are applied either before or after the main
    // transformation is done and are thus totally independent processing options you can set up:
    $entry_HTMLparser = '';
    if (!empty($this->procOptions['entryHTMLparser_' . $direction])) {
        $entryConfiguration = isset($this->procOptions['entryHTMLparser_' . $direction . '.'])
            ? $this->procOptions['entryHTMLparser_' . $direction . '.']
            : null;
        $entry_HTMLparser = $this->HTMLparserConfig($entryConfiguration);
    }
    $exit_HTMLparser = '';
    if (!empty($this->procOptions['exitHTMLparser_' . $direction])) {
        $exitConfiguration = isset($this->procOptions['exitHTMLparser_' . $direction . '.'])
            ? $this->procOptions['exitHTMLparser_' . $direction . '.']
            : null;
        $exit_HTMLparser = $this->HTMLparserConfig($exitConfiguration);
    }
    // Line breaks of content is unified into char-10 only (removing char 13)
    if (empty($this->procOptions['disableUnifyLineBreaks'])) {
        $value = str_replace(CRLF, LF, $value);
    }
    // If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($entry_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $entry_HTMLparser[0], $entry_HTMLparser[1], $entry_HTMLparser[2], $entry_HTMLparser[3]);
    }
    // Traverse modes:
    foreach ($modes as $cmd) {
        // Checking for user defined transformation registered for this mode:
        $_classRef = isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd])
            ? $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]
            : null;
        if ($direction === 'db') {
            // ->DB
            if ($_classRef) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        $allowedClasses = isset($this->procOptions['allowedClasses']) ? $this->procOptions['allowedClasses'] : '';
                        $this->allowedClasses = GeneralUtility::trimExplode(',', $allowedClasses, true);
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            // ->RTE
            if ($_classRef) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                // Hand over the transformation key just like the "db" direction does
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'css_transform':
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }
    // If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($exit_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $exit_HTMLparser[0], $exit_HTMLparser[1], $exit_HTMLparser[2], $exit_HTMLparser[3]);
    }
    // Final clean up of linebreaks:
    if (empty($this->procOptions['disableUnifyLineBreaks'])) {
        // Make sure no \r\n sequences has entered in the meantime...
        $value = str_replace(CRLF, LF, $value);
        // ... and then change all \n into \r\n
        $value = str_replace(LF, CRLF, $value);
    }
    // Return value:
    return $value;
}
233
234 /************************************
235 *
236 * Specific RTE TRANSFORMATION functions
237 *
238 *************************************/
239 /**
240 * Transformation handler: 'ts_images' / direction: "db"
241 * Processing images inserted in the RTE.
242 * This is used when content goes from the RTE to the database.
243 * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
244 * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
245 * Also "magic" images are processed here.
246 *
247 * @param string $value The content from RTE going to Database
248 * @return string Processed content
249 */
public function TS_images_db($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    // the img tags are found at the odd keys of the result
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        // The site URL with the request host removed
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        /** @var $resourceFactory Resource\ResourceFactory */
        $resourceFactory = Resource\ResourceFactory::getInstance();
        /** @var $magicImageService Resource\Service\MagicImageService */
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Image found, do processing:
            if ($k % 2) {
                // Get attributes
                list($attribArray) = $this->get_tag_attributes($v, true);
                // It's always an absolute URL coming from the RTE into the Database.
                $absoluteUrl = trim($attribArray['src']);
                // Make path absolute if it has no scheme and starts with the site path.
                // The scheme is read with parse_url(): pathinfo() never returns a "scheme" key.
                $urlScheme = parse_url($absoluteUrl, PHP_URL_SCHEME);
                if ($sitePath && !$urlScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                    // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                    $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                    $absoluteUrl = $siteUrl . $absoluteUrl;
                }
                // Image dimensions set in the img tag, if any
                $imgTagDimensions = $this->getWHFromAttribs($attribArray);
                if ($imgTagDimensions[0]) {
                    $attribArray['width'] = $imgTagDimensions[0];
                }
                if ($imgTagDimensions[1]) {
                    $attribArray['height'] = $imgTagDimensions[1];
                }
                $originalImageFile = null;
                if ($attribArray['data-htmlarea-file-uid']) {
                    // An original image file uid is available
                    try {
                        /** @var $originalImageFile Resource\File */
                        $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                    } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                        // Log the fact the file could not be retrieved.
                        $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                        $this->getLogger()->error($message);
                    }
                }
                if ($originalImageFile instanceof Resource\File) {
                    // Public url of local file is relative to the site url, absolute otherwise
                    if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                        // This is a plain image, i.e. reference to the original image
                        if ($this->procOptions['plainImageMode']) {
                            // "plain image mode" is configured
                            // Find the dimensions of the original image
                            $imageInfo = [
                                $originalImageFile->getProperty('width'),
                                $originalImageFile->getProperty('height')
                            ];
                            if (!$imageInfo[0] || !$imageInfo[1]) {
                                // Dimensions are not stored as file properties, so read them from the file itself
                                $filePath = $originalImageFile->getForLocalProcessing(false);
                                $imageInfo = @getimagesize($filePath);
                            }
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                    } else {
                        // Magic image case: get a processed file with the requested configuration
                        $imageConfiguration = [
                            'width' => $imgTagDimensions[0],
                            'height' => $imgTagDimensions[1]
                        ];
                        $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && !$this->procOptions['dontFetchExtPictures'] && TYPO3_MODE === 'BE') {
                    // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                    // Fetch the external image
                    $externalFile = GeneralUtility::getUrl($absoluteUrl);
                    if ($externalFile) {
                        $pU = parse_url($absoluteUrl);
                        $pI = pathinfo($pU['path']);
                        $extension = strtolower($pI['extension']);
                        // Only files with one of these image extensions are stored locally
                        if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                            $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                            // We insert this image into the user default upload folder
                            list($table, $field) = explode(':', $this->elRef);
                            $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                            $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                            $imageConfiguration = [
                                'width' => $attribArray['width'],
                                'height' => $attribArray['height']
                            ];
                            $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                            $attribArray['width'] = $magicImage->getProperty('width');
                            $attribArray['height'] = $magicImage->getProperty('height');
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            $attribArray['src'] = $magicImage->getPublicUrl();
                        }
                    }
                } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                    // Finally, check image as local file (siteURL equals the one of the image)
                    // Image has no data-htmlarea-file-uid attribute
                    // Relative path, rawurldecoded for special characters.
                    $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                    // Absolute filepath, locked to relative path of this project
                    $filepath = GeneralUtility::getFileAbsFileName($path);
                    // Check file existence (in relative directory to this installation!)
                    if ($filepath && @is_file($filepath)) {
                        // Treat it as a plain image
                        if ($this->procOptions['plainImageMode']) {
                            // If "plain image mode" has been configured
                            // Find the original dimensions of the image
                            $imageInfo = @getimagesize($filepath);
                            $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                        }
                        // Let's try to find a file uid for this image
                        try {
                            $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                            if ($fileOrFolderObject instanceof Resource\FileInterface) {
                                $fileIdentifier = $fileOrFolderObject->getIdentifier();
                                $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                                // @todo if the retrieved file is a processed file, get the original file...
                                $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                            }
                        } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                            // Nothing to be done if file/folder not found
                        }
                    }
                }
                // Remove width and height from style attribute
                $attribArray['style'] = preg_replace('/((?:^|)\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
                // Must have alt attribute
                if (!isset($attribArray['alt'])) {
                    $attribArray['alt'] = '';
                }
                // Convert absolute to relative url
                if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
                    $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
                }
                $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
            }
        }
    }
    return implode('', $imgSplit);
}
395
396 /**
397 * Transformation handler: 'ts_images' / direction: "rte"
398 * Processing images from database content going into the RTE.
399 * Processing includes converting the src attribute to an absolute URL.
400 *
401 * @param string $value Content input
402 * @return string Content output
403 */
public function TS_images_rte($value)
{
    // Split content by <img> tags; the tags themselves end up at the odd keys
    $segments = $this->splitTags('img', $value);
    if (count($segments) <= 1) {
        // No img tag found, nothing to process
        return implode('', $segments);
    }
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    // Matches the site path at the beginning of a src value
    $leadingSitePathPattern = '#^' . preg_quote($sitePath, '#') . '#';
    foreach ($segments as $index => $imageTag) {
        if (!($index % 2)) {
            // Content between images stays untouched
            continue;
        }
        // Get the attributes of the img tag
        list($attributes) = $this->get_tag_attributes($imageTag, true);
        // Transform the src attribute into an absolute url, if it does not already start with "http"
        $alreadyAbsolute = strtolower(substr(trim($attributes['src']), 0, 4)) === 'http';
        if (!$alreadyAbsolute) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed first
            // because it is added again as part of $siteUrl
            $attributes['src'] = $siteUrl . preg_replace($leadingSitePathPattern, '', $attributes['src']);
        }
        // Must have alt attribute
        if (!isset($attributes['alt'])) {
            $attributes['alt'] = '';
        }
        $segments[$index] = '<img ' . GeneralUtility::implodeAttributes($attributes, 1, 1) . ' />';
    }
    // Return processed content:
    return implode('', $segments);
}
434
435 /**
436 * Transformation handler: 'ts_links' / direction: "db"
437 * Converting <A>-tags to <link tags>
438 *
439 * @param string $value Content input
440 * @return string Content output
441 * @see TS_links_rte()
442 */
public function TS_links_db($value)
{
    $conf = [];
    // Split content into <a> tag blocks; the A-tag blocks are found at the odd keys
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
        if (!($index % 2)) {
            // Content outside of A-tags is kept as it is
            continue;
        }
        list($attributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);
        $info = $this->urlInfoForLinkTags($attributes['href']);
        // Find out which attributes are left besides href, target, class, title and data-htmlarea-external
        $leftoverAttributes = $attributes;
        unset(
            $leftoverAttributes['href'],
            $leftoverAttributes['target'],
            $leftoverAttributes['class'],
            $leftoverAttributes['title'],
            $leftoverAttributes['data-htmlarea-external']
        );
        // Unset "rteerror" and "style" attributes if "rteerror" is set!
        if ($leftoverAttributes['rteerror']) {
            unset($leftoverAttributes['style'], $leftoverAttributes['rteerror']);
        }
        // Hook: registered processors may remove additional parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'])) {
            $parameters = [
                'conf' => &$conf,
                'aTagParams' => &$leftoverAttributes
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $leftoverAttributes = $processor->removeParams($parameters, $this);
            }
        }
        if (!empty($leftoverAttributes)) {
            // There are attributes a <link> tag cannot hold, so the link is stored as a-tag.
            // Unsetting 'rtekeep' attribute if that had been set.
            unset($attributes['rtekeep']);
            if (!$attributes['data-htmlarea-external']) {
                $siteURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
                // If the url is local, remove url-prefix
                if ($siteURL && substr($attributes['href'], 0, strlen($siteURL)) == $siteURL) {
                    $attributes['href'] = substr($attributes['href'], strlen($siteURL));
                }
                // Check for FAL link-handler keyword
                list($linkHandlerKeyword, $linkHandlerValue) = explode(':', $attributes['href'], 2);
                if ($linkHandlerKeyword === '?file') {
                    try {
                        $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject(rawurldecode($linkHandlerValue));
                        if ($fileOrFolderObject instanceof Resource\FileInterface || $fileOrFolderObject instanceof Resource\Folder) {
                            $attributes['href'] = $fileOrFolderObject->getPublicUrl();
                        }
                    } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // The identifier inserted in the RTE is already gone...
                    }
                }
            }
            unset($attributes['data-htmlarea-external']);
            $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, 1) . '>';
            // The content of the tag is processed recursively
            $segments[$index] = $openingTag . $this->TS_links_db($this->removeFirstAndLastTag($segment)) . '</a>';
            continue;
        }
        // Only href, target, class and title are set, so the link can be converted:
        // Quoting class and title attributes if they contain spaces
        $attributes['class'] = preg_match('/ /', $attributes['class']) ? '"' . $attributes['class'] . '"' : $attributes['class'];
        $attributes['title'] = preg_match('/ /', $attributes['title']) ? '"' . $attributes['title'] . '"' : $attributes['title'];
        // If data-htmlarea-external attribute is set, keep the href unchanged
        $href = $attributes['data-htmlarea-external']
            ? $attributes['href']
            : $info['url'] . ($info['query'] ? ',0,' . $info['query'] : '');
        // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target, class and title):
        $typoLinkParts = [
            'url' => $href,
            'target' => $attributes['target'],
            'class' => trim($attributes['class'], '"'),
            'title' => trim($attributes['title'], '"'),
            'additionalParams' => ''
        ];
        $typoLink = GeneralUtility::makeInstance(TypoLinkCodecService::class)->encode($typoLinkParts);
        // Hook: registered processors build the result themselves
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
            $parameters = [
                'conf' => &$conf,
                'currentBlock' => $segment,
                'url' => $href,
                'attributes' => $attributes
            ];
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $segments[$index] = $processor->modifyParamsLinksDb($parameters, $this);
            }
        } else {
            // The content of the tag is processed recursively
            $segments[$index] = '<link ' . $typoLink . '>' . $this->TS_links_db($this->removeFirstAndLastTag($segment)) . '</link>';
        }
    }
    return implode('', $segments);
}
538
539 /**
540 * Transformation handler: 'ts_links' / direction: "rte"
541 * Converting <link tags> to <A>-tags
542 *
543 * @param string $value Content input
544 * @return string Content output
545 * @see TS_links_db()
546 */
public function TS_links_rte($value)
{
    $conf = [];
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>"; the link blocks are found at the odd keys
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    foreach ($blockSplit as $k => $v) {
        $error = '';
        $external = false;
        // Block
        if ($k % 2) {
            // split away the first "<link" part
            $typolink = explode(' ', substr($this->getFirstTag($v), 0, -1), 2)[1];
            $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolink);

            $link_param = $tagCode['url'];
            // Parsing the typolink data. This parsing is roughly done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
            // Parse URL:
            $pU = parse_url($link_param);
            if (strstr($link_param, '@') && (!$pU['scheme'] || $pU['scheme'] == 'mailto')) {
                // mailadr
                $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
            } elseif (strpos((string)$link_param, '#') === 0) {
                // check if anchor; strpos() is used as reading offset 0 of an empty string raises an error message
                $href = $siteUrl . $link_param;
            } else {
                // Check for FAL link-handler keyword:
                list($linkHandlerKeyword, $linkHandlerValue) = explode(':', trim($link_param), 2);
                if ($linkHandlerKeyword === 'file' && !StringUtility::beginsWith($link_param, 'file://')) {
                    $href = $siteUrl . '?' . $linkHandlerKeyword . ':' . rawurlencode($linkHandlerValue);
                } else {
                    // Positions of the first "/" and "." (0 if not found)
                    $fileChar = (int)strpos($link_param, '/');
                    $urlChar = (int)strpos($link_param, '.');
                    // Detects if a file is found in site-root.
                    list($rootFileDat) = explode('?', $link_param);
                    $rFD_fI = pathinfo($rootFileDat);
                    $fileExtension = strtolower($rFD_fI['extension']);
                    if (strpos($link_param, '/') === false && trim($rootFileDat) && (@is_file(PATH_site . $rootFileDat) || $fileExtension === 'php' || $fileExtension === 'html' || $fileExtension === 'htm')) {
                        $href = $siteUrl . $link_param;
                    } elseif (
                        (
                            $pU['scheme']
                            && !isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$pU['scheme']])
                        )
                        || $urlChar && (!$fileChar || $urlChar < $fileChar)
                    ) {
                        // url (external): if has scheme or if a '.' comes before a '/'.
                        $href = $link_param;
                        if (!$pU['scheme']) {
                            $href = 'http://' . $href;
                        }
                        $external = true;
                    } elseif ($fileChar) {
                        // It is an internal file or folder
                        // Try to transform the href into a FAL reference
                        try {
                            $fileOrFolderObject = Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject($link_param);
                        } catch (Resource\Exception $exception) {
                            // Nothing to be done if file/folder not found or path invalid
                            $fileOrFolderObject = null;
                        }
                        if ($fileOrFolderObject instanceof Resource\Folder) {
                            // It's a folder
                            $folderIdentifier = $fileOrFolderObject->getIdentifier();
                            $href = $siteUrl . '?file:' . rawurlencode($folderIdentifier);
                        } elseif ($fileOrFolderObject instanceof Resource\FileInterface) {
                            // It's a file
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            $href = $siteUrl . '?file:' . $fileObject->getUid();
                        } else {
                            $href = $siteUrl . $link_param;
                        }
                    } else {
                        // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                        // Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/parameters triplet
                        $pairParts = GeneralUtility::trimExplode(',', $link_param, true);
                        $idPart = $pairParts[0];
                        $link_params_parts = explode('#', $idPart);
                        $idPart = trim($link_params_parts[0]);
                        $sectionMark = trim($link_params_parts[1]);
                        // If no id or alias is given, set it to class record pid
                        if ((string)$idPart === '') {
                            $idPart = $this->recPid;
                        }
                        // Checking if the id-parameter is an alias.
                        if (!MathUtility::canBeInterpretedAsInteger($idPart)) {
                            list($idPartR) = BackendUtility::getRecordsByField('pages', 'alias', $idPart);
                            $idPart = (int)$idPartR['uid'];
                        }
                        $page = BackendUtility::getRecord('pages', $idPart);
                        if (is_array($page)) {
                            // Page must exist...
                            $href = $siteUrl . '?id=' . $idPart . ($pairParts[2] ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                        } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][explode(':', $link_param, 2)[0]])) {
                            // A link handler is registered for the part before the first ":".
                            // The key is read by array dereferencing since array_shift() expects a variable (by reference).
                            $href = $link_param;
                        } else {
                            $href = $siteUrl . '?id=' . $link_param;
                            $error = 'No page found: ' . $idPart;
                        }
                    }
                }
            }
            // Setting the A-tag:
            $bTag = '<a href="' . htmlspecialchars($href) . '"'
                . ($tagCode['target'] ? ' target="' . htmlspecialchars($tagCode['target']) . '"' : '')
                . ($tagCode['class'] ? ' class="' . htmlspecialchars($tagCode['class']) . '"' : '')
                . ($tagCode['title'] ? ' title="' . htmlspecialchars($tagCode['title']) . '"' : '')
                . ($external ? ' data-htmlarea-external="1"' : '')
                . ($error ? ' rteerror="' . htmlspecialchars($error) . '" style="background-color: yellow; border:2px red solid; color: black;"' : '') . '>';
            $eTag = '</a>';
            // Modify parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])) {
                $parameters = [
                    'conf' => &$conf,
                    'currentBlock' => $v,
                    'url' => $href,
                    'tagCode' => $tagCode,
                    'external' => $external,
                    'error' => $error
                ];
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
                }
            } else {
                // The content of the link is processed recursively
                $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
            }
        }
    }
    // Return content:
    return implode('', $blockSplit);
}
681
682 /**
683 * Transformation handler: 'css_transform' / direction: "db"
684 * Cleaning (->db) for standard content elements (ts)
685 *
686 * @param string $value Content input
687 * @return string Content output
688 * @see TS_transform_rte()
689 */
public function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Give back the level taken above, otherwise every bail-out would
        // lower the allowed nesting depth of this instance permanently
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // The content of these elements is transformed recursively
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace(('/[' . LF . CR . ']+/'), ' ', $this->transformStyledATags($blockSplit[$k]));
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . CR . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
                $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
            } else {
                // Segments holding only whitespace are dropped
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
750
/**
 * Wraps a-tags that contain a style attribute with a span-tag
 *
 * The style attribute is moved from the a-tag to the wrapping span-tag. A-tags without a
 * style attribute, or carrying an "rteerror" attribute, are left untouched.
 *
 * @param string $value Content input
 * @return string Content output
 */
public function transformStyledATags($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold an a-tag block
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Only act if "style" is set and "rteerror" is not. empty() gives the same truthiness
        // test as before without raising notices for attributes that are absent.
        if (empty($attribArray['style']) || !empty($attribArray['rteerror'])) {
            continue;
        }
        // Fresh array per tag: the span receives the style attribute and nothing else
        $spanAttributes = array('style' => $attribArray['style']);
        unset($attribArray['style']);
        $bTag = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, 1)
            . '><a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
        $eTag = '</a></span>';
        $blockSplit[$k] = $bTag . $this->removeFirstAndLastTag($v) . $eTag;
    }
    return implode('', $blockSplit);
}
776
/**
 * Transformation handler: css_transform / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by the elements listed in $this->blockElementList. Container blocks
 * (blockquote, dd, div, header, section, footer, nav, article, aside) are processed recursively;
 * the content between blocks is turned into paragraphs by setDivTags().
 *
 * @param string $value Content input
 * @return string Content output
 * @see TS_transform_db()
 */
public function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Blocks whose content may itself contain block elements and is therefore processed recursively
    $containerTags = array('blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside');
    // Traverse the parts; odd keys are blocks, even keys are the content between blocks.
    // $blockSplit[$k] is read instead of the loop value because a preceding block may already
    // have altered the part that follows it.
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tagName = strtolower($this->getFirstTagName($blockSplit[$k]));
            if (in_array($tagName, $containerTags, true)) {
                $blockSplit[$k] = $this->getFirstTag($blockSplit[$k])
                    . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]))
                    . '</' . $tagName . '>';
            }
            // Strip the linebreak directly following the block. A missing follower is treated as
            // an empty string, which yields the same result without an undefined-offset notice.
            $following = isset($blockSplit[$k + 1]) ? $blockSplit[$k + 1] : '';
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $following);
        } else {
            // NON-block:
            // Only look at the following part if there is one (the last part has no follower)
            $hasFollower = isset($blockSplit[$k + 1]);
            $nextFTN = $hasFollower ? $this->getFirstTagName($blockSplit[$k + 1]) : '';
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (!$hasFollower || GeneralUtility::inList($this->blockElementList, $nextFTN)) {
                if (!$onlyLineBreaks) {
                    // The line contains more than just linebreaks: reduce the number of trailing linebreaks by 1
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // The line contains only linebreaks: remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If the part is blank then drop it, unless it only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    return implode(LF, $blockSplit);
}
835
836 /***************************************************************
837 *
838 * Generic RTE transformation, analysis and helper functions
839 *
840 **************************************************************/
841
/**
 * Cleans content on its way from the RTE into the database.
 *
 * Delegates to HTMLcleaner() using the tag configuration returned by getKeepTags('db').
 * Unknown tags are kept only if the processing option "dontRemoveUnknownTags_db" is set.
 *
 * @param string $content Content to clean up
 * @return string Clean content
 * @see getKeepTags()
 */
public function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner(
        $content,
        $this->getKeepTags('db'),
        // Default: remove unknown tags.
        (bool)$this->procOptions['dontRemoveUnknownTags_db']
    );
}
858
/**
 * Builds the tag configuration array for HTMLcleaner(), depending on whether content goes
 * TO or FROM the Rich Text Editor ($direction).
 * The result is cached per direction in $this->getKeepTags_cache and reused on later calls.
 *
 * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
 * @return array Configuration array
 * @see HTMLcleaner_db()
 */
public function getKeepTags($direction = 'rte')
{
    // Serve from the cache if this direction was resolved before
    if (is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Allowed tags: the built-in default list plus the (lowercased) "allowTags" processing option
    $allowedTagNames = GeneralUtility::trimExplode(
        ',',
        $this->defaultAllowedTagsList . ',' . strtolower($this->procOptions['allowTags']),
        true
    );
    $keepTags = array_flip($allowedTagNames);
    // Tags listed in the "denyTags" processing option are taken out again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'], true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Remapping between b/i and strong/em is active unless explicitly switched off
    $remapBoldAndItalic = !isset($this->procOptions['transformBoldAndItalicTags'])
        || $this->procOptions['transformBoldAndItalicTags'];

    if ($direction == 'rte') {
        if ($remapBoldAndItalic) {
            // Transform bold/italics tags to strong/em
            if (isset($keepTags['b'])) {
                $keepTags['b'] = array('remap' => 'STRONG');
            }
            if (isset($keepTags['i'])) {
                $keepTags['i'] = array('remap' => 'EM');
            }
        }
        // Convert the TypoScript style configuration (having .'s) into the plain
        // multi-dimensional array understood by HTMLcleaner()
        list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
    } elseif ($direction == 'db') {
        if ($remapBoldAndItalic) {
            // Transform strong/em back to bold/italics
            if (isset($keepTags['strong'])) {
                $keepTags['strong'] = array('remap' => 'b');
            }
            if (isset($keepTags['em'])) {
                $keepTags['em'] = array('remap' => 'i');
            }
        }
        // Setting up span tags if they are allowed
        if (isset($keepTags['span'])) {
            $classFix = array('removeIfFalse' => 1);
            if (!empty($this->allowedClasses)) {
                $classFix['list'] = $this->allowedClasses;
            }
            $keepTags['span'] = array(
                'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                'fixAttrib' => array(
                    'class' => $classFix
                ),
                'rmTagIfNoAttrib' => 1
            );
        }
        // Further options come from the processing options, with defaults for nesting and attribute-less tags
        $parserConfig = $this->procOptions['HTMLparser_db.'];
        if (!$parserConfig['globalNesting']) {
            $parserConfig['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
        }
        if (!$parserConfig['noAttrib']) {
            $parserConfig['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
        }
        // Transforming the array from TypoScript to regular array
        list($keepTags) = $this->HTMLparserConfig($parserConfig, $keepTags);
    }

    // Cache (internally, in object memory) and return the result
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
937
/**
 * This resolves the $value into parts based on <p>-sections and <br />-tags. These are returned as lines separated by LF.
 * The point is to resolve the HTML-code returned from the RTE into ordinary lines so it's 'human-readable'.
 * The function ->setDivTags does the opposite.
 * This function processes content to go into the database.
 *
 * @param string $value Value to process.
 * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
 * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value.
 * @return string|array Processed input value. An array is only returned if $returnArray is TRUE and p sections were found; if there are none (or $count is exhausted) a string is returned in any case.
 * @see setDivTags()
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Tags that may stay outside of p sections; "hr" is always part of the list
    $allowTagsOutside = GeneralUtility::trimExplode(
        ',',
        strtolower(!empty($this->procOptions['allowTagsOutside']) ? 'hr,' . $this->procOptions['allowTagsOutside'] : 'hr,img'),
        true
    );
    // The third argument is TRUE; per the original note this eliminates false end-tags
    $divSplit = $this->splitIntoBlock('p', $value, true);
    if (!empty($this->procOptions['keepPDIVattribs'])) {
        $keepAttribListArr = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    } else {
        $keepAttribListArr = array();
    }
    // Return plainly the value if there were no p sections in it (or the recursion brake kicked in)
    if (count($divSplit) <= 1 || $count <= 0) {
        // Wrap hr tags with LF's
        $newValue = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $value);
        $newValue = preg_replace('/' . LF . LF . '/i', LF, $newValue);
        $newValue = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $newValue);
        return $newValue;
    }
    // Traverse the split sections; odd keys are p sections, even keys are the content outside
    foreach ($divSplit as $k => $v) {
        if ($k % 2) {
            // Inside a p section
            $innerContent = $this->removeFirstAndLastTag($v);
            // Fetching 'sub-lines' - which will explode any further p nesting...
            $subLines = $this->divideIntoLines($innerContent, $count - 1, true);
            // If sub-nesting of p was found (an array came back) it is written directly as the
            // new content of THIS section. (This would be considered 'an error')
            if (!is_array($subLines)) {
                // ... but if NO subsection was found, we process it as a TRUE line without erroneous content.
                if (empty($this->procOptions['dontConvBRtoParagraph'])) {
                    // Break-tags are treated as if each separated a line of content
                    $subLines = preg_split('/<br[[:space:]]*[\\/]?>/i', $innerContent);
                } else {
                    $subLines = array($subLines);
                }
                // The attributes of the section's own p tag are the same for every sub-line, so they
                // are resolved once up front (assumes the tag helpers are side-effect free).
                list($tagAttributes) = $this->get_tag_attributes($this->getFirstTag($v));
                // Keep attributes (lowercase)
                $newAttribs = array();
                foreach ($keepAttribListArr as $keepA) {
                    if (isset($tagAttributes[$keepA])) {
                        $newAttribs[$keepA] = $tagAttributes[$keepA];
                    }
                }
                // ALIGN attribute: set to value, but not 'left'
                if (empty($this->procOptions['skipAlign'])
                    && isset($tagAttributes['align'])
                    && trim($tagAttributes['align']) !== ''
                    && strtolower($tagAttributes['align']) != 'left'
                ) {
                    $newAttribs['align'] = strtolower($tagAttributes['align']);
                }
                // CLASS attribute: kept as is if no class restriction applies or the whole value is
                // allowed, otherwise reduced to the allowed classes
                if (empty($this->procOptions['skipClass'])
                    && isset($tagAttributes['class'])
                    && trim($tagAttributes['class']) !== ''
                ) {
                    if (empty($this->allowedClasses) || in_array($tagAttributes['class'], $this->allowedClasses)) {
                        $newAttribs['class'] = $tagAttributes['class'];
                    } else {
                        $classes = GeneralUtility::trimExplode(' ', $tagAttributes['class'], true);
                        $newClasses = array();
                        foreach ($classes as $class) {
                            if (in_array($class, $this->allowedClasses)) {
                                $newClasses[] = $class;
                            }
                        }
                        if (!empty($newClasses)) {
                            $newAttribs['class'] = implode(' ', $newClasses);
                        }
                    }
                }
                // Lines are only wrapped in a p tag if there are attributes to carry
                $openingTag = !empty($newAttribs)
                    ? '<' . trim('p ' . $this->compileTagAttribs($newAttribs)) . '>'
                    : '';
                // Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
                // Note: a dedicated loop variable is used so the $value parameter is not overwritten.
                foreach ($subLines as $sk => $subLine) {
                    // Clear up the subline for DB.
                    $subLine = $this->HTMLcleaner_db($subLine);
                    // Remove any line break char (10 or 13)
                    $subLine = preg_replace('/' . LF . '|' . CR . '/', '', $subLine);
                    if ($openingTag !== '') {
                        $subLine = $openingTag . $subLine . '</p>';
                    }
                    $subLines[$sk] = $subLine;
                }
            }
            // Add the processed line(s)
            $divSplit[$k] = implode(LF, $subLines);
            // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
            // But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
            // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
            if (trim(strip_tags($divSplit[$k])) == '&nbsp;'
                && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $divSplit[$k])
                && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))
            ) {
                $divSplit[$k] = '';
            }
        } else {
            // Outside p sections:
            // Strip everything but the allowed tags and drop positions left without content
            $divSplit[$k] = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
            // Wrap hr tags with LF's
            $divSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $divSplit[$k]);
            $divSplit[$k] = preg_replace('/' . LF . LF . '/i', LF, $divSplit[$k]);
            $divSplit[$k] = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $divSplit[$k]);
            if ((string)$divSplit[$k] === '') {
                unset($divSplit[$k]);
            }
        }
    }
    // Return value:
    return $returnArray ? $divSplit : implode(LF, $divSplit);
}
1055
/**
 * Turns every LF separated line of the input into a <p></p> section, unless the line is
 * already wrapped in a p or div tag or contains an hr tag.
 * For processing of content going FROM database TO RTE.
 *
 * @param string $value Value to convert
 * @return string Processed value.
 * @see divideIntoLines()
 */
public function setDivTags($value)
{
    // Configuration for HTMLcleaner(), which processes each line on its way to the RTE
    $allowedTags = $this->getKeepTags('rte');
    // Unknown tags are protected unless the processing options disable that
    $unknownTagHandling = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';

    $lines = array();
    foreach (explode(LF, $value) as $line) {
        if (trim($line) === '') {
            // A blank line is represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line content
            $line = $this->HTMLcleaner($line, $allowedTags, $unknownTagHandling);
            if (!$this->procOptions['dontConvAmpInNBSP_rte']) {
                $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
            }
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isDivWrapped = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $isParagraphWrapped = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only set p-tags if there are not already div or p tags around the line
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[] = $line;
    }
    return implode(LF, $lines);
}
1098
/**
 * Finds width and height from attrib-array
 * If the width and height is found in the style-attribute (as pixel values), use that!
 * Otherwise the "width" / "height" attributes are used.
 *
 * @param array $attribArray Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
 * @return array Integer w/h in key 0/1. Zero is returned if not found.
 */
public function getWHFromAttribs($attribArray)
{
    $style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
    $w = 0;
    $h = 0;
    if ($style) {
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = array();
        // The lookbehind makes sure only the plain "width" / "height" properties match, not the
        // tail of compound ones such as "max-width", "border-width" or "line-height".
        // Width
        if (preg_match('/(?<![\\w-])width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match('/(?<![\\w-])height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes for whatever the style did not provide
    if (!$w && isset($attribArray['width'])) {
        $w = $attribArray['width'];
    }
    if (!$h && isset($attribArray['height'])) {
        $h = $attribArray['height'];
    }
    return array((int)$w, (int)$h);
}
1129
/**
 * Parse <A>-tag href and return status of email, external, file, anchor or page
 *
 * The returned array always contains "url" and "type". For URLs on the current site it
 * additionally holds "relScriptPath" and "relUrl", and for page links "pageid", "cElement" and "query".
 *
 * @param string $url URL to analyse.
 * @return array Information in an array about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = array();
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    $filePosition = strpos($url, '?file:');
    if ($filePosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $filePosition + 1));
        return $info;
    }

    $curURL = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    // Find the length of the common prefix of the URL and the site URL. The scan is bounded by
    // the shorter of the two strings so no offset beyond the end of either one is read.
    $maxLength = min(strlen($url), strlen($curURL));
    for ($a = 0; $a < $maxLength; $a++) {
        if ($url[$a] !== $curURL[$a]) {
            break;
        }
    }
    $info['relScriptPath'] = substr($curURL, $a);
    $info['relUrl'] = substr($url, $a);
    $info['url'] = $url;
    $info['type'] = 'ext';
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);
    // parse_url() only returns the components that are present (or FALSE on failure)
    $siteHost = isset($siteUrl_parts['host']) ? $siteUrl_parts['host'] : null;
    $curHost = isset($curUrl_parts['host']) ? $curUrl_parts['host'] : null;
    // The script path has to be empty (FE-EDIT) or start with the TYPO3 main directory
    $scriptPathMatches = !$info['relScriptPath']
        || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir;
    // Hosts should match, too; otherwise the URL stays an external one
    if ($siteHost != $curHost || !$scriptPathMatches) {
        unset($info['relScriptPath']);
        unset($info['relUrl']);
        return $info;
    }

    $uP = parse_url($info['relUrl']);
    $siteFragment = isset($siteUrl_parts['fragment']) ? $siteUrl_parts['fragment'] : '';
    $relativePath = isset($uP['path']) ? $uP['path'] : '';
    if ($info['relUrl'] === '#' . $siteFragment) {
        // Nothing but a fragment is left: an anchor on the current page
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relativePath) || $relativePath === 'index.php') {
        // URL is a page (id parameter)
        $pp = preg_split('/^id=/', isset($uP['query']) ? $uP['query'] : '');
        // Everything after the leading "id=", with any further id parameter removed
        $queryAfterId = preg_replace('/&id=[^&]*/', '', isset($pp[1]) ? $pp[1] : '');
        $parameters = explode('&', $queryAfterId);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = isset($uP['fragment']) ? $uP['fragment'] : null;
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1192
/**
 * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
 *
 * A href without a scheme gets the site URL prepended; a href with a scheme other than
 * "mailto" is flagged with the "data-htmlarea-external" attribute. A-tags without a href
 * (e.g. named anchors) keep their attributes and only receive "rtekeep".
 *
 * @param string $value Content input
 * @param bool $dontSetRTEKEEP If TRUE, then the "rtekeep" attribute will not be set (except on a-tags without href).
 * @return string Content output
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold an a-tag block
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // Checking if there is a scheme, and if not, prepend the current url.
        // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved.
        // A missing href attribute counts as "no content" just like an empty one.
        if (isset($attribArray['href']) && (string)$attribArray['href'] !== '') {
            $uP = parse_url(strtolower($attribArray['href']));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $attribArray['href'];
            } elseif ($uP['scheme'] != 'mailto') {
                $attribArray['data-htmlarea-external'] = 1;
            }
        } else {
            $attribArray['rtekeep'] = 1;
        }
        if (!$dontSetRTEKEEP) {
            $attribArray['rtekeep'] = 1;
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        // Nested a-tags are processed with the same rtekeep setting as the outer call
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($v), $dontSetRTEKEEP) . $eTag;
    }
    return implode('', $blockSplit);
}
1229
/**
 * Adjusts the width/height attributes of an image tag according to the
 * "plainImageMode" processing option:
 * - lockDimensions: width and height are set to the dimensions of the image
 * - lockRatioWhenSmaller: the width is capped to the image width, then the height follows the image ratio
 * - lockRatio: the height is derived from the width by the image ratio
 *
 * @param array $imageInfo Info array of the image; key 0 is used as width, key 1 as height
 * @param array $attribArray Array of attributes of an image tag
 *
 * @return array A modified attributes array
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }
    // Cap the width first; the ratio correction below then works on the capped value
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    $ratioIsLocked = $mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio';
    // The ratio can only be applied to an image with a known, positive width
    if ($ratioIsLocked && $imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1260
/**
 * Fetches a logger named after the concrete class of this object from the LogManager
 *
 * @return \TYPO3\CMS\Core\Log\Logger
 */
protected function getLogger()
{
    $loggerName = get_class($this);
    return GeneralUtility::makeInstance(LogManager::class)->getLogger($loggerName);
}
1272 }