Commit 3dca584c authored by Oliver Hader, committed by Oliver Hader
Browse files

[SECURITY] Ensure XSS-safe rich text rendering

Due to missing internal handling of provided RTE configuration, it
was possible to directly persist XSS in database fields. Unless full
blown backend RTE tag configuration is available, this patch still
allows persisting potentially malicious data - which is not reflected
in the backend user interface - but to be sanitized during frontend
rendering (see below).

Corresponding configuration directives (`removeTags`, `allowedAttribs`)
are now considered again. Besides that a new, but simplified sequential
HTML parser ensures that runaway node-boundaries are detected & denied.

To sanitize and purge XSS from markup during frontend rendering, new
custom HTML sanitizer has been introduced, based on `masterminds/html5`.
Both `DefaultBuilder` and `CommonVisitor` provide common configuration
which is in line with expected tags that are allowed in backend RTE.
Using a custom builder instance, it is possible to adjust for individual
demands - however, configuration possibilities cannot be modified using
TypoScript - basically since the existing syntax does not cover all
necessary scenarios.

Resolves: #94375
Related: #83027
Related: #94484
Releases: master, 11.3, 10.4, 9.5
Change-Id: I5f8de43faab57b00052614ad37bd10ea9e384dc0
Security-Bulletin: TYPO3-CORE-SA-2021-013
Security-References: CVE-2021-32768
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/70345


Tested-by: Oliver Hader <oliver.hader@typo3.org>
Reviewed-by: Oliver Hader <oliver.hader@typo3.org>
parent c35316f6
......@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "a6085704e7c7bbfad4c83e03ebaaefc3",
"content-hash": "642d3345779cb3c32f93967f26984507",
"packages": [
{
"name": "bacon/bacon-qr-code",
......@@ -1093,6 +1093,75 @@
},
"time": "2021-04-26T09:17:50+00:00"
},
{
"name": "masterminds/html5",
"version": "2.7.5",
"source": {
"type": "git",
"url": "https://github.com/Masterminds/html5-php.git",
"reference": "f640ac1bdddff06ea333a920c95bbad8872429ab"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Masterminds/html5-php/zipball/f640ac1bdddff06ea333a920c95bbad8872429ab",
"reference": "f640ac1bdddff06ea333a920c95bbad8872429ab",
"shasum": ""
},
"require": {
"ext-ctype": "*",
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"HTML5",
"dom",
"html",
"parser",
"querypath",
"serializer",
"xml"
],
"support": {
"issues": "https://github.com/Masterminds/html5-php/issues",
"source": "https://github.com/Masterminds/html5-php/tree/2.7.5"
},
"time": "2021-07-01T14:25:37+00:00"
},
{
"name": "nikic/php-parser",
"version": "v4.12.0",
......@@ -4851,6 +4920,57 @@
},
"time": "2020-05-04T18:03:13+00:00"
},
{
"name": "typo3/html-sanitizer",
"version": "v2.0.7",
"source": {
"type": "git",
"url": "https://github.com/TYPO3/html-sanitizer.git",
"reference": "c5f1e2eb37f3b7799029ab01e145028e35b2e61a"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/TYPO3/html-sanitizer/zipball/c5f1e2eb37f3b7799029ab01e145028e35b2e61a",
"reference": "c5f1e2eb37f3b7799029ab01e145028e35b2e61a",
"shasum": ""
},
"require": {
"ext-dom": "*",
"masterminds/html5": "^2.7",
"php": "^7.2 || ^8.0",
"psr/log": "^1.0"
},
"require-dev": {
"phpunit/phpunit": "^8.5"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "2.x-dev"
}
},
"autoload": {
"psr-4": {
"TYPO3\\HtmlSanitizer\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Oliver Hader",
"email": "oliver@typo3.org"
}
],
"description": "HTML sanitizer aiming to provide XSS-safe markup based on explicitly allowed tags, attributes and values.",
"support": {
"issues": "https://github.com/TYPO3/html-sanitizer/issues",
"source": "https://github.com/TYPO3/html-sanitizer/tree/v2.0.7"
},
"time": "2021-08-08T20:57:41+00:00"
},
{
"name": "typo3/phar-stream-wrapper",
"version": "v3.1.6",
......
<?php
declare(strict_types=1);
/*
* This file is part of the TYPO3 project.
*
* It is free software; you can redistribute it and/or modify it under the terms
* of the MIT License (MIT). For the full copyright and license information,
* please read the LICENSE file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
namespace TYPO3\CMS\Core\Html;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Builder\CommonBuilder;
use TYPO3\HtmlSanitizer\Sanitizer;
use TYPO3\HtmlSanitizer\Visitor\CommonVisitor;
/**
 * Sanitizer builder that extends the common preset with TYPO3-specific
 * attribute values: URLs on the current host, allowed absolute paths and
 * `t3://` URIs, plus a globally allowed `style` attribute.
 *
 * @internal
 */
class DefaultSanitizerBuilder extends CommonBuilder
{
    public function __construct()
    {
        parent::__construct();

        // accepts a value when it is a valid URL on the current host,
        // or when it is an (allowed) absolute path
        $localUrlValue = new Behavior\ClosureAttrValue(
            function (string $value): bool {
                if (GeneralUtility::isValidUrl($value) && GeneralUtility::isOnCurrentHost($value)) {
                    return true;
                }
                // @todo incorrect abs path!
                return GeneralUtility::isAbsPath($value) && GeneralUtility::isAllowedAbsPath($value);
            }
        );
        // accepts values starting with `t3://`
        $typo3UriValue = new Behavior\RegExpAttrValue('#^t3://#');

        // register the additional values on the inherited URI attributes
        $this->srcAttr->addValues($localUrlValue);
        $this->srcsetAttr->addValues($localUrlValue);
        $this->hrefAttr->addValues($localUrlValue, $typo3UriValue);

        // @todo `style` used in Introduction Package, inline CSS should be removed
        $this->globalAttrs[] = new Behavior\Attr('style');
    }

    /**
     * Assembles the sanitizer: behavior -> visitor -> sanitizer.
     */
    public function build(): Sanitizer
    {
        return GeneralUtility::makeInstance(
            Sanitizer::class,
            GeneralUtility::makeInstance(CommonVisitor::class, $this->createBehavior())
        );
    }

    /**
     * Takes the parent's behavior and names it `default`.
     */
    protected function createBehavior(): Behavior
    {
        $behavior = parent::createBehavior();
        return $behavior->withName('default');
    }
}
......@@ -200,12 +200,18 @@ class HtmlParser
*/
public function removeFirstAndLastTag($str)
{
    // Work on a parsed, flat node sequence instead of searching for raw
    // `>` / `<` characters, which may also occur inside attribute values.
    $parser = SimpleParser::fromString($str);
    $first = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    $last = $parser->getLastNode(SimpleNode::TYPE_ELEMENT);
    // no element node at all, or first and last are the very same node:
    // there is nothing "in between" that could be returned
    if ($first === null || $first === $last) {
        return '';
    }
    // everything strictly between the first and the last element node
    $sequence = array_slice(
        $parser->getNodes(),
        $first->getIndex() + 1,
        $last->getIndex() - $first->getIndex() - 1
    );
    return implode('', array_map('strval', $sequence));
}
/**
......@@ -217,9 +223,17 @@ class HtmlParser
*/
public function getFirstTag($str)
{
    // Determine the first element node via the parser rather than the
    // first raw `>` character, which may be part of an attribute value.
    $parser = SimpleParser::fromString($str);
    $first = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    if ($first === null) {
        return '';
    }
    // all nodes up to and including the first element node
    $sequence = array_slice(
        $parser->getNodes(),
        0,
        $first->getIndex() + 1
    );
    return implode('', array_map('strval', $sequence));
}
/**
......@@ -232,12 +246,14 @@ class HtmlParser
*/
public function getFirstTagName($str, $preserveCase = false)
{
    $parser = SimpleParser::fromString($str);
    $elements = $parser->getNodes(SimpleNode::TYPE_ELEMENT);
    foreach ($elements as $element) {
        $name = $element->getElementName();
        // element nodes without a resolvable name are skipped
        if ($name === null) {
            continue;
        }
        // first named element wins; upper-cased unless case shall be preserved
        return $preserveCase ? $name : strtoupper($name);
    }
    return '';
}
......@@ -587,7 +603,10 @@ class HtmlParser
if ($endTag || empty($tags[$tagName]['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
$setTag = true;
// Remove this closing tag if $tagName was among $TSconfig['removeTags']
if ($endTag && isset($tags[$tagName]['allowedAttribs']) && $tags[$tagName]['allowedAttribs'] === 0 && $tags[$tagName]['rmTagIfNoAttrib'] === 1) {
if ($endTag
&& isset($tags[$tagName]['allowedAttribs']) && $tags[$tagName]['allowedAttribs'] === 0
&& isset($tags[$tagName]['rmTagIfNoAttrib']) && $tags[$tagName]['rmTagIfNoAttrib'] === 1
) {
$setTag = false;
}
if (isset($tags[$tagName]['nesting'])) {
......
......@@ -18,11 +18,13 @@ namespace TYPO3\CMS\Core\Html;
use Psr\EventDispatcher\EventDispatcherInterface;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
use TYPO3\CMS\Core\Configuration\Features;
use TYPO3\CMS\Core\Html\Event\BrokenLinkAnalysisEvent;
use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
use TYPO3\CMS\Core\LinkHandling\LinkService;
use TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\HtmlSanitizer\Builder\BuilderInterface;
/**
* Class for parsing HTML for the Rich Text Editor. (also called transformations)
......@@ -255,6 +257,8 @@ class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
}
}
}
// process markup with HTML Sanitizer
$value = $this->htmlSanitize($value, $this->procOptions['HTMLparser_db.'] ?? []);
// If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
$value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_db');
// Final clean up of linebreaks
......@@ -416,8 +420,14 @@ class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
$string = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $string) ?? '';
// Replace other linebreaks with space
$string = preg_replace('/[' . LF . ']+/', ' ', $string);
/** @var string $string */
$blockSplit[$k] = $this->divideIntoLines($string);
// process allowed/removed tags
$string = $this->HTMLcleaner(
(string)$string,
$this->getKeepTags('db'),
$this->procOptions['HTMLparser_db.']['keepNonMatchedTags'] ?? '',
(int)($this->procOptions['HTMLparser_db.']['htmlSpecialChars'] ?? 0)
);
$blockSplit[$k] = (string)$this->divideIntoLines($string);
} else {
unset($blockSplit[$k]);
}
......@@ -841,4 +851,26 @@ class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
}
return implode('', $blocks);
}
/**
 * Passes markup through an HTML sanitizer, unless sanitization is disabled.
 *
 * @param string $content markup to be sanitized
 * @param array $configuration settings; keys `htmlSanitize` and `htmlSanitize.` are evaluated
 * @return string sanitized markup, or unmodified `$content` when disabled
 */
protected function htmlSanitize(string $content, array $configuration): string
{
    $features = GeneralUtility::makeInstance(Features::class);
    // either `htmlSanitize = null` or `htmlSanitize = false`
    $disabledByConfiguration = array_key_exists('htmlSanitize', $configuration)
        && empty($configuration['htmlSanitize']);
    // or feature flag `rte.htmlSanitize` is explicitly disabled
    if ($disabledByConfiguration || !$features->isFeatureEnabled('rte.htmlSanitize')) {
        return $content;
    }

    $identifier = $configuration['htmlSanitize.']['build'] ?? 'default';
    // the identifier is either a builder class name, or a preset name
    // that is resolved by the factory
    $isBuilderClass = class_exists($identifier) && is_a($identifier, BuilderInterface::class, true);
    $builder = $isBuilderClass
        ? GeneralUtility::makeInstance($identifier)
        : GeneralUtility::makeInstance(SanitizerBuilderFactory::class)->build($identifier);

    return $builder->build()->sanitize($content);
}
}
<?php
declare(strict_types=1);
/*
* This file is part of the TYPO3 project.
*
* It is free software; you can redistribute it and/or modify it under the terms
* of the MIT License (MIT). For the full copyright and license information,
* please read the LICENSE file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
namespace TYPO3\CMS\Core\Html;
use LogicException;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\HtmlSanitizer\Builder\BuilderInterface;
/**
 * Factory for creating a (sanitizer) builder instance. Corresponding presets can
 * be declared in `$GLOBALS['TYPO3_CONF_VARS']['SYS']['htmlSanitizer']` like e.g.
 *
 * ```
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['htmlSanitizer'] = [
 *     'default' => \TYPO3\CMS\Core\Html\DefaultSanitizerBuilder::class,
 *     'custom' => \Vendor\Package\CustomBuilder::class,
 * ];
 * ```
 *
 * @internal
 */
class SanitizerBuilderFactory
{
    /**
     * Map of preset identifier to builder class name.
     *
     * @var array
     */
    protected $configuration;

    /**
     * @param array|null $configuration preset map; when `null`, the map is taken from
     *        `$GLOBALS['TYPO3_CONF_VARS']['SYS']['htmlSanitizer']` (empty array if unset)
     */
    public function __construct(?array $configuration = null)
    {
        // explicit `?array` instead of relying on implicit nullability of `array $x = null`
        $this->configuration = $configuration ?? $GLOBALS['TYPO3_CONF_VARS']['SYS']['htmlSanitizer'] ?? [];
    }

    /**
     * Creates the builder that is registered for the given preset identifier.
     *
     * @param string $identifier preset name, e.g. `default`
     * @return BuilderInterface
     * @throws LogicException when the identifier is not configured (code 1624876139),
     *         or the created instance does not implement `BuilderInterface` (code 1624876266)
     */
    public function build(string $identifier): BuilderInterface
    {
        // missing as well as empty entries are treated as "undefined"
        if (empty($this->configuration[$identifier])) {
            throw new LogicException(
                sprintf('Undefined `htmlSanitizer` identifier `%s`', $identifier),
                1624876139
            );
        }
        $builder = GeneralUtility::makeInstance($this->configuration[$identifier]);
        if (!$builder instanceof BuilderInterface) {
            throw new LogicException(
                sprintf(
                    'Builder `%s` must implement interface `%s`',
                    get_class($builder),
                    BuilderInterface::class
                ),
                1624876266
            );
        }
        return $builder;
    }
}
<?php
declare(strict_types=1);
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
namespace TYPO3\CMS\Core\Html;
/**
 * Value object for one node of a flat node sequence: it carries the node
 * type, the index in the sequence and the raw string of the node.
 *
 * @internal
 */
class SimpleNode
{
    // similar to https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
    public const TYPE_ELEMENT = 1;
    public const TYPE_TEXT = 3;
    public const TYPE_CDATA = 4;
    public const TYPE_COMMENT = 8;

    /**
     * One of the `TYPE_*` constants.
     *
     * @var int
     */
    protected $type;

    /**
     * Index of this node as handed over on construction.
     *
     * @var int
     */
    protected $index;

    /**
     * Raw string representation of this node.
     *
     * @var string
     */
    protected $string;

    /**
     * Named constructor, equivalent to `new SimpleNode(...)`.
     */
    public static function fromString(int $type, int $index, string $string): self
    {
        $node = new self($type, $index, $string);
        return $node;
    }

    public function __construct(int $type, int $index, string $string)
    {
        $this->string = $string;
        $this->index = $index;
        $this->type = $type;
    }

    /**
     * @return string the raw node string
     */
    public function __toString(): string
    {
        return $this->string;
    }

    /**
     * @return int
     */
    public function getType(): int
    {
        return $this->type;
    }

    /**
     * @return int
     */
    public function getIndex(): int
    {
        return $this->index;
    }

    /**
     * Resolves the element name from the raw node string.
     *
     * @return string|null name as written in the markup (case is kept); `null` for
     *         non-element nodes, and for strings not starting with `<` plus a name
     */
    public function getElementName(): ?string
    {
        $isElement = $this->getType() === self::TYPE_ELEMENT;
        if ($isElement && preg_match('#^<(?P<name>[a-z][a-z0-9-]*)\b#i', $this->string, $found) === 1) {
            return $found['name'];
        }
        return null;
    }
}
<?php
declare(strict_types=1);
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
namespace TYPO3\CMS\Core\Html;
/**
* Simple HTML node parser. The main focus is to determine "runaway nodes"
* like `<span attribute="<runaway attribute="other">` and better node boundaries.
*
* (Most of) the behavior is similar to Mozilla's behavior on handling those nodes.
* (e.g. `div.innerHTML = 'x =<y>= z';` - but without creating closing node blocks)
*
* This parser does not resolve nested nodes - it just provides a flat node sequence.
*
* @internal
*/
class SimpleParser
{
/**
* @var string|null
*/
protected $attribute;
/**
* @var SimpleNode[]
*/
protected $nodes = [];
/**
* @var int
*/
protected $currentType = SimpleNode::TYPE_TEXT;
/**
* @var string
*/
protected $currentData = '';
/**
 * Named constructor, equivalent to `new SimpleParser($string)`.
 */
public static function fromString(string $string): self
{
    $parser = new self($string);
    return $parser;
}
/**
 * @param string $string markup that is handed over to `process()` right away
 */
public function __construct(string $string)
{
    // processing happens eagerly during construction
    $this->process($string);
}
/**
 * Provides the nodes, optionally restricted to the given node types.
 *
 * @param int ...$types using `SimpleNode::TYPE_*`; all nodes are returned when omitted
 * @return SimpleNode[] matching nodes; consecutively indexed when filtered by type
 */
public function getNodes(int ...$types): array
{
    if ($types === []) {
        return $this->nodes;
    }
    // collecting into a fresh list yields consecutive keys
    $matching = [];
    foreach ($this->nodes as $candidate) {
        if (in_array($candidate->getType(), $types, true)) {
            $matching[] = $candidate;
        }
    }
    return $matching;
}
/**
* @param int|null $type using `Node::TYPE_*`
* @return SimpleNode|null
*/
public function getFirstNode(int $type = null): ?SimpleNode
{
foreach ($this->nodes as $node) {
if ($type === null || $type === $node->getType()) {
return $node;
}