[BUGFIX] Exclude CDATA from t3lib_parsehtml->XHTML_clean 07/34207/2
author Nicole Cordes <typo3@cordes.co>
Sat, 15 Nov 2014 00:24:24 +0000 (01:24 +0100)
committer Oliver Hader <oliver.hader@typo3.org>
Thu, 4 Dec 2014 13:27:19 +0000 (14:27 +0100)
Due to commit https://review.typo3.org/#/c/30240/ the comments from
JavaScript are removed, and the JavaScript is now parsed with
config.xhtml_cleaning = all. This patch prevents any CDATA content from
being parsed.

Resolves: #62967
Releases: master, 6.2, 4.5
Change-Id: Ib024c5c8f2b056e47d9222b9767b7a5e6923af8c
Reviewed-on: http://review.typo3.org/34207
Reviewed-by: Nicole Cordes <typo3@cordes.co>
Tested-by: Nicole Cordes <typo3@cordes.co>
Reviewed-by: Oliver Hader <oliver.hader@typo3.org>
Tested-by: Oliver Hader <oliver.hader@typo3.org>
typo3/sysext/core/Classes/Html/HtmlParser.php
typo3/sysext/core/Tests/Unit/Html/HtmlParserTest.php

index 7977e67..f5681fc 100644 (file)
@@ -703,6 +703,7 @@ class HtmlParser {
                $tagRegister = array();
                $tagStack = array();
                $inComment = FALSE;
+               $inCdata = FALSE;
                $skipTag = FALSE;
                while (list(, $tok) = each($tokArr)) {
                        if ($inComment) {
@@ -716,6 +717,17 @@ class HtmlParser {
                                $tok = substr($tok, $eocPos + 3);
                                $inComment = FALSE;
                                $skipTag = TRUE;
+                       } elseif ($inCdata) {
+                               if (($eocPos = strpos($tok, '/*]]>*/')) === FALSE) {
+                                       // End of the CDATA section is not found in the token. Go further until the end of the CDATA section is found in other tokens.
+                                       $newContent[$c++] = '<' . $tok;
+                                       continue;
+                               }
+                               // CDATA section ends in the middle of the token: add it and proceed with the rest of the token. NOTE(review): '/*]]>*/' is 7 characters long, not 10 - confirm the offset below
+                               $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
+                               $tok = substr($tok, $eocPos + 10);
+                               $inCdata = FALSE;
+                               $skipTag = TRUE;
                        } elseif (substr($tok, 0, 3) == '!--') {
                                if (($eocPos = strpos($tok, '-->')) === FALSE) {
                                        // Comment started in this token but it does end in the same token. Set a flag to skip till the end of comment
@@ -727,6 +739,17 @@ class HtmlParser {
                                $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3));
                                $tok = substr($tok, $eocPos + 3);
                                $skipTag = TRUE;
+                       } elseif (substr($tok, 0, 10) === '![CDATA[*/') {
+                               if (($eocPos = strpos($tok, '/*]]>*/')) === FALSE) {
+                                       // CDATA section started in this token but does not end in the same token. Set a flag to skip until the end of the CDATA section
+                                       $newContent[$c++] = '<' . $tok;
+                                       $inCdata = TRUE;
+                                       continue;
+                               }
+                               // Start and end of the CDATA section are both in the current token. Add it and proceed with the rest of the token. NOTE(review): '/*]]>*/' is 7 characters long, not 10 - confirm the offset below
+                               $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
+                               $tok = substr($tok, $eocPos + 10);
+                               $skipTag = TRUE;
                        }
                        $firstChar = $tok[0];
                        // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
index 64d6d4e..a02e333 100644 (file)
@@ -231,4 +231,40 @@ Value 2.2
        public function substituteMarkerAndSubpartArrayRecursiveResolvesMarkersAndSubpartsArray($template, $markersAndSubparts, $wrap, $uppercase, $deleteUnused, $expected) {
                $this->assertSame($expected, $this->fixture->substituteMarkerAndSubpartArrayRecursive($template, $markersAndSubparts, $wrap, $uppercase, $deleteUnused));
        }
+
+       /**
+        * @return array Named data sets, each a pair (source, expected) whose two strings are identical
+        */
+       public function cDataWillRemainUnmodifiedDataProvider() {
+               return array(
+                       'single-line CDATA' => array(
+                               '/*<![CDATA[*/ <hello world> /*]]>*/',
+                               '/*<![CDATA[*/ <hello world> /*]]>*/',
+                       ),
+                       'multi-line CDATA #1' => array(
+                               '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
+                               '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
+                       ),
+                       'multi-line CDATA #2' => array(
+                               '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
+                               '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
+                       ),
+                       'multi-line CDATA #3' => array(
+                               '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
+                               '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
+                       ),
+               );
+       }
+
+       /**
+        * @test
+        * @param string $source Markup that is passed to XHTML_clean() of the fixture
+        * @param string $expected String the XHTML_clean() result is compared against with assertSame()
+        * @dataProvider cDataWillRemainUnmodifiedDataProvider
+        */
+       public function xHtmlCleaningDoesNotModifyCDATA($source, $expected) {
+               $result = $this->fixture->XHTML_clean($source);
+               $this->assertSame($expected, $result);
+       }
+
 }