[BUGFIX] Do not crop inside known html tags 95/43995/2
author Benjamin Mack <benni@typo3.org>
Sun, 11 Oct 2015 21:09:34 +0000 (23:09 +0200)
committer Benni Mack <benni@typo3.org>
Sun, 11 Oct 2015 21:10:03 +0000 (23:10 +0200)
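This change adds missing tags to the regex used to match HTML tags.
Furthermore, the scripting tags (<canvas>, <script>, <noscript> and
<template>) are now matched as a whole, including their content, so that
they are neither counted towards the crop length nor split apart.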

The list of HTML tags was extracted from the current W3C HTML5
recommendation at [1] and merged with tags from the existing code
that are not (or no longer) part of that recommendation
(e.g. TYPO3's internal <link> tag).

---
[1]: http://www.w3.org/TR/html5/semantics.html#semantics

Resolves: #69263
Releases: master, 6.2
Change-Id: Id58dcf3470ed06256b9b8f744dd13891757faef1
Reviewed-on: http://review.typo3.org/43995
Reviewed-by: Benni Mack <benni@typo3.org>
Tested-by: Benni Mack <benni@typo3.org>
typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php
typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php

index b48a2ce..e002d72 100644 (file)
@@ -3902,12 +3902,19 @@ class ContentObjectRenderer {
                $replacementForEllipsis = trim($options[1]);
                $crop2space = trim($options[2]) === '1' ? TRUE : FALSE;
                // Split $content into an array(even items in the array are outside the tags, odd numbers are tag-blocks).
-               $tags = 'a|b|blockquote|body|div|em|font|form|h1|h2|h3|h4|h5|h6|i|li|map|ol|option|p|pre|sub|sup|select|span|strong|table|thead|tbody|tfoot|td|textarea|tr|u|ul|br|hr|img|input|area|link';
-               // TODO We should not crop inside <script> tags.
+               $tags = 'a|abbr|address|area|article|aside|audio|b|bdi|bdo|blockquote|body|br|button|caption|cite|code|col|colgroup|data|datalist|dd|del|dfn|div|dl|dt|em|embed|fieldset|figcaption|figure|font|footer|form|h1|h2|h3|h4|h5|h6|header|hr|i|iframe|img|input|ins|kbd|keygen|label|legend|li|link|main|map|mark|meter|nav|object|ol|optgroup|option|output|p|param|pre|progress|q|rb|rp|rt|rtc|ruby|s|samp|section|select|small|source|span|strong|sub|sup|table|tbody|td|textarea|tfoot|th|thead|time|tr|track|u|ul|ut|var|video|wbr';
                $tagsRegEx = '
                        (
                                (?:
                                        <!--.*?-->                                      # a comment
+                                       |
+                                       <canvas[^>]*>.*?</canvas>   # a canvas tag
+                                       |
+                                       <script[^>]*>.*?</script>   # a script tag
+                                       |
+                                       <noscript[^>]*>.*?</noscript> # a noscript tag
+                                       |
+                                       <template[^>]*>.*?</template> # a template tag
                                )
                                |
                                </?(?:' . $tags . ')+                   # opening tag (\'<tag\') or closing tag (\'</tag\')
index 9cf16e5..4b54be0 100644 (file)
@@ -699,6 +699,18 @@ class ContentObjectRendererTest extends \TYPO3\CMS\Core\Tests\UnitTestCase {
                                        '<em data-foo="x">foobar</em>foo',
                                        $charset
                                ),
+                               $charset . ' html elements with iframe embedded 24|...|1' => array(
+                    '24|...|1',
+                    'Text with iframe <iframe src="//what.ever/"></iframe> and text after it',
+                    'Text with iframe <iframe src="//what.ever/"></iframe> and...',
+                    $charset
+                ),
+                $charset . ' html elements with script tag embedded 24|...|1' => array(
+                    '24|...|1',
+                    'Text with script <script>alert(\'foo\');</script> and text after it',
+                    'Text with script <script>alert(\'foo\');</script> and...',
+                    $charset
+                ),
                        ));
                }
                return $data;