[BUGFIX] Do not crop inside known html tags 41/42841/3
author Ludwig Rafelsberger <ludwig.rafelsberger@gmx.at>
Sat, 22 Aug 2015 18:10:12 +0000 (20:10 +0200)
committer Benni Mack <benni@typo3.org>
Sun, 11 Oct 2015 21:04:11 +0000 (23:04 +0200)
This change adds missing tags to the regex used to match HTML tags.
Furthermore, the scripting tags (canvas, script, noscript, template)
are now matched together with their content, so that this content is
neither counted nor split apart.

This list of HTML tags was extracted from the current W3C HTML5
standard recommendation at [1] and merged with tags from the current
code that are not (or no longer) in the W3C standard recommendation
(e.g. TYPO3's internal <link> tag).

---
[1]: http://www.w3.org/TR/html5/semantics.html#semantics

Resolves: #69263
Releases: master, 6.2
Change-Id: Id58dcf3470ed06256b9b8f744dd13891757faef1
Reviewed-on: http://review.typo3.org/42841
Reviewed-by: Jigal van Hemert <jigal.van.hemert@typo3.org>
Tested-by: Jigal van Hemert <jigal.van.hemert@typo3.org>
Reviewed-by: Wouter Wolters <typo3@wouterwolters.nl>
Tested-by: Wouter Wolters <typo3@wouterwolters.nl>
Reviewed-by: Benni Mack <benni@typo3.org>
Tested-by: Benni Mack <benni@typo3.org>
typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php
typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php

index 4b97ccd..fa42ad8 100644 (file)
@@ -4329,12 +4329,19 @@ class ContentObjectRenderer
         $replacementForEllipsis = trim($options[1]);
         $crop2space = trim($options[2]) === '1';
         // Split $content into an array(even items in the array are outside the tags, odd numbers are tag-blocks).
-        $tags = 'a|b|blockquote|body|div|em|font|form|h1|h2|h3|h4|h5|h6|i|li|map|ol|option|p|pre|sub|sup|select|span|strong|table|thead|tbody|tfoot|td|textarea|tr|u|ul|br|hr|img|input|area|link';
-        // @todo We should not crop inside <script> tags.
+        $tags = 'a|abbr|address|area|article|aside|audio|b|bdi|bdo|blockquote|body|br|button|caption|cite|code|col|colgroup|data|datalist|dd|del|dfn|div|dl|dt|em|embed|fieldset|figcaption|figure|font|footer|form|h1|h2|h3|h4|h5|h6|header|hr|i|iframe|img|input|ins|kbd|keygen|label|legend|li|link|main|map|mark|meter|nav|object|ol|optgroup|option|output|p|param|pre|progress|q|rb|rp|rt|rtc|ruby|s|samp|section|select|small|source|span|strong|sub|sup|table|tbody|td|textarea|tfoot|th|thead|time|tr|track|u|ul|ut|var|video|wbr';
         $tagsRegEx = '
                        (
                                (?:
                                        <!--.*?-->                                      # a comment
+                                       |
+                                       <canvas[^>]*>.*?</canvas>   # a canvas tag
+                                       |
+                                       <script[^>]*>.*?</script>   # a script tag
+                                       |
+                                       <noscript[^>]*>.*?</noscript> # a noscript tag
+                                       |
+                                       <template[^>]*>.*?</template> # a template tag
                                )
                                |
                                </?(?:' . $tags . ')+                   # opening tag (\'<tag\') or closing tag (\'</tag\')
index 2ce96e3..c0b43e5 100644 (file)
@@ -733,6 +733,18 @@ class ContentObjectRendererTest extends \TYPO3\CMS\Core\Tests\UnitTestCase
                     '<em data-foo="x">foobar</em>foo',
                     $charset
                 ),
+                $charset . ' html elements with iframe embedded 24|...|1' => array(
+                    '24|...|1',
+                    'Text with iframe <iframe src="//what.ever/"></iframe> and text after it',
+                    'Text with iframe <iframe src="//what.ever/"></iframe> and...',
+                    $charset
+                ),
+                $charset . ' html elements with script tag embedded 24|...|1' => array(
+                    '24|...|1',
+                    'Text with script <script>alert(\'foo\');</script> and text after it',
+                    'Text with script <script>alert(\'foo\');</script> and...',
+                    $charset
+                ),
             ));
         }
         return $data;