[TASK] Cleanup RemoveXSS 96/37996/5
authorJigal van Hemert <jigal.van.hemert@typo3.org>
Sun, 22 Mar 2015 13:36:47 +0000 (14:36 +0100)
committerAndreas Fernandez <andreas.fernandez@aspedia.de>
Tue, 24 Mar 2015 16:58:05 +0000 (17:58 +0100)
Easier to understand variable names and some optimizations of the code.

Resolves: #65893
Releases: master
Change-Id: I4f3b535fba5809fc3fa6ac1b1ccccbec239cc85b
Reviewed-on: http://review.typo3.org/37996
Reviewed-by: Andreas Fernandez <andreas.fernandez@aspedia.de>
Tested-by: Andreas Fernandez <andreas.fernandez@aspedia.de>
Reviewed-by: Jan Helke <typo3@helke.de>
Reviewed-by: Nicole Cordes <typo3@cordes.co>
Tested-by: Nicole Cordes <typo3@cordes.co>
typo3/sysext/core/Resources/PHP/RemoveXSS.php

index b1598d0..731ac09 100644 (file)
@@ -17,7 +17,7 @@
  * including adding it to your own project which can be under any license.
  *
  * @author     Travis Puderbaugh <kallahar@quickwired.com>
- * @author     Jigal van Hemert <jigal@xs4all.nl>
+ * @author     Jigal van Hemert <jigal.van.hemert@typo3.org>
  * @package    RemoveXSS
  */
 class RemoveXSS {
@@ -27,11 +27,11 @@ class RemoveXSS {
         *
         * Using an external class by Travis Puderbaugh <kallahar@quickwired.com>
         *
-        * @param string $val Input string
+        * @param string $value Input string
         * @param string $replaceString replaceString for inserting in keywords (which destroys the tags)
         * @return string Input string with potential XSS code removed
         */
-       public static function process($val, $replaceString = '<x>') {
+       public static function process($value, $replaceString = '<x>') {
                // Don't use empty $replaceString because then no XSS-remove will be done
                if ($replaceString == '') {
                        $replaceString = '<x>';
@@ -39,31 +39,31 @@ class RemoveXSS {
                // Remove all non-printable characters. CR(0a) and LF(0b) and TAB(9) are allowed.
                // This prevents some character re-spacing such as <java\0script>
                // Note that you have to handle splits with \n, \r, and \t later since they *are* allowed in some inputs
-               $val = preg_replace('/([\x00-\x08]|[\x0b-\x0c]|[\x0e-\x19])/', '', $val);
+               $value = preg_replace('/([\x00-\x08]|[\x0b-\x0c]|[\x0e-\x19])/', '', $value);
 
                // Straight replacements, the user should never need these since they're normal characters.
                // This prevents like <IMG SRC=&#X40&#X61&#X76&#X61&#X73&#X63&#X72&#X69&#X70&#X74&#X3A&#X61&#X6C&#X65&#X72&#X74&#X28&#X27&#X58&#X53&#X53&#X27&#X29>
                $searchHexEncodings = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/i';
                $searchUnicodeEncodings = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/i';
-               while (preg_match($searchHexEncodings, $val) || preg_match($searchUnicodeEncodings, $val)) {
-                       $val = preg_replace_callback(
+               while (preg_match($searchHexEncodings, $value) || preg_match($searchUnicodeEncodings, $value)) {
+                       $value = preg_replace_callback(
                                $searchHexEncodings,
                                function ($matches) {
                                        return chr(hexdec($matches[1]));
                                },
-                               $val
+                               $value
                        );
-                       $val = preg_replace_callback(
+                       $value = preg_replace_callback(
                                $searchUnicodeEncodings,
                                function ($matches) {
                                        return chr($matches[1]);
                                },
-                               $val
+                               $value
                        );
                }
 
                // Now the only remaining whitespace attacks are \t, \n, and \r
-               $ra1 = array('javascript', 'vbscript', 'expression', 'applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed',
+               $allKeywords = array('javascript', 'vbscript', 'expression', 'applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed',
                        'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base', 'video', 'audio', 'track',
                        'canvas', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut',
                        'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint', 'onbeforeunload', 'onbeforeupdate',
@@ -79,9 +79,9 @@ class RemoveXSS {
                        'onreset', 'onresize', 'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted',
                        'onscroll', 'onseeked', 'onseeking','onselect', 'onselectionchange', 'onselectstart', 'onshow', 'onstalled', 'onstart',
                        'onstop', 'onstorage', 'onsubmit', 'onsuspend', 'ontimeupdate', 'onunload', 'onvolumechange', 'onwaiting');
-               $ra_tag = array('applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame',
+               $tagKeywords = array('applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame',
                        'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base', 'video', 'audio', 'track', 'canvas');
-               $ra_attribute = array('style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate',
+               $attributeKeywords = array('style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate',
                        'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint',
                        'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncanplay', 'oncanplaythrough', 'oncellchange', 'onchange',
                        'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncuechange', 'oncut', 'ondataavailable', 'ondatasetchanged',
@@ -96,72 +96,72 @@ class RemoveXSS {
                        'onresizestart','onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onseeked', 'onseeking',
                        'onselect', 'onselectionchange', 'onselectstart', 'onshow', 'onstalled', 'onstart', 'onstop', 'onstorage', 'onsubmit',
                        'onsuspend', 'ontimeupdate', 'onundo', 'onunload', 'onvolumechange', 'onwaiting');
-               $ra_protocol = array('javascript', 'vbscript', 'expression');
+               $protocolKeywords = array('javascript', 'vbscript', 'expression');
 
                // Remove the potential &#xxx; stuff for testing
-               $val2 = preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b);?)*\s*/i', '', $val);
-               $ra = array();
+               $valueForQuickCheck = preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b);?)*\s*/i', '', $value);
+               $potentialKeywords = array();
 
-               foreach ($ra1 as $ra1word) {
+               foreach ($allKeywords as $keyword) {
                        // Stripos is faster than the regular expressions used later and because the words we're looking for only have
                        // chars < 0x80 we can use the non-multibyte safe version.
-                       if (stripos($val2, $ra1word ) !== FALSE ) {
-                                       //keep list of potential words that were found
-                               if (in_array($ra1word, $ra_protocol, TRUE)) {
-                                       $ra[] = array($ra1word, 'ra_protocol');
+                       if (stripos($valueForQuickCheck, $keyword ) !== FALSE ) {
+                               //keep list of potential words that were found
+                               if (in_array($keyword, $protocolKeywords, TRUE)) {
+                                       $potentialKeywords[] = array($keyword, 'protocol');
                                }
-                               if (in_array($ra1word, $ra_tag, TRUE)) {
-                                       $ra[] = array($ra1word, 'ra_tag');
+                               if (in_array($keyword, $tagKeywords, TRUE)) {
+                                       $potentialKeywords[] = array($keyword, 'tag');
                                }
-                               if (in_array($ra1word, $ra_attribute, TRUE)) {
-                                       $ra[] = array($ra1word, 'ra_attribute');
+                               if (in_array($keyword, $attributeKeywords, TRUE)) {
+                                       $potentialKeywords[] = array($keyword, 'attribute');
                                }
                                // Some keywords appear in more than one array.
-                               // These get multiple entries in $ra, each with the appropriate type
+                               // These get multiple entries in $potentialKeywords, each with the appropriate type
                        }
                }
                // Only process potential words
-               if (count($ra) > 0) {
+               if (!empty($potentialKeywords)) {
                        // Keep replacing as long as the previous round replaced something
                        $found = TRUE;
-                       while ($found == TRUE) {
-                               $val_before = $val;
-                               for ($i = 0; $i < sizeof($ra); $i++) {
-                                       $pattern = '';
-                                       for ($j = 0; $j < strlen($ra[$i][0]); $j++) {
-                                               if ($j > 0) {
-                                                       $pattern .= '((&#[xX]0{0,8}([9ab]);?)|(&#0{0,8}(9|10|13);?)|\s)*';
+                       while ($found) {
+                               $valueBeforeReplacement = $value;
+                               foreach ($potentialKeywords as $potentialKeywordItem) {
+                                       list($keyword, $type) = $potentialKeywordItem;
+                                       $keywordLength = strlen($keyword);
+                                       // Build pattern with each letter of the keyword and potential (encoded) whitespace in between
+                                       $pattern = $keyword[0];
+                                       if ($keywordLength > 1) {
+                                               for ($j = 1; $j < $keywordLength; $j++) {
+                                                       $pattern .= '((&#[xX]0{0,8}([9ab]);?)|(&#0{0,8}(9|10|13);?)|\s)*' . $keyword[$j];
                                                }
-                                               $pattern .= $ra[$i][0][$j];
                                        }
                                        // Handle each type a little different (extra conditions to prevent false positives a bit better)
-                                       switch ($ra[$i][1]) {
-                                               case 'ra_protocol':
+                                       switch ($type) {
+                                               case 'protocol':
                                                        // These take the form of e.g. 'javascript:'
                                                        $pattern .= '((&#[xX]0{0,8}([9ab]);?)|(&#0{0,8}(9|10|13);?)|\s)*(?=:)';
                                                        break;
-                                               case 'ra_tag':
+                                               case 'tag':
                                                        // These take the form of e.g. '<SCRIPT[^\da-z] ....';
                                                        $pattern = '(?<=<)' . $pattern . '((&#[xX]0{0,8}([9ab]);?)|(&#0{0,8}(9|10|13);?)|\s)*(?=[^\da-z])';
                                                        break;
-                                               case 'ra_attribute':
+                                               case 'attribute':
                                                        // These take the form of e.g. 'onload='  Beware that a lot of characters are allowed
                                                        // between the attribute and the equal sign!
                                                        $pattern .= '[\s\!\#\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)';
                                                        break;
                                        }
                                        $pattern = '/' . $pattern . '/i';
-                                       // Add in <x> to nerf the tag
-                                       $replacement = substr_replace($ra[$i][0], $replaceString, 2, 0);
-                                       // Filter out the hex tags
-                                       $val = preg_replace($pattern, $replacement, $val);
-                                       if ($val_before == $val) {
-                                               // No replacements were made, so exit the loop
-                                               $found = FALSE;
-                                       }
+                                       // Inject the replacement to render the potential problem harmless
+                                       $replacement = substr_replace($keyword, $replaceString, 2, 0);
+                                       // Perform the actual replacement
+                                       $value = preg_replace($pattern, $replacement, $value);
+                                       // If no replacements were made exit the loop
+                                       $found = ($valueBeforeReplacement !== $value);
                                }
                        }
                }
-               return $val;
+               return $value;
        }
 }