dacd7b9b69bafc5b8c2cb075a54e024c18903baa
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Encoder / JavaScriptEncoder.php
1 <?php
2 namespace TYPO3\CMS\Core\Encoder;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
/**
 * Adapted from the OWASP Enterprise Security API (ESAPI) reference implementation of the JavaScript Codec.
 * Original Author: Mike Boberski
 *
 * This class provides encoding for user input that is intended to be used in a JavaScript context.
 * Every character except ASCII alphanumerics and the immune characters is replaced by a
 * numeric escape sequence:
 *  - \xHH for code points up to U+00FF
 *  - \uHHHH for code points up to U+FFFF
 *  - a UTF-16 surrogate pair \uHHHH\uHHHH for code points above U+FFFF
 *
 * @copyright 2009-2010 The OWASP Foundation
 * @link http://www.owasp.org/index.php/ESAPI
 */
class JavaScriptEncoder implements \TYPO3\CMS\Core\SingletonInterface
{
    /**
     * A map where the keys are the ordinal values 0..255 and the values are
     * either the lowercase hexadecimal representation of that ordinal (as string)
     * or NULL for the ASCII alphanumeric characters, which never get encoded.
     *
     * @var array
     */
    protected $hexMatrix = [];

    /**
     * Characters that are immune (not dangerous) in the JavaScript context
     * and are therefore passed through unchanged.
     *
     * @var array
     */
    protected $immuneCharacters = [',', '.', '_'];

    /**
     * TYPO3 charset encoding object, used to turn a UTF-8 character into its code point
     *
     * @var \TYPO3\CMS\Core\Charset\CharsetConverter
     */
    protected $charsetConversion = null;

    /**
     * Populates the $hexMatrix map for all single-byte ordinal values.
     *
     * Alphanumerical characters are set to NULL in the matrix, every other
     * ordinal value is mapped to its hexadecimal representation.
     */
    public function __construct()
    {
        $this->charsetConversion = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class);
        for ($i = 0; $i < 256; $i++) {
            $isAlphanumeric = ($i >= ord('0') && $i <= ord('9'))
                || ($i >= ord('A') && $i <= ord('Z'))
                || ($i >= ord('a') && $i <= ord('z'));
            $this->hexMatrix[$i] = $isAlphanumeric ? null : dechex($i);
        }
    }

    /**
     * Encodes a string for JavaScript.
     *
     * The input is processed character by character (not byte by byte), so
     * multi-byte UTF-8 characters are each turned into a single escape sequence.
     *
     * @param string $input The UTF-8 string to encode, may be empty.
     * @return string The encoded string.
     */
    public function encode($input)
    {
        // Normalise NULL / scalar input to a string; the mb_* functions
        // emit a deprecation notice for NULL on recent PHP versions.
        $input = (string)$input;
        $stringLength = mb_strlen($input, 'utf-8');
        $encodedString = '';
        for ($i = 0; $i < $stringLength; $i++) {
            $c = mb_substr($input, $i, 1, 'utf-8');
            $encodedString .= $this->encodeCharacter($c);
        }
        return $encodedString;
    }

    /**
     * Returns backslash encoded numeric format. Does not use backslash
     * character escapes such as \" or \' as these may cause parsing problems.
     * For example, if a JavaScript attribute, such as onmouseover, contains
     * a \" that will close the entire attribute and allow an attacker to inject
     * another script attribute.
     *
     * Code points above U+FFFF are emitted as a UTF-16 surrogate pair, because
     * a JavaScript \u escape consumes exactly four hexadecimal digits; writing
     * five or more digits after \u would be decoded as a different character
     * followed by stray digits.
     *
     * @param string $character utf-8 character that needs to be encoded
     * @return string encoded character
     */
    protected function encodeCharacter($character)
    {
        if ($this->isImmuneCharacter($character)) {
            return $character;
        }
        $ordinalValue = $this->charsetConversion->utf8CharToUnumber($character);
        // NULL signals an alphanumeric character, which is returned as is
        $hex = $this->getHexForNonAlphanumeric($ordinalValue);
        if ($hex === null) {
            return $character;
        }
        // Encode up to 255 with \xHH
        if ($ordinalValue < 256) {
            return '\\x' . str_pad(strtoupper($hex), 2, '0', STR_PAD_LEFT);
        }
        // Encode the rest of the Basic Multilingual Plane with \uHHHH
        if ($ordinalValue <= 0xFFFF) {
            return '\\u' . str_pad(strtoupper($hex), 4, '0', STR_PAD_LEFT);
        }
        // Supplementary planes: split into high and low UTF-16 surrogates.
        // Both halves are always in the range D800..DFFF, i.e. exactly four hex digits.
        $offset = $ordinalValue - 0x10000;
        $highSurrogate = 0xD800 | ($offset >> 10);
        $lowSurrogate = 0xDC00 | ($offset & 0x3FF);
        return '\\u' . strtoupper(dechex($highSurrogate)) . '\\u' . strtoupper(dechex($lowSurrogate));
    }

    /**
     * Checks if the given character is one of the immune characters
     *
     * @param string $character utf-8 character to search for, must not be empty
     * @return bool TRUE if character is immune, FALSE otherwise
     */
    protected function isImmuneCharacter($character)
    {
        return in_array($character, $this->immuneCharacters, true);
    }

    /**
     * Returns the given ordinal value as a lowercase hex string, unless it
     * belongs to a single-byte alphanumeric character.
     *
     * For ordinal values up to and including 255 the result is looked up in
     * $hexMatrix, which holds NULL for the ASCII alphanumeric characters.
     * Larger ordinal values are always converted to their hex representation.
     *
     * @param int $ordinalValue Ordinal value (code point) of the character
     * @return string|null hexadecimal ordinal value of non-alphanumeric characters or NULL otherwise.
     */
    protected function getHexForNonAlphanumeric($ordinalValue)
    {
        if ($ordinalValue <= 255) {
            return $this->hexMatrix[$ordinalValue];
        }
        return dechex($ordinalValue);
    }
}