[!!!][TASK] Removed deprecated code from HtmlParser
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Tests / Unit / Html / HtmlParserTest.php
1 <?php
2 namespace TYPO3\CMS\Core\Tests\Unit\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Html\HtmlParser;
18
/**
 * Testcase for \TYPO3\CMS\Core\Html\HtmlParser
 *
 * Exercises splitIntoBlock(), HTMLparserConfig() in combination with
 * HTMLcleaner(), getFirstTag(), getFirstTagName() and removeFirstAndLastTag().
 *
 * All data providers are declared static so that they are declared
 * consistently and remain usable with PHPUnit versions that require
 * static data providers.
 */
class HtmlParserTest extends \TYPO3\CMS\Core\Tests\UnitTestCase
{
    /**
     * The parser instance under test, created in setUp().
     *
     * @var \TYPO3\CMS\Core\Html\HtmlParser
     */
    protected $subject = null;

    /**
     * Instantiates the parser under test.
     */
    protected function setUp()
    {
        $this->subject = new HtmlParser();
    }

    /**
     * Data provider for xHtmlCleaningDoesNotModifyCDATA
     *
     * Each data set is array(source, expected); source and expected are
     * identical because CDATA sections must pass through unchanged.
     *
     * @return array
     */
    public static function cDataWillRemainUnmodifiedDataProvider()
    {
        return array(
            'single-line CDATA' => array(
                '/*<![CDATA[*/ <hello world> /*]]>*/',
                '/*<![CDATA[*/ <hello world> /*]]>*/',
            ),
            'multi-line CDATA #1' => array(
                '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
                '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
            ),
            'multi-line CDATA #2' => array(
                '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
                '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
            ),
            'multi-line CDATA #3' => array(
                '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
                '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
            ),
        );
    }

    /**
     * Data provider for splitIntoBlock
     *
     * Each data set is array(tag list, content, eliminateExtraEndTags, expected parts).
     *
     * @return array
     */
    public static function splitIntoBlockDataProvider()
    {
        return array(
            'splitBlock' => array(
                'h1,span',
                '<body><h1>Title</h1><span>Note</span></body>',
                false,
                array(
                    '<body>',
                    '<h1>Title</h1>',
                    '',
                    '<span>Note</span>',
                    '</body>'
                )
            ),
            'splitBlock br' => array(
                'h1,span',
                '<body><h1>Title</h1><br /><span>Note</span><br /></body>',
                false,
                array(
                    '<body>',
                    '<h1>Title</h1>',
                    '<br />',
                    '<span>Note</span>',
                    '<br /></body>'
                )
            ),
            'splitBlock with attribute' => array(
                'h1,span',
                '<body><h1 class="title">Title</h1><span>Note</span></body>',
                false,
                array(
                    '<body>',
                    '<h1 class="title">Title</h1>',
                    '',
                    '<span>Note</span>',
                    '</body>'
                )
            ),
            'splitBlock span with attribute' => array(
                'span',
                '<body><h1>Title</h1><span class="title">Note</span></body>',
                false,
                array(
                    '<body><h1>Title</h1>',
                    '<span class="title">Note</span>',
                    '</body>'
                )
            ),
            'splitBlock without extra end tags' => array(
                'h1,span,div',
                '<body><h1>Title</h1><span>Note</span></body></div>',
                true,
                array(
                    '<body>',
                    '<h1>Title</h1>',
                    '',
                    '<span>Note</span>',
                    '</body>'
                )
            ),
        );
    }

    /**
     * Verifies the parts returned by splitIntoBlock() for the given tag list and content.
     *
     * @test
     * @param string $tag List of tags, comma separated.
     * @param string $content HTML-content
     * @param bool $eliminateExtraEndTags If set, excessive end tags are ignored - you should probably set this in most cases.
     * @param array $expected The expected result
     * @dataProvider splitIntoBlockDataProvider
     */
    public function splitIntoBlock($tag, $content, $eliminateExtraEndTags, $expected)
    {
        $this->assertSame($expected, $this->subject->splitIntoBlock($tag, $content, $eliminateExtraEndTags));
    }

    /**
     * Verifies that HTMLcleaner() called with an empty tag configuration and
     * 1 as third argument returns CDATA content unchanged.
     *
     * @test
     * @param string $source
     * @param string $expected
     * @dataProvider cDataWillRemainUnmodifiedDataProvider
     */
    public function xHtmlCleaningDoesNotModifyCDATA($source, $expected)
    {
        $result = $this->subject->HTMLcleaner($source, array(), 1);
        $this->assertSame($expected, $result);
    }

    /**
     * Data provider for tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured
     *
     * Each data set is array(content, expected result).
     *
     * @return array
     */
    public static function spanTagCorrectlyRemovedWhenRmTagIfNoAttribIsConfiguredDataProvider()
    {
        return array(
            'Span tag with no attrib' => array(
                '<span>text</span>',
                'text'
            ),
            'Span tag with allowed id attrib' => array(
                '<span id="id">text</span>',
                '<span id="id">text</span>'
            ),
            'Span tag with disallowed style attrib' => array(
                '<span style="line-height: 12px;">text</span>',
                'text'
            )
        );
    }

    /**
     * Verifies that a span tag left without any allowed attribute is removed
     * when rmTagIfNoAttrib is set for that tag, while its content is kept.
     *
     * @test
     * @param string $content
     * @param string $expectedResult
     * @dataProvider spanTagCorrectlyRemovedWhenRmTagIfNoAttribIsConfiguredDataProvider
     */
    public function tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured($content, $expectedResult)
    {
        $tsConfig = array(
            'allowTags' => 'span',
            'tags.' => array(
                'span.' => array(
                    'allowedAttribs' => 'id',
                    'rmTagIfNoAttrib' => 1
                )
            )
        );
        $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
    }

    /**
     * Verifies the cleaning result when rmTagIfNoAttrib is combined with
     * globalNesting for the same tag.
     *
     * @test
     */
    public function rmTagIfNoAttribIsConfiguredDoesNotChangeNestingType()
    {
        $tsConfig = array(
            'allowTags' => 'div,span',
            'rmTagIfNoAttrib' => 'span',
            'globalNesting' => 'div,span'
        );
        $content = '<span></span><span id="test"><div></span></div>';
        $expectedResult = '<span id="test"></span>';
        $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
    }

    /**
     * Data provider for localNestingCorrectlyRemovesInvalidTags
     *
     * Each data set is array(content, expected result).
     *
     * @return array
     */
    public static function localNestingCorrectlyRemovesInvalidTagsDataProvider()
    {
        return array(
            'Valid nesting is untouched' => array(
                '<B><I></B></I>',
                '<B><I></B></I>'
            ),
            'Valid nesting with content is untouched' => array(
                'testa<B>test1<I>test2</B>test3</I>testb',
                'testa<B>test1<I>test2</B>test3</I>testb'
            ),
            'Superflous tags are removed' => array(
                '</B><B><I></B></I></B>',
                '<B><I></B></I>'
            ),
            'Superflous tags with content are removed' => array(
                'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
                'test1test2<B>test3<I>test4</B>test5</I>test6test7'
            ),
            'Another valid nesting test' => array(
                '<span><div></span></div>',
                '<span><div></span></div>',
            ),
        );
    }

    /**
     * Verifies the cleaning result when the tags are configured for localNesting.
     *
     * @test
     * @dataProvider localNestingCorrectlyRemovesInvalidTagsDataProvider
     * @param string $content
     * @param string $expectedResult
     */
    public function localNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
    {
        $tsConfig = array(
            'allowTags' => 'div,span,b,i',
            'localNesting' => 'div,span,b,i',
        );
        $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
    }

    /**
     * Data provider for globalNestingCorrectlyRemovesInvalidTags
     *
     * Each data set is array(content, expected result).
     *
     * @return array
     */
    public static function globalNestingCorrectlyRemovesInvalidTagsDataProvider()
    {
        return array(
            'Valid nesting is untouched' => array(
                '<B><I></I></B>',
                '<B><I></I></B>'
            ),
            'Valid nesting with content is untouched' => array(
                'testa<B>test1<I>test2</I>test3</B>testb',
                'testa<B>test1<I>test2</I>test3</B>testb'
            ),
            'Invalid nesting is cleaned' => array(
                '</B><B><I></B></I></B>',
                '<B></B>'
            ),
            'Invalid nesting with content is cleaned' => array(
                'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
                'test1test2<B>test3test4</B>test5test6test7'
            ),
            'Another invalid nesting test' => array(
                '<span><div></span></div>',
                '<span></span>',
            ),
        );
    }

    /**
     * Verifies the cleaning result when the tags are configured for globalNesting.
     *
     * @test
     * @dataProvider globalNestingCorrectlyRemovesInvalidTagsDataProvider
     * @param string $content
     * @param string $expectedResult
     */
    public function globalNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
    {
        $tsConfig = array(
            'allowTags' => 'span,div,b,i',
            'globalNesting' => 'span,div,b,i',
        );
        $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
    }

    /**
     * Data provider for stripEmptyTags
     *
     * Each data set is
     * array(stripOn, tagList, treatNonBreakingSpaceAsEmpty, content, expected result).
     *
     * @return array
     */
    public static function emptyTagsDataProvider()
    {
        return array(
            array(0, null, false, '<h1></h1>', '<h1></h1>'),
            array(1, null, false, '<h1></h1>', ''),
            array(1, null, false, '<h1>hallo</h1>', '<h1>hallo</h1>'),
            array(1, null, false, '<h1 class="something"></h1>', ''),
            array(1, null, false, '<h1 class="something"></h1><h2></h2>', ''),
            array(1, 'h2', false, '<h1 class="something"></h1><h2></h2>', '<h1 class="something"></h1>'),
            array(1, 'h2, h1', false, '<h1 class="something"></h1><h2></h2>', ''),
            array(1, null, false, '<div><p></p></div>', ''),
            array(1, null, false, '<div><p>&nbsp;</p></div>', '<div><p>&nbsp;</p></div>'),
            array(1, null, true, '<div><p>&nbsp;&nbsp;</p></div>', ''),
            array(1, null, true, '<div>&nbsp;&nbsp;<p></p></div>', ''),
            array(1, null, false, '<div>Some content<p></p></div>', '<div>Some content</div>'),
            array(1, null, true, '<div>Some content<p></p></div>', '<div>Some content</div>'),
            array(1, null, false, '<div>Some content</div>', '<div>Some content</div>'),
            array(1, null, true, '<div>Some content</div>', '<div>Some content</div>'),
            array(1, null, false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
            array(1, null, true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
        );
    }

    /**
     * Verifies the cleaning result for the stripEmptyTags configuration,
     * with non-matched tags being kept (keepNonMatchedTags = 1).
     *
     * @test
     * @dataProvider emptyTagsDataProvider
     * @param int $stripOn 1 if stripping should be activated, 0 otherwise.
     * @param string|null $tagList Comma separated list of tags that should be stripped, or NULL for no explicit list.
     * @param bool $treatNonBreakingSpaceAsEmpty If TRUE &nbsp; will be considered empty.
     * @param string $content The HTML code that should be modified.
     * @param string $expectedResult The expected HTML code result.
     */
    public function stripEmptyTags($stripOn, $tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
    {
        $tsConfig = array(
            'keepNonMatchedTags' => 1,
            'stripEmptyTags' => $stripOn,
            'stripEmptyTags.' => array(
                'tags' => $tagList,
                'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty
            ),
        );

        $result = $this->parseConfigAndCleanHtml($tsConfig, $content);
        $this->assertEquals($expectedResult, $result);
    }

    /**
     * Calls HTMLparserConfig() and passes the generated config to the HTMLcleaner() method on the current subject.
     *
     * The first four elements of the array returned by HTMLparserConfig() are
     * handed to HTMLcleaner() as its second to fifth argument.
     *
     * @param array $tsConfig The TypoScript that should be used to generate the HTML parser config.
     * @param string $content The content that should be parsed by the HTMLcleaner.
     * @return string The parsed content.
     */
    protected function parseConfigAndCleanHtml(array $tsConfig, $content)
    {
        $config = $this->subject->HTMLparserConfig($tsConfig);
        return $this->subject->HTMLcleaner($content, $config[0], $config[1], $config[2], $config[3]);
    }

    /**
     * Data provider for getFirstTag
     *
     * Each data set is array(input string, expected result).
     *
     * @return array
     */
    public static function getFirstTagDataProvider()
    {
        return array(
            array('<body><span></span></body>', '<body>'),
            array('<span>Wrapper<div>Some content</div></span>', '<span>'),
            array('Something before<span>Wrapper<div>Some content</div></span>Something after', 'Something before<span>'),
            array('Something without tag', '')
        );
    }

    /**
     * Verifies the result of getFirstTag().
     *
     * As the data sets show, everything from the beginning of the string up
     * to and including the first tag is expected, and an empty result is
     * expected when the string contains no tag.
     *
     * @test
     * @dataProvider getFirstTagDataProvider
     *
     * @param string $str HTML string with tags
     * @param string $expected The expected result.
     */
    public function getFirstTag($str, $expected)
    {
        $this->assertEquals($expected, $this->subject->getFirstTag($str));
    }

    /**
     * Data provider for getFirstTagName
     *
     * Each data set is array(input string, preserveCase, expected tag name).
     *
     * @return array
     */
    public static function getFirstTagNameDataProvider()
    {
        return array(
            array(
                '<body><span></span></body>',
                false,
                'BODY'
            ),
            array(
                '<body><span></span></body>',
                true,
                'body'
            ),
            array(
                '<div class="test"><span></span></div>',
                false,
                'DIV'
            ),
            array(
                '<div><span class="test"></span></div>',
                false,
                'DIV'
            ),
            array(
                '<br /><span class="test"></span>',
                false,
                'BR'
            ),
            array(
                '<img src="test.jpg" />',
                false,
                'IMG'
            ),
        );
    }

    /**
     * Verifies the NAME of the first tag as returned by getFirstTagName().
     *
     * @test
     * @dataProvider getFirstTagNameDataProvider
     *
     * @param string $str HTML tag (The element name MUST be separated from the attributes by a space character! Just *whitespace* will not do)
     * @param bool $preserveCase If set, then the tag is NOT converted to uppercase but its case is preserved.
     * @param string $expected The expected result.
     */
    public function getFirstTagName($str, $preserveCase, $expected)
    {
        $this->assertEquals($expected, $this->subject->getFirstTagName($str, $preserveCase));
    }

    /**
     * Data provider for removeFirstAndLastTag
     *
     * Each data set is array(input string, expected result).
     *
     * @return array
     */
    public static function removeFirstAndLastTagDataProvider()
    {
        return array(
            array('<span>Wrapper<div>Some content</div></span>', 'Wrapper<div>Some content</div>'),
            array('<td><tr>Some content</tr></td>', '<tr>Some content</tr>'),
            array('Something before<span>Wrapper<div>Some content</div></span>Something after', 'Wrapper<div>Some content</div>'),
            array('<span class="hidden">Wrapper<div>Some content</div></span>', 'Wrapper<div>Some content</div>'),
            array('<span>Wrapper<div class="hidden">Some content</div></span>', 'Wrapper<div class="hidden">Some content</div>'),
        );
    }

    /**
     * Verifies that removeFirstAndLastTag() strips the first and the last tag of the string.
     *
     * As the data sets show, anything before the first and after the last
     * tag is expected to be removed as well.
     *
     * @test
     * @dataProvider removeFirstAndLastTagDataProvider
     * @param string $str String to process
     * @param string $expectedResult
     */
    public function removeFirstAndLastTag($str, $expectedResult)
    {
        $this->assertEquals($expectedResult, $this->subject->removeFirstAndLastTag($str));
    }
}