[!!!][TASK] Extract testing framework for TYPO3
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Tests / Unit / Html / HtmlParserTest.php
1 <?php
2 namespace TYPO3\CMS\Core\Tests\Unit\Html;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Core\Html\HtmlParser;
18
19 /**
20 * Testcase for \TYPO3\CMS\Core\Html\HtmlParser
21 */
22 class HtmlParserTest extends \TYPO3\CMS\Components\TestingFramework\Core\UnitTestCase
23 {
/**
 * Parser instance under test; assigned in setUp().
 *
 * @var HtmlParser
 */
protected $subject;
28
/**
 * Assigns a newly created HtmlParser to $this->subject.
 */
protected function setUp()
{
    $parser = new HtmlParser();
    $this->subject = $parser;
}
33
/**
 * Data provider for xHtmlCleaningDoesNotModifyCDATA.
 *
 * Each data set is a pair [source, expected]. Both values are identical in
 * every set, because the CDATA wrapped content is expected to come back
 * from the cleaner unchanged. The sets differ only in where line feeds
 * (the LF constant) are placed inside the CDATA section.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function cDataWillRemainUnmodifiedDataProvider()
{
    return [
        'single-line CDATA' => [
            '/*<![CDATA[*/ <hello world> /*]]>*/',
            '/*<![CDATA[*/ <hello world> /*]]>*/',
        ],
        'multi-line CDATA #1' => [
            '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
            '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
        ],
        'multi-line CDATA #2' => [
            '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
            '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
        ],
        'multi-line CDATA #3' => [
            '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
            '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
        ],
    ];
}
58
/**
 * Data provider for splitIntoBlock.
 *
 * Each data set holds, in this order: the comma separated tag list, the
 * HTML content, the "eliminate extra end tags" flag and the list of parts
 * the test expects back.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function splitIntoBlockDataProvider()
{
    return [
        'splitBlock' => [
            'h1,span',
            '<body><h1>Title</h1><span>Note</span></body>',
            false,
            [
                '<body>',
                '<h1>Title</h1>',
                '',
                '<span>Note</span>',
                '</body>',
            ],
        ],
        'splitBlock br' => [
            'h1,span',
            '<body><h1>Title</h1><br /><span>Note</span><br /></body>',
            false,
            [
                '<body>',
                '<h1>Title</h1>',
                '<br />',
                '<span>Note</span>',
                '<br /></body>',
            ],
        ],
        'splitBlock with attribute' => [
            'h1,span',
            '<body><h1 class="title">Title</h1><span>Note</span></body>',
            false,
            [
                '<body>',
                '<h1 class="title">Title</h1>',
                '',
                '<span>Note</span>',
                '</body>',
            ],
        ],
        'splitBlock span with attribute' => [
            'span',
            '<body><h1>Title</h1><span class="title">Note</span></body>',
            false,
            [
                '<body><h1>Title</h1>',
                '<span class="title">Note</span>',
                '</body>',
            ],
        ],
        // The trailing </div> has no opening tag and is expected to be dropped.
        'splitBlock without extra end tags' => [
            'h1,span,div',
            '<body><h1>Title</h1><span>Note</span></body></div>',
            true,
            [
                '<body>',
                '<h1>Title</h1>',
                '',
                '<span>Note</span>',
                '</body>',
            ],
        ],
    ];
}
117
/**
 * Checks that splitIntoBlock() returns exactly the expected list of parts.
 *
 * @test
 * @dataProvider splitIntoBlockDataProvider
 * @param string $tag Comma separated list of tags.
 * @param string $content HTML content to split.
 * @param bool $eliminateExtraEndTags Handed to splitIntoBlock() as third argument.
 * @param array $expected The parts splitIntoBlock() is expected to return.
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags, $expected)
{
    $actualParts = $this->subject->splitIntoBlock($tag, $content, $eliminateExtraEndTags);

    self::assertSame($expected, $actualParts);
}
130
/**
 * Runs HTMLcleaner() with an empty tag configuration and 1 as third
 * argument, and checks the result against the expected string.
 *
 * @test
 * @dataProvider cDataWillRemainUnmodifiedDataProvider
 * @param string $source Markup handed to HTMLcleaner().
 * @param string $expected Markup HTMLcleaner() is expected to return.
 */
public function xHtmlCleaningDoesNotModifyCDATA($source, $expected)
{
    self::assertSame($expected, $this->subject->HTMLcleaner($source, [], 1));
}
142
/**
 * Data provider for tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured.
 *
 * Each data set is a pair [content, expected result].
 *
 * @return array
 */
public static function spanTagCorrectlyRemovedWhenRmTagIfNoAttribIsConfiguredDataProvider()
{
    $spanWithId = '<span id="id">text</span>';

    $dataSets = [
        'Span tag with no attrib' => ['<span>text</span>', 'text'],
        'Span tag with allowed id attrib' => [$spanWithId, $spanWithId],
        'Span tag with disallowed style attrib' => ['<span style="line-height: 12px;">text</span>', 'text'],
    ];

    return $dataSets;
}
163
/**
 * Cleans the content with span as the only allowed tag, id as its only
 * allowed attribute and rmTagIfNoAttrib enabled for span, then compares
 * the result with the expectation.
 *
 * @test
 * @dataProvider spanTagCorrectlyRemovedWhenRmTagIfNoAttribIsConfiguredDataProvider
 * @param string $content Markup to clean.
 * @param string $expectedResult Markup expected after cleaning.
 */
public function tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured($content, $expectedResult)
{
    $spanSettings = [
        'allowedAttribs' => 'id',
        'rmTagIfNoAttrib' => 1,
    ];
    $tsConfig = [
        'allowTags' => 'span',
        'tags.' => ['span.' => $spanSettings],
    ];

    $cleaned = $this->parseConfigAndCleanHtml($tsConfig, $content);

    self::assertEquals($expectedResult, $cleaned);
}
183
/**
 * Combines rmTagIfNoAttrib for span with global nesting for div and span
 * and checks the cleaned output of a fixed, wrongly nested snippet.
 *
 * @test
 */
public function rmTagIfNoAttribIsConfiguredDoesNotChangeNestingType()
{
    $nestedTags = 'div,span';
    $tsConfig = [
        'allowTags' => $nestedTags,
        'rmTagIfNoAttrib' => 'span',
        'globalNesting' => $nestedTags,
    ];

    $cleaned = $this->parseConfigAndCleanHtml(
        $tsConfig,
        '<span></span><span id="test"><div></span></div>'
    );

    self::assertEquals('<span id="test"></span>', $cleaned);
}
198
/**
 * Data provider for localNestingCorrectlyRemovesInvalidTags.
 *
 * Each data set is a pair [content, expected result]. Sets whose markup is
 * expected to stay untouched reuse one string for both positions.
 *
 * @return array
 */
public static function localNestingCorrectlyRemovesInvalidTagsDataProvider()
{
    $emptyTags = '<B><I></B></I>';
    $tagsWithContent = 'testa<B>test1<I>test2</B>test3</I>testb';
    $spanAndDiv = '<span><div></span></div>';

    $dataSets = [
        'Valid nesting is untouched' => [$emptyTags, $emptyTags],
        'Valid nesting with content is untouched' => [$tagsWithContent, $tagsWithContent],
        'Superflous tags are removed' => [
            '</B><B><I></B></I></B>',
            $emptyTags,
        ],
        'Superflous tags with content are removed' => [
            'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
            'test1test2<B>test3<I>test4</B>test5</I>test6test7',
        ],
        'Another valid nesting test' => [$spanAndDiv, $spanAndDiv],
    ];

    return $dataSets;
}
229
/**
 * Cleans the content with div, span, b and i configured both as allowed
 * tags and for local nesting, then compares with the expectation.
 *
 * @test
 * @dataProvider localNestingCorrectlyRemovesInvalidTagsDataProvider
 * @param string $content Markup to clean.
 * @param string $expectedResult Markup expected after cleaning.
 */
public function localNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
{
    $tagList = 'div,span,b,i';
    $tsConfig = [
        'allowTags' => $tagList,
        'localNesting' => $tagList,
    ];

    $cleaned = $this->parseConfigAndCleanHtml($tsConfig, $content);

    self::assertEquals($expectedResult, $cleaned);
}
244
/**
 * Data provider for globalNestingCorrectlyRemovesInvalidTags.
 *
 * Each data set is a pair [content, expected result]. Sets whose markup is
 * expected to stay untouched reuse one string for both positions.
 *
 * @return array
 */
public static function globalNestingCorrectlyRemovesInvalidTagsDataProvider()
{
    $emptyTags = '<B><I></I></B>';
    $tagsWithContent = 'testa<B>test1<I>test2</I>test3</B>testb';

    $dataSets = [
        'Valid nesting is untouched' => [$emptyTags, $emptyTags],
        'Valid nesting with content is untouched' => [$tagsWithContent, $tagsWithContent],
        'Invalid nesting is cleaned' => [
            '</B><B><I></B></I></B>',
            '<B></B>',
        ],
        'Invalid nesting with content is cleaned' => [
            'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
            'test1test2<B>test3test4</B>test5test6test7',
        ],
        'Another invalid nesting test' => [
            '<span><div></span></div>',
            '<span></span>',
        ],
    ];

    return $dataSets;
}
275
/**
 * Cleans the content with span, div, b and i configured both as allowed
 * tags and for global nesting, then compares with the expectation.
 *
 * @test
 * @dataProvider globalNestingCorrectlyRemovesInvalidTagsDataProvider
 * @param string $content Markup to clean.
 * @param string $expectedResult Markup expected after cleaning.
 */
public function globalNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
{
    $tagList = 'span,div,b,i';
    $tsConfig = [
        'allowTags' => $tagList,
        'globalNesting' => $tagList,
    ];

    $cleaned = $this->parseConfigAndCleanHtml($tsConfig, $content);

    self::assertEquals($expectedResult, $cleaned);
}
290
/**
 * Data provider for stripEmptyTags.
 *
 * Each data set holds, in this order: the stripEmptyTags switch (0 or 1),
 * the tag list (null, an empty string or a comma separated list), the
 * treatNonBreakingSpaceAsEmpty flag, the content and the expected result.
 *
 * The first group passes null as tag list (apart from two sets that name
 * tags explicitly); the second group repeats the scenarios with an empty
 * string as tag list.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function emptyTagsDataProvider()
{
    return [
        // Tag list is null unless a set names tags explicitly
        [0, null, false, '<h1></h1>', '<h1></h1>'],
        [1, null, false, '<h1></h1>', ''],
        [1, null, false, '<h1>hallo</h1>', '<h1>hallo</h1>'],
        [1, null, false, '<h1 class="something"></h1>', ''],
        [1, null, false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, 'h2', false, '<h1 class="something"></h1><h2></h2>', '<h1 class="something"></h1>'],
        [1, 'h2, h1', false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, null, false, '<div><p></p></div>', ''],
        [1, null, false, '<div><p>&nbsp;</p></div>', '<div><p>&nbsp;</p></div>'],
        [1, null, true, '<div><p>&nbsp;&nbsp;</p></div>', ''],
        [1, null, true, '<div>&nbsp;&nbsp;<p></p></div>', ''],
        [1, null, false, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, null, true, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, null, false, '<div>Some content</div>', '<div>Some content</div>'],
        [1, null, true, '<div>Some content</div>', '<div>Some content</div>'],
        [1, null, false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
        [1, null, true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
        // Same scenarios with an empty string as tag list
        [0, '', false, '<h1></h1>', '<h1></h1>'],
        [1, '', false, '<h1></h1>', ''],
        [1, '', false, '<h1>hallo</h1>', '<h1>hallo</h1>'],
        [1, '', false, '<h1 class="something"></h1>', ''],
        [1, '', false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, '', false, '<div><p></p></div>', ''],
        [1, '', false, '<div><p>&nbsp;</p></div>', '<div><p>&nbsp;</p></div>'],
        [1, '', true, '<div><p>&nbsp;&nbsp;</p></div>', ''],
        [1, '', true, '<div>&nbsp;&nbsp;<p></p></div>', ''],
        [1, '', false, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, '', true, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, '', false, '<div>Some content</div>', '<div>Some content</div>'],
        [1, '', true, '<div>Some content</div>', '<div>Some content</div>'],
        [1, '', false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
        [1, '', true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
    ];
}
331
/**
 * Builds a parser configuration with keepNonMatchedTags enabled and the
 * given stripEmptyTags settings, cleans the content with it and compares
 * the result with the expectation.
 *
 * @test
 * @dataProvider emptyTagsDataProvider
 * @param int $stripOn Value of the stripEmptyTags switch; the provider passes 0 or 1.
 * @param string|null $tagList Comma separated list of tags that should be stripped; the provider also passes null and ''.
 * @param bool $treatNonBreakingSpaceAsEmpty If TRUE &nbsp; will be considered empty.
 * @param string $content The HTML code that should be modified.
 * @param string $expectedResult The expected HTML code result.
 */
public function stripEmptyTags($stripOn, $tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
{
    $stripOptions = [
        'tags' => $tagList,
        'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty,
    ];
    $tsConfig = [
        'keepNonMatchedTags' => 1,
        'stripEmptyTags' => $stripOn,
        'stripEmptyTags.' => $stripOptions,
    ];

    self::assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
}
355
/**
 * Data provider for stripEmptyTagsKeepsConfiguredTags.
 *
 * Each data set holds, in this order: the list of tags to keep, the
 * treatNonBreakingSpaceAsEmpty flag, the content and the expected result.
 * In both sets the empty tr and td tags are expected to survive, while the
 * other empty tags are expected to be removed.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function stripEmptyTagsKeepsConfiguredTagsDataProvider()
{
    return [
        [
            'tr,td',
            false,
            '<div><p><tr><td></td></tr></p></div><div class="test"></div><tr></tr><p></p><td></td><i></i>',
            '<div><p><tr><td></td></tr></p></div><tr></tr><td></td>',
        ],
        [
            'tr,td',
            true,
            '<div><p><tr><td></td></tr></p></div><p class="test"> &nbsp; </p><tr></tr><p></p><td></td><i></i>',
            '<div><p><tr><td></td></tr></p></div><tr></tr><td></td>',
        ],
    ];
}
376
/**
 * Builds a parser configuration with keepNonMatchedTags and stripEmptyTags
 * enabled plus the given keepTags list, cleans the content with it and
 * compares the result with the expectation.
 *
 * @test
 * @dataProvider stripEmptyTagsKeepsConfiguredTagsDataProvider
 * @param string $tagList List of tags that should be kept, even if they are empty.
 * @param bool $treatNonBreakingSpaceAsEmpty If true &nbsp; will be considered empty.
 * @param string $content The HTML content that should be parsed.
 * @param string $expectedResult The expected HTML code result.
 */
public function stripEmptyTagsKeepsConfiguredTags($tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
{
    $stripOptions = [
        'keepTags' => $tagList,
        'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty,
    ];
    $tsConfig = [
        'keepNonMatchedTags' => 1,
        'stripEmptyTags' => 1,
        'stripEmptyTags.' => $stripOptions,
    ];

    self::assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
}
399
/**
 * Helper shared by the cleaner tests: feeds the TypoScript array to
 * HTMLparserConfig() and hands the first four entries of its result, in
 * order, to HTMLcleaner() together with the content.
 *
 * @param array $tsConfig The TypoScript that should be used to generate the HTML parser config.
 * @param string $content The content that should be parsed by the HTMLcleaner.
 * @return string The parsed content.
 */
protected function parseConfigAndCleanHtml(array $tsConfig, $content)
{
    $cleanerArguments = $this->subject->HTMLparserConfig($tsConfig);

    return $this->subject->HTMLcleaner(
        $content,
        $cleanerArguments[0],
        $cleanerArguments[1],
        $cleanerArguments[2],
        $cleanerArguments[3]
    );
}
412
/**
 * Data provider for getFirstTag.
 *
 * Each data set is a pair [HTML string, expected result]. The third set
 * documents that text in front of the first tag is part of the expected
 * result; the last set expects an empty string for input without any tag.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function getFirstTagDataProvider()
{
    return [
        ['<body><span></span></body>', '<body>'],
        ['<span>Wrapper<div>Some content</div></span>', '<span>'],
        ['Something before<span>Wrapper<div>Some content</div></span>Something after', 'Something before<span>'],
        ['Something without tag', ''],
    ];
}
427
/**
 * Checks the return value of getFirstTag() against the expectation.
 * As the data provider shows, the expected value includes any text that
 * precedes the first tag.
 *
 * @test
 * @dataProvider getFirstTagDataProvider
 *
 * @param string $str HTML string with tags
 * @param string $expected The expected result.
 */
public function getFirstTag($str, $expected)
{
    $firstTag = $this->subject->getFirstTag($str);

    self::assertEquals($expected, $firstTag);
}
442
/**
 * Data provider for getFirstTagName.
 *
 * Each data set holds, in this order: the HTML string, the preserveCase
 * flag and the expected tag name. With the flag set to false the expected
 * names are upper case; with true the original case is expected.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function getFirstTagNameDataProvider()
{
    return [
        [
            '<body><span></span></body>',
            false,
            'BODY',
        ],
        [
            '<body><span></span></body>',
            true,
            'body',
        ],
        [
            '<div class="test"><span></span></div>',
            false,
            'DIV',
        ],
        [
            '<div><span class="test"></span></div>',
            false,
            'DIV',
        ],
        [
            '<br /><span class="test"></span>',
            false,
            'BR',
        ],
        [
            '<img src="test.jpg" />',
            false,
            'IMG',
        ],
    ];
}
471
/**
 * Checks the tag name returned by getFirstTagName() against the
 * expectation.
 *
 * @test
 * @dataProvider getFirstTagNameDataProvider
 *
 * @param string $str HTML string handed to getFirstTagName().
 * @param bool $preserveCase If set, the tag name is expected in its original case instead of upper case.
 * @param string $expected The expected result.
 */
public function getFirstTagName($str, $preserveCase, $expected)
{
    $tagName = $this->subject->getFirstTagName($str, $preserveCase);

    self::assertEquals($expected, $tagName);
}
486
/**
 * Data provider for removeFirstAndLastTag.
 *
 * Each data set is a pair [HTML string, expected result]. The expected
 * result is always the content between the first and the last tag; text
 * before the first or after the last tag is not part of it.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function removeFirstAndLastTagDataProvider()
{
    return [
        [
            '<span>Wrapper<div>Some content</div></span>',
            'Wrapper<div>Some content</div>',
        ],
        [
            '<td><tr>Some content</tr></td>',
            '<tr>Some content</tr>',
        ],
        [
            'Something before<span>Wrapper<div>Some content</div></span>Something after',
            'Wrapper<div>Some content</div>',
        ],
        [
            '<span class="hidden">Wrapper<div>Some content</div></span>',
            'Wrapper<div>Some content</div>',
        ],
        [
            '<span>Wrapper<div class="hidden">Some content</div></span>',
            'Wrapper<div class="hidden">Some content</div>',
        ],
        [
            'Some stuff before <span>Wrapper<div class="hidden">Some content</div></span> and after',
            'Wrapper<div class="hidden">Some content</div>',
        ],
    ];
}
501
/**
 * Checks the return value of removeFirstAndLastTag() against the
 * expectation. In the provided data sets, anything before the first and
 * after the last tag is expected to be gone as well.
 *
 * @test
 * @dataProvider removeFirstAndLastTagDataProvider
 * @param string $str String to process
 * @param string $expectedResult Content expected to remain.
 */
public function removeFirstAndLastTag($str, $expectedResult)
{
    $innerContent = $this->subject->removeFirstAndLastTag($str);

    self::assertEquals($expectedResult, $innerContent);
}
515
/**
 * Data provider for getTagAttributes.
 *
 * Each data set is a pair [tag string, expected result]. The expected
 * result consists of two arrays, both keyed by the lower-cased attribute
 * name: the first one holds the attribute values, the second one holds
 * meta information (the attribute name as written in the tag under
 * 'origTag' and, where present, the quote character under 'dashType').
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function getTagAttributesDataProvider()
{
    return [
        // Double quoted value, mixed-case attribute name and a value-less attribute
        [
            '<a href="" data-shortCut="DXB" required>',
            [
                [
                    'href' => '',
                    'data-shortcut' => 'DXB',
                    'required' => '',
                ],
                [
                    'href' => ['origTag' => 'href', 'dashType' => '"'],
                    'data-shortcut' => ['origTag' => 'data-shortCut', 'dashType' => '"'],
                    'required' => ['origTag' => 'required'],
                ],
            ],
        ],
        // Single quoted value containing double quotes, and an unquoted value
        [
            '<ul STYLE=\'background-image: (url: "fra.png")\' data-shortcut=FRA>',
            [
                [
                    'style' => 'background-image: (url: "fra.png")',
                    'data-shortcut' => 'FRA',
                ],
                [
                    'style' => ['origTag' => 'STYLE', 'dashType' => '\''],
                    'data-shortcut' => ['origTag' => 'data-shortcut', 'dashType' => ''],
                ],
            ],
        ],
    ];
}
539
/**
 * Checks the attribute arrays returned by get_tag_attributes() against
 * the expectation.
 *
 * @test
 * @dataProvider getTagAttributesDataProvider
 * @param string $tag Tag string to process
 * @param array $expectedResult Attribute values and their meta information.
 */
public function getTagAttributes($tag, $expectedResult)
{
    $attributes = $this->subject->get_tag_attributes($tag);

    self::assertEquals($expectedResult, $attributes);
}
553
/**
 * Data provider for rawStripEmptyTagsTest.
 *
 * Each data set holds, in this order: the content, the comma separated
 * tag list, the treatNonBreakingSpaceAsEmpty flag and the expected result.
 *
 * Static, like the other static providers of this class, so it can be
 * invoked without a test case instance.
 *
 * @return array
 */
public static function stripEmptyTagsDataProvider()
{
    return [
        // Testing wrongly encapsulated and upper/lowercase tags
        [
            '<div>Denpassar</div><p> Bali</P><p></p><P></p><ul><li></li></ul>',
            '',
            false,
            '<div>Denpassar</div><p> Bali</P>',
        ],
        // Testing incomplete tags
        [
            '<p><div>Klungklung</div></p><p> Semarapura<p></p><p></p><ul><li></li></ul>',
            '',
            false,
            '<p><div>Klungklung</div></p><p> Semarapura',
        ],
        // Testing the third parameter (tags containing only &nbsp; count as empty)
        [
            '<p><div>Badung</div></p><ul> Mangupura<p></p><p></p><ul><li>&nbsp;</li><li>Uluwatu</li></ul>',
            '',
            true,
            '<p><div>Badung</div></p><ul> Mangupura<ul><li>Uluwatu</li></ul>',
        ],
        // Testing the tag list: only empty p and div tags are stripped, other empty tags are kept
        [
            '<p><div>Badung</div></p><ul> Mangupura<p></p><p></p><ul><li></li></ul>',
            'p,div',
            true,
            '<p><div>Badung</div></p><ul> Mangupura<ul><li></li></ul>',
        ],
    ];
}
591
/**
 * Calls stripEmptyTags() on the subject directly, without going through
 * the parser configuration, and checks the result against the expectation.
 *
 * @test
 * @dataProvider stripEmptyTagsDataProvider
 * @param string $content The content to be stripped of empty tags
 * @param string $tagList The comma separated list of tags to be stripped.
 *                        If empty, all empty tags will be stripped
 * @param bool $treatNonBreakingSpaceAsEmpty If TRUE tags containing only &nbsp; entities will be treated as empty.
 * @param string $expectedResult Content expected after stripping.
 */
public function rawStripEmptyTagsTest($content, $tagList, $treatNonBreakingSpaceAsEmpty, $expectedResult)
{
    $stripped = $this->subject->stripEmptyTags($content, $tagList, $treatNonBreakingSpaceAsEmpty);

    self::assertEquals($expectedResult, $stripped);
}
607 }