Added feature #7984: Besides stdWrap.crop there is now a function stdWrap.cropHTML...
author Jochen Rau <j.rau@web.de>
Mon, 31 Aug 2009 15:10:20 +0000 (15:10 +0000)
committer Jochen Rau <j.rau@web.de>
Mon, 31 Aug 2009 15:10:20 +0000 (15:10 +0000)
git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@5857 709f56b5-9817-0410-a4d7-c38de5d9e867

ChangeLog
typo3/sysext/cms/tests/tslib/tslib_cObj_testcase.php [new file with mode: 0644]
typo3/sysext/cms/tslib/class.tslib_content.php

index f469f16..57e5e9a 100755 (executable)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2009-08-31  Jochen Rau <jochen.rau@typoplanet.de>
+
+       * Added feature #7984: Besides stdWrap.crop there is now a function stdWrap.cropHTML that is aware of HTML tags (same options)
+
 2009-08-31  Oliver Hader  <oliver@typo3.org>
 
        * Cleanup: Set SVN properties and tags
diff --git a/typo3/sysext/cms/tests/tslib/tslib_cObj_testcase.php b/typo3/sysext/cms/tests/tslib/tslib_cObj_testcase.php
new file mode 100644 (file)
index 0000000..a09a0bc
--- /dev/null
@@ -0,0 +1,238 @@
+<?php
+/***************************************************************
+*  Copyright notice
+*
+*  (c) 2009 Jochen Rau <jochen.rau@typoplanet.de>
+*  All rights reserved
+*
+*  This script is part of the TYPO3 project. The TYPO3 project is
+*  free software; you can redistribute it and/or modify
+*  it under the terms of the GNU General Public License as published by
+*  the Free Software Foundation; either version 2 of the License, or
+*  (at your option) any later version.
+*
+*  The GNU General Public License can be found at
+*  http://www.gnu.org/copyleft/gpl.html.
+*
+*  This script is distributed in the hope that it will be useful,
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*  GNU General Public License for more details.
+*
+*  This copyright notice MUST APPEAR in all copies of the script!
+***************************************************************/
+
+/**
+ * Testcase for class tslib_cObj
+ *
+ * @package TYPO3
+ * @subpackage cms
+ */
+class tslib_cObj_testcase extends tx_phpunit_testcase {
+       
+       /**
+        * Holds the backed-up copy of the $GLOBALS array taken in setUp()
+        *
+        * @var array
+        **/
+       protected $backedUpGlobals;
+       
+       /**
+        * The content object under test
+        *
+        * @var tslib_cObj
+        **/
+       protected $cObj;
+       /**
+        * Sets up this test case: backs up $GLOBALS, creates the content object under test,
+        * stores a fresh t3lib_cs instance in $GLOBALS['TSFE']->csConvObj and sets the
+        * "t3lib_cs_utils" system setting to "mbstring".
+        *
+        * @return void
+        */
+       public function setUp() {
+               $this->backedUpGlobals = $GLOBALS;
+               $this->cObj = new tslib_cObj();
+               $GLOBALS['TSFE']->csConvObj = new t3lib_cs();
+               $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'mbstring';
+       }
+       
+       /**
+        * Tears down this test case by assigning the backup taken in setUp() back to $GLOBALS.
+        *
+        * @return void
+        */
+       public function tearDown() {
+               $GLOBALS = $this->backedUpGlobals;
+       }
+       
+       /**
+        * This is the data provider for the tests of crop and cropHTML below. It provides all combinations
+        * of charset, text type, and configuration options to be tested.
+        *
+        * Each data set is keyed by a human-readable description and consists of
+        * array(settings, subject, expected result, charset), matching the parameters of cropHtmlWorks().
+        *
+        * @return array The data sets, one per combination of charset, text type and crop settings
+        */
+       public function providerForCrop() {
+               $plainText = 'Kasper Skårhøj implemented the original version of the crop function.';
+               $textWithMarkup = '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a> implemented</strong> the original version of the crop function.';
+               $textWithEntities = 'Kasper Sk&aring;rh&oslash;j implemented the original version of the crop function.';
+
+               $charsets = array();
+               $charsets[] = 'iso-8859-1';
+               $charsets[] = 'utf-8';
+               // Enable more charsets if necessary. This will slow down overall test execution time!
+               // $charsets[] = 'ascii';
+               // $charsets[] = 'big5';
+
+               $data = array();
+               foreach ($charsets as $charset) {
+                       $data = array_merge($data, array(
+                               $charset . ' plain text; 11|...' => array('11|...', $plainText, 'Kasper Skår...', $charset),
+                               $charset . ' plain text; -58|...' => array('-58|...', $plainText, '...høj implemented the original version of the crop function.', $charset),
+                               $charset . ' plain text; 20|...|1' => array('20|...|1', $plainText, 'Kasper Skårhøj...', $charset),
+                               $charset . ' plain text; -49|...|1' => array('-49|...|1', $plainText, '...the original version of the crop function.', $charset),
+                               $charset . ' text with markup; 11|...' => array('11|...', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skår...</a></strong>', $charset),
+                               $charset . ' text with markup; 13|...' => array('13|...', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhø...</a></strong>', $charset),
+                               $charset . ' text with markup; 14|...' => array('14|...', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a>...</strong>', $charset),
+                               $charset . ' text with markup; 15|...' => array('15|...', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a> ...</strong>', $charset),
+                               $charset . ' text with markup; 29|...' => array('29|...', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a> implemented</strong> th...', $charset),
+                               $charset . ' text with markup; -58|...' => array('-58|...', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">...høj</a> implemented</strong> the original version of the crop function.', $charset),
+                               $charset . ' text with markup; 11|...|1' => array('11|...|1', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper...</a></strong>', $charset),
+                               $charset . ' text with markup; 13|...|1' => array('13|...|1', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper...</a></strong>', $charset),
+                               $charset . ' text with markup; 14|...|1' => array('14|...|1', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a>...</strong>', $charset),
+                               $charset . ' text with markup; 15|...|1' => array('15|...|1', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a>...</strong>', $charset),
+                               $charset . ' text with markup; 29|...|1' => array('29|...|1', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">Kasper Skårhøj</a> implemented</strong>...', $charset),
+                               $charset . ' text with markup; -66|...|1' => array('-66|...|1', $textWithMarkup, '<strong><a href="mailto:kasper@typo3.org">...Skårhøj</a> implemented</strong> the original version of the crop function.', $charset),
+                               $charset . ' text with entities 9|...' => array('9|...', $textWithEntities, 'Kasper Sk...', $charset),
+                               $charset . ' text with entities 10|...' => array('10|...', $textWithEntities, 'Kasper Sk&aring;...', $charset),
+                               $charset . ' text with entities 11|...' => array('11|...', $textWithEntities, 'Kasper Sk&aring;r...', $charset),
+                               $charset . ' text with entities 13|...' => array('13|...', $textWithEntities, 'Kasper Sk&aring;rh&oslash;...', $charset),
+                               $charset . ' text with entities 14|...' => array('14|...', $textWithEntities, 'Kasper Sk&aring;rh&oslash;j...', $charset),
+                               $charset . ' text with entities 15|...' => array('15|...', $textWithEntities, 'Kasper Sk&aring;rh&oslash;j ...', $charset),
+                               $charset . ' text with entities 16|...' => array('16|...', $textWithEntities, 'Kasper Sk&aring;rh&oslash;j i...', $charset),
+                               $charset . ' text with entities -56|...' => array('-56|...', $textWithEntities, '...j implemented the original version of the crop function.', $charset),
+                               $charset . ' text with entities -57|...' => array('-57|...', $textWithEntities, '...&oslash;j implemented the original version of the crop function.', $charset),
+                               $charset . ' text with entities -58|...' => array('-58|...', $textWithEntities, '...h&oslash;j implemented the original version of the crop function.', $charset),
+                               $charset . ' text with entities 9|...|1' => array('9|...|1', $textWithEntities, 'Kasper...', $charset),
+                               $charset . ' text with entities 10|...|1' => array('10|...|1', $textWithEntities, 'Kasper...', $charset),
+                               $charset . ' text with entities 11|...|1' => array('11|...|1', $textWithEntities, 'Kasper...', $charset),
+                               $charset . ' text with entities 13|...|1' => array('13|...|1', $textWithEntities, 'Kasper...', $charset),
+                               $charset . ' text with entities 14|...|1' => array('14|...|1', $textWithEntities, 'Kasper Sk&aring;rh&oslash;j...', $charset),
+                               $charset . ' text with entities 15|...|1' => array('15|...|1', $textWithEntities, 'Kasper Sk&aring;rh&oslash;j...', $charset),
+                               $charset . ' text with entities 16|...|1' => array('16|...|1', $textWithEntities, 'Kasper Sk&aring;rh&oslash;j...', $charset),
+                               $charset . ' text with entities -56|...|1' => array('-56|...|1', $textWithEntities, '...implemented the original version of the crop function.', $charset),
+                               $charset . ' text with entities -57|...|1' => array('-57|...|1', $textWithEntities, '...implemented the original version of the crop function.', $charset),
+                               $charset . ' text with entities -58|...|1' => array('-58|...|1', $textWithEntities, '...implemented the original version of the crop function.', $charset)
+                               ));
+               }
+               return $data;           
+       }
+
+       /**
+        * Checks that stdWrap.cropHTML returns the expected result for every data set of
+        * providerForCrop(): plain text, text with markup and text with entities, cropped from
+        * the start or from the end, with and without cropping at the nearest space.
+        *
+        * @test
+        * @dataProvider providerForCrop
+        * @param string $settings The cropHTML settings, e.g. "11|...|1"
+        * @param string $subject The text to crop
+        * @param string $expected The expected result
+        * @param string $charset The render charset that subject and expected result are converted to
+        * @return void
+        */
+       public function cropHtmlWorks($settings, $subject, $expected, $charset) {
+               $this->handleCharset($charset, $subject, $expected);
+               $this->assertEquals($expected, $this->cObj->cropHTML($subject, $settings), 'cropHTML failed with settings: "' . $settings . '" and charset "' . $charset . '"');
+       }
+       
+       /**
+        * Checks if stdWrap.cropHTML works with complex content containing many tags, cropping once
+        * from the start ("300") and once from the end ("-100") with render charset iso-8859-1.
+        * Note that cropHTML currently counts every invisible character individually, whereas a
+        * browser would output a run of them as a single one.
+        *
+        * @test
+        * @return void
+        */
+       public function cropHtmlWorksWithComplexContent() {
+               $GLOBALS['TSFE']->renderCharset = 'iso-8859-1';
+               $subject = '
+<h1>Blog Example</h1>
+<hr>
+<div class="csc-header csc-header-n1">
+       <h2 class="csc-firstHeader">Welcome to Blog #1</h2>
+</div>
+<p class="bodytext">
+       A blog about TYPO3 extension development. In order to start blogging, read the <a href="#">Help section</a>. If you have any further questions, feel free to contact the administrator John Doe (<a href="mailto:john.doe@example.com">john.doe@example.com)</a>.
+</p>
+<div class="tx-blogexample-list-container">
+       <p class="bodytext">
+               Below are the most recent posts:
+       </p>
+       <ul>
+               <li>
+                       <h3>
+                               <a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[blog]=&amp;tx_blogexample_pi1[action]=show&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=003b0131ed">The Post #1</a>
+                       </h3>
+                       <p class="bodytext">
+                               Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut...
+                       </p>
+                       <p class="metadata">
+                               Published on 26.08.2009 by Jochen Rau
+                       </p>
+                       <p>
+                               Tags: [MVC]&nbsp;[Domain Driven Design]&nbsp;<br>
+                               <a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[action]=show&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=f982643bc3">read more &gt;&gt;</a><br>
+                               <a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[blog][uid]=70&amp;tx_blogexample_pi1[action]=edit&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=5b481bc8f0">Edit</a>&nbsp;<a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[blog][uid]=70&amp;tx_blogexample_pi1[action]=delete&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=4e52879656">Delete</a>
+                       </p>
+               </li>
+       </ul>
+       <p>
+               <a href="index.php?id=99&amp;tx_blogexample_pi1[blog][uid]=70&amp;tx_blogexample_pi1[action]=new&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=2718a4b1a0">Create a new Post</a>
+       </p>
+</div>
+<hr>
+<p>
+       © TYPO3 Association
+</p>
+';
+
+               $result = $this->cObj->cropHTML($subject, '300');
+               $expected = '
+<h1>Blog Example</h1>
+<hr>
+<div class="csc-header csc-header-n1">
+       <h2 class="csc-firstHeader">Welcome to Blog #1</h2>
+</div>
+<p class="bodytext">
+       A blog about TYPO3 extension development. In order to start blogging, read the <a href="#">Help section</a>. If you have any further questions, feel free to contact the administrator John Doe (<a href="mailto:john.doe@example.com">john.doe@example.com)</a>.
+</p>
+<div class="tx-blogexample-list-container">
+       <p class="bodytext">
+               Below are the most recent posts:
+       </p>
+       <ul>
+               <li>
+                       <h3>
+                               <a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[blog]=&amp;tx_blogexample_pi1[action]=show&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=003b0131ed">The Pos</a></h3></li></ul></div>';
+               $this->assertEquals($expected, $result);
+
+               $result = $this->cObj->cropHTML($subject, '-100');
+               $expected = '<div class="tx-blogexample-list-container"><ul><li><p>Design]&nbsp;<br>
+                               <a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[action]=show&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=f982643bc3">read more &gt;&gt;</a><br>
+                               <a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[blog][uid]=70&amp;tx_blogexample_pi1[action]=edit&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=5b481bc8f0">Edit</a>&nbsp;<a href="index.php?id=99&amp;tx_blogexample_pi1[post][uid]=211&amp;tx_blogexample_pi1[blog][uid]=70&amp;tx_blogexample_pi1[action]=delete&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=4e52879656">Delete</a>
+                       </p>
+               </li>
+       </ul>
+       <p>
+               <a href="index.php?id=99&amp;tx_blogexample_pi1[blog][uid]=70&amp;tx_blogexample_pi1[action]=new&amp;tx_blogexample_pi1[controller]=Post&amp;cHash=2718a4b1a0">Create a new Post</a>
+       </p>
+</div>
+<hr>
+<p>
+       © TYPO3 Association
+</p>
+';
+               $this->assertEquals($expected, $result);
+       }
+       
+       /**
+        * Sets the render charset of $GLOBALS['TSFE'] to the target charset and converts the subject
+        * and the expected result from iso-8859-1 into that charset. Both strings are passed by
+        * reference and are overwritten with the converted values.
+        *
+        * NOTE(review): the conversion assumes that the literals of this test file are stored as
+        * iso-8859-1 - TODO confirm the file encoding.
+        *
+        * @param string $charset The target charset
+        * @param string $subject The subject
+        * @param string $expected The expected result
+        * @return void
+        */
+       protected function handleCharset($charset, &$subject, &$expected) {
+               $GLOBALS['TSFE']->renderCharset = $charset;
+               $subject = $GLOBALS['TSFE']->csConvObj->conv($subject, 'iso-8859-1', $charset);
+               $expected = $GLOBALS['TSFE']->csConvObj->conv($expected, 'iso-8859-1', $charset);
+       }
+}
+?>
\ No newline at end of file
index 05e4947..5ab208e 100644 (file)
@@ -3752,6 +3752,7 @@ class tslib_cObj {
                                if ($conf['bytes']){$content=t3lib_div::formatSize($content, $conf['bytes.']['labels']);}
                                if ($conf['substring']){$content=$this->substring($content,$conf['substring']);}
                                if ($conf['removeBadHTML'])     {$content = $this->removeBadHTML($content, $conf['removeBadHTML.']);}
+                               if ($conf['cropHTML']) {$content=$this->cropHTML($content, $conf['cropHTML']);}
                                if ($conf['stripHtml']){$content = strip_tags($content);}
                                if ($conf['crop']){$content=$this->crop($content, $conf['crop']);}
                                if ($conf['rawUrlEncode']){$content = rawurlencode($content);}
@@ -4203,6 +4204,133 @@ class tslib_cObj {
        }
 
        /**
+        * Implements the stdWrap property "cropHTML" which is a modified "substr" function allowing to limit a string length 
+        * to a certain number of chars (from either start or end of string) and having a pre/postfix applied if the string 
+        * really was cropped.
+        * 
+        * Compared to stdWrap.crop it respects HTML tags and entities.
+        *
+        * @param       string          The string to perform the operation on
+        * @param       string          The parameters splitted by "|": First parameter is the max number of chars of the string. Negative value means cropping from end of string. Second parameter is the pre/postfix string to apply if cropping occurs. Third parameter is a boolean value. If set then crop will be applied at nearest space.
+        * @return      string          The processed input value.
+        * @access private
+        * @see stdWrap()
+        */
+       function cropHTML($content, $options) {
+               $options = explode('|', $options);
+               $chars = intval($options[0]);
+               $absChars = abs($chars);
+               $replacementForEllipsis = trim($options[1]);
+               $crop2space = $options[2] === '1' ? TRUE : FALSE;
+
+               // Split $content into an array (even items in the array are outside the tags, odd numbers are tag-blocks).
+               $tags= 'a|b|blockquote|body|div|em|font|form|h1|h2|h3|h4|h5|h6|i|li|map|ol|option|p|pre|sub|sup|select|span|strong|table|thead|tbody|tfoot|td|textarea|tr|u|ul|br|hr|img|input|area|link';
+               // TODO We should not crop inside <script> tags.
+               $tagsRegEx = "
+                       (
+                               (?:
+                                       <!--.*?-->                                      # a comment
+                               )
+                               |
+                               </?(?:". $tags . ")+                    # opening tag ('<tag') or closing tag ('</tag')
+                               (?:
+                                       (?:
+                                               \s+\w+                                  # EITHER spaces, followed by word characters (attribute names) 
+                                               (?:
+                                                       \s*=?\s*                        # equals
+                                                       (?>
+                                                               \".*?\"                 # attribute values in double-quotes
+                                                               |
+                                                               '.*?'                   # attribute values in single-quotes
+                                                               |
+                                                               [^'\">\s]+              # plain attribute values
+                                                       )
+                                               )?
+                                       )+\s*
+                                       |                                                       # OR only spaces
+                                       \s*
+                               )
+                               /?>                                                             # closing the tag with '>' or '/>'
+                       )";
+               $splittedContent = preg_split('%' . $tagsRegEx . '%xs', $content, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+               // Reverse array if we are cropping from right.
+               if ($chars < 0) {
+                       $splittedContent = array_reverse($splittedContent);
+               }
+
+               // Crop the text (chars of tag-blocks are not counted).
+               $strLen = 0;
+               $croppedOffset = NULL; // This is the offset of the content item which was cropped.
+               $countSplittedContent = count($splittedContent);
+               for ($offset = 0; $offset < $countSplittedContent; $offset++) {
+                       if ($offset%2 === 0) {
+                               $tempContent = $GLOBALS['TSFE']->csConvObj->utf8_encode($splittedContent[$offset], $GLOBALS['TSFE']->renderCharset);
+                               $thisStrLen = $GLOBALS['TSFE']->csConvObj->strlen('utf-8', html_entity_decode($tempContent, ENT_COMPAT, 'UTF-8'));
+                               if (($strLen + $thisStrLen > $absChars)) {
+                                       $croppedOffset = $offset;
+                                       $cropPosition = $absChars - $strLen;
+                                       if ($crop2space) {
+                                               $cropRegEx = $chars < 0 ? '#(?<=\s)(.(?![^&\s]{2,7};)|(&[^&\s;]{2,7};)){0,' . $cropPosition . '}$#ui' : '#^(.(?![^&\s]{2,7};)|(&[^&\s;]{2,7};)){0,' . $cropPosition . '}(?=\s)#ui';
+                                       } else {
+                                               // The snippets "&[^&\s;]{2,7};" in the RegEx below represents entities.
+                                               $cropRegEx = $chars < 0 ? '#(.(?![^&\s]{2,7};)|(&[^&\s;]{2,7};)){0,' . $cropPosition . '}$#ui' : '#^(.(?![^&\s]{2,7};)|(&[^&\s;]{2,7};)){0,' . $cropPosition . '}#ui';
+                                       }
+                                       if (preg_match($cropRegEx, $tempContent, $croppedMatch)) {
+                                               $tempContent = $croppedMatch[0];
+                                       }
+                                       $splittedContent[$offset] = $GLOBALS['TSFE']->csConvObj->utf8_decode($tempContent, $GLOBALS['TSFE']->renderCharset);
+                                       break;
+                               } else {
+                                       $strLen += $thisStrLen;
+                               }
+                       }
+               }
+
+               // Close cropped tags.
+               $closingTags = array();
+               if($croppedOffset !== NULL) {
+                       $tagName = '';
+                       $openingTagRegEx = '#^<(\w+)(?:\s|>)#';
+                       $closingTagRegEx = '#^</(\w+)(?:\s|>)#';
+                       for ($offset = $croppedOffset - 1; $offset >= 0; $offset = $offset - 2) {
+                               if (substr($splittedContent[$offset], -2) === '/>') {
+                                       // Ignore empty element tags (e.g. <br />).
+                                       continue;
+                               }
+                               preg_match($chars < 0 ? $closingTagRegEx : $openingTagRegEx, $splittedContent[$offset], $matches);
+                               $tagName = isset($matches[1]) ? $matches[1] : NULL;
+                               if ($tagName !== NULL) {
+                                       // Seek for the closing (or opening) tag.
+                                       $seekingTagName = '';
+                                       $countSplittedContent = count($splittedContent);
+                                       for ($seekingOffset = $offset + 2; $seekingOffset < $countSplittedContent; $seekingOffset = $seekingOffset + 2) {
+                                               preg_match($chars < 0 ? $openingTagRegEx : $closingTagRegEx, $splittedContent[$seekingOffset], $matches);
+                                               $seekingTagName = isset($matches[1]) ? $matches[1] : NULL;
+                                               if ($tagName === $seekingTagName) { // We found a matching tag.
+                                                       // Add closing tag only if it occurs after the cropped content item.
+                                                       if ($seekingOffset > $croppedOffset) {
+                                                               $closingTags[] = $splittedContent[$seekingOffset];
+                                                       }
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
+                       // Drop the cropped items of the content array. The $closingTags will be added later on again.
+                       array_splice($splittedContent, $croppedOffset + 1);
+               }
+               $splittedContent = array_merge($splittedContent, array($croppedOffset !== NULL ? $replacementForEllipsis : ''), $closingTags);
+
+               // Reverse array once again if we are cropping from the end.
+               if ($chars < 0) {
+                       $splittedContent = array_reverse($splittedContent);
+               }
+
+               return implode('', $splittedContent);
+       }
+
+       /**
         * Function for removing malicious HTML code when you want to provide some HTML code user-editable.
         * The purpose is to avoid XSS attacks and the code will be continously modified to remove such code.
         * For a complete reference with javascript-on-events, see http://www.wdvl.com/Authoring/JavaScript/Events/events_target.html