[FEATURE][htmlArea RTE] Better Remove Format for MS Word
author stan <typo3@sjbr.ca>
Thu, 7 Jul 2011 05:52:43 +0000 (01:52 -0400)
committer Stanislas Rolland <typo3@sjbr.ca>
Thu, 7 Jul 2011 05:58:35 +0000 (07:58 +0200)
Remove some additional MS-specific elements.

Change-Id: I80d3589f47653e2a83f1e0173a5bd43421fca349
Resolves: #25895
Releases: 4.6
Reviewed-on: http://review.typo3.org/3071
Reviewed-by: Stanislas Rolland
Tested-by: Stanislas Rolland
typo3/sysext/rtehtmlarea/extensions/RemoveFormat/locallang_csh.xml
typo3/sysext/rtehtmlarea/htmlarea/plugins/RemoveFormat/remove-format.js

index b04d4e5..ee40cd0 100644 (file)
@@ -43,9 +43,11 @@ In the dialogue window that opens when the button is clicked:
                        <label index="msWordFormat.details">This option removes:
 - all attributes on the following elements: b, em, i, li, p, strong, ul;
 - the following attributes on all HTML elements: align, class, style;
-- the following elements, but keeps their textual contents: div, link, meta, span, any element containing ":" in its name;
+- the following elements, but keeps their textual contents: div, f, formulas, link, lock, meta, path, shape, shapetype, span, stroke, ?xml, any element containing ":" in its name;
 - the following elements and their contents: style, title;
 - HTML comments;
+- img, imagedata tags;
+- xml tags;
 - the following elements whenever they have no textual contents: b, big, i, s, small, strike, tt, u;
 - double opening and double closing tags;
 - multiples spaces, replacing them by single spaces.</label>
index d358523..66f68df 100644 (file)
@@ -36,7 +36,7 @@ HTMLArea.RemoveFormat = Ext.extend(HTMLArea.Plugin, {
                 * Registering plugin "About" information
                 */
                var pluginInformation = {
-                       version         : '2.2',
+                       version         : '2.3',
                        developer       : 'Stanislas Rolland',
                        developerUrl    : 'http://www.sjbr.ca/',
                        copyrightOwner  : 'Stanislas Rolland',
@@ -228,7 +228,7 @@ HTMLArea.RemoveFormat = Ext.extend(HTMLArea.Plugin, {
                        }
                        if (params['images']) {
                                        // remove any IMG tag
-                               html = html.replace(/<\/?img[^>]*>/gi, "");
+                               html = html.replace(/<\/?(img|imagedata)(>|[^>a-zA-Z][^>]*>)/gi, "");
                        }
                        if (params['msWordFormatting']) {
                                        // Make one line
@@ -237,19 +237,20 @@ HTMLArea.RemoveFormat = Ext.extend(HTMLArea.Plugin, {
                                html = html.replace(/<(b|strong|i|em|p|li|ul) [^>]*>/gi, "<$1>");
                                        // Keep tags, strip attributes
                                html = html.replace(/ (style|class|align)=\"[^>\"]*\"/gi, "");
-                                       // kill unwanted tags: span, div, ?xml:, st1:, [a-z]:, meta, link
-                               html = html.replace(/<\/?span[^>]*>/gi, "").
-                                       replace(/<\/?div[^>]*>/gi, "").
-                                       replace(/<\?xml:[^>]*>/gi, "").
-                                       replace(/<\/?st1:[^>]*>/gi, "").
-                                       replace(/<\/?[a-z]:[^>]*>/g, "").
-                                       replace(/<\/?meta[^>]*>/g, "").
-                                       replace(/<\/?link[^>]*>/g, "");
-                                       // remove unwanted tags and their contents: style, title
+                                       // Remove unwanted tags: div, link, meta, span, ?xml:, [a-z]+:
+                               html = html.replace(/<\/?(div|link|meta|span)(>|[^>a-zA-Z][^>]*>)/gi, "");
+                               html = html.replace(/<\?xml:[^>]*>/gi, "").replace(/<\/?[a-z]+:[^>]*>/g, "");
+                                       // Remove images
+                               html = html.replace(/<\/?(img|imagedata)(>|[^>a-zA-Z][^>]*>)/gi, "");
+                                       // Remove MS-specific tags
+                               html = html.replace(/<\/?(f|formulas|lock|path|shape|shapetype|stroke)(>|[^>a-zA-Z][^>]*>)/gi, "");
+                                       // Remove unwanted tags and their contents: style, title
                                html = html.replace(/<style[^>]*>.*<\/style[^>]*>/gi, "").
                                        replace(/<title[^>]*>.*<\/title[^>]*>/gi, "");
-                                       // remove comments
+                                       // Remove comments
                                html = html.replace(/<!--[^>]*>/gi, "");
+                                       // Remove xml tags
+                               html = html.replace(/<xml.[^>]*>/gi, "");
                                        // Remove inline elements resets
                                html = html.replace(/<\/(b[^a-zA-Z]|big|i[^a-zA-Z]|s[^a-zA-Z]|small|strike|tt|u[^a-zA-Z])><\1>/gi, "");
                                        // Remove double tags