Fixed bug #8988: htmlArea RTE: Pasting text from MS Word to Firefox 3 brings a lot of garbage
author Stanislas Rolland <typo3@sjbr.ca>
Tue, 12 Aug 2008 21:20:09 +0000 (21:20 +0000)
committer Stanislas Rolland <typo3@sjbr.ca>
Tue, 12 Aug 2008 21:20:09 +0000 (21:20 +0000)
git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@3972 709f56b5-9817-0410-a4d7-c38de5d9e867

ChangeLog
typo3/sysext/rtehtmlarea/ChangeLog
typo3/sysext/rtehtmlarea/doc/manual.sxw
typo3/sysext/rtehtmlarea/htmlarea/plugins/DefaultClean/default-clean.js
typo3/sysext/rtehtmlarea/htmlarea/plugins/RemoveFormat/remove-format.js
typo3/sysext/rtehtmlarea/res/demo/pageTSConfig.txt
typo3/sysext/rtehtmlarea/res/proc/pageTSConfig.txt
typo3/sysext/rtehtmlarea/res/typical/pageTSConfig.txt

index 1eba548..a76796b 100755 (executable)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2008-08-12  Stanislas Rolland  <typo3@sjbr.ca>
+
+       * Fixed bug #8988: htmlArea RTE: Pasting text from MS Word to Firefox 3 brings a lot of garbage
+
 2008-08-12  Dmitry Dulepov  <dmotry@typo3.org>
 
        * Fixed bug #9060: Wrong alert text in Constant editor
index 54e57cd..2410af0 100644 (file)
@@ -1,3 +1,7 @@
+2008-08-12  Stanislas Rolland  <typo3@sjbr.ca>
+
+       * Fixed bug #8988: htmlArea RTE: Pasting text from MS Word to Firefox 3 brings a lot of garbage
+
 2008-08-11  Stanislas Rolland  <typo3@sjbr.ca>
 
        * Follow-up for issue #8673: htmlArea RTE drag n drop tab of image insertion not functional
index 376b898..4207d2f 100644 (file)
Binary files a/typo3/sysext/rtehtmlarea/doc/manual.sxw and b/typo3/sysext/rtehtmlarea/doc/manual.sxw differ
index 912170a..40ead9a 100644 (file)
@@ -119,22 +119,38 @@ DefaultClean = HTMLArea.Plugin.extend({
                        }
                }
                function checkEmpty(el) {
-                       if(/^(span|b|strong|i|em|font)$/i.test(el.tagName) && !el.firstChild) el.parentNode.removeChild(el);
+                       if(/^(span|b|strong|i|em|font)$/i.test(el.nodeName) && !el.firstChild) el.parentNode.removeChild(el);
                }
                function parseTree(root) {
-                       var tag = root.tagName.toLowerCase(), i, next;
-                       if((HTMLArea.is_ie && root.scopeName != 'HTML') || (!HTMLArea.is_ie && /:/.test(tag)) || /o:p/.test(tag)) {
-                               stripTag(root);
-                               return false;
-                       } else {
-                               clearClass(root);
-                               clearStyle(root);
-                               for (i=root.firstChild;i;i=next) {
-                                       next = i.nextSibling;
-                                       if(i.nodeType == 1 && parseTree(i)) { checkEmpty(i); }
-                               }
+                       var tag = root.nodeName.toLowerCase(), next;
+                       switch (root.nodeType) {
+                               case 1:
+                                       if (/^(meta|style|title|link)$/.test(tag)) {
+                                               root.parentNode.removeChild(root);
+                                               return false;
+                                               break;
+                                       }
+                               case 3:
+                               case 9:
+                               case 11:
+                                       if ((HTMLArea.is_ie && root.scopeName != 'HTML') || (!HTMLArea.is_ie && /:/.test(tag)) || /o:p/.test(tag)) {
+                                               stripTag(root);
+                                               return false;
+                                       } else {
+                                               clearClass(root);
+                                               clearStyle(root);
+                                               for (var i=root.firstChild;i;i=next) {
+                                                       next = i.nextSibling;
+                                                       if (i.nodeType != 3 && parseTree(i)) { checkEmpty(i); }
+                                               }
+                                       }
+                                       return true;
+                                       break;
+                               default:
+                                       root.parentNode.removeChild(root);
+                                       return false;
+                                       break;
                        }
-                       return true;
                }
                parseTree(html);
        }
index 00cdffc..ed83578 100644 (file)
@@ -1,7 +1,7 @@
 /***************************************************************
 *  Copyright notice
 *
-*  (c) 2005-2008 Stanislas Rolland <stanislas.rolland(arobas)fructifor.ca>
+*  (c) 2005-2008 Stanislas Rolland <typo3(arobas)sjbr.ca>
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
@@ -140,12 +140,17 @@ RemoveFormat = HTMLArea.Plugin.extend({
                                        html = html.replace(regMS3, "").replace(regMS4, "");
                                                // mozilla doesn't like <em> tags
                                        html = html.replace(/<em>/gi, "<i>").replace(/<\/em>/gi, "</i>");
-                                               // kill unwanted tags: span, div, ?xml:, st1:, [a-z]:
+                                               // kill unwanted tags: span, div, ?xml:, st1:, [a-z]:, meta, link
                                        html = html.replace(/<\/?span[^>]*>/gi, "").
                                                replace(/<\/?div[^>]*>/gi, "").
                                                replace(/<\?xml:[^>]*>/gi, "").
                                                replace(/<\/?st1:[^>]*>/gi, "").
-                                               replace(/<\/?[a-z]:[^>]*>/g, "");
+                                               replace(/<\/?[a-z]:[^>]*>/g, "").
+                                               replace(/<\/?meta[^>]*>/g, "").
+                                               replace(/<\/?link[^>]*>/g, "");
+                                               // remove unwanted tags and their contents: style, title
+                                       html = html.replace(/<style[^>]*>.*<\/style[^>]*>/gi, "").
+                                               replace(/<title[^>]*>.*<\/title[^>]*>/gi, "");
                                                // remove comments
                                        html = html.replace(/<!--[^>]*>/gi, "");
                                                // remove double tags
index b589bac..d5e6279 100644 (file)
@@ -113,7 +113,7 @@ RTE.default {
        removeTrailingBR = 1
        removeComments = 1
        removeTags = center, o:p, sdfield
-       removeTagsAndContents = style,script
+       removeTagsAndContents = link, meta, script, style, title
 
                ## Allow img tags
        proc.entryHTMLparser_db.tags.img >
index 707466b..11c4742 100644 (file)
@@ -36,13 +36,13 @@ RTE.default.proc {
                ## TAGS ALLOWED
                ## Added to the default internal list: b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span
                ## But, for the sake of clarity, we use a complete list in alphabetic order.
-               ## center, font, o:p, strike, sdfield and  u will be removed on entry (see below).
+               ## center, font, link, meta, o:p, strike, sdfield, style, title and u will be removed on entry (see below).
                ## b and i will be remapped on exit (see below).
                ## Note that the link accessibility feature of htmlArea RTE does insert img tags.
        allowTags (
                a, abbr, acronym, address, b, bdo, big, blockquote, br, caption, center, cite, code, col, colgroup, del, dfn, div, em, font,
-               h1, h2, h3, h4, h5, h6, hr, i, img, ins, kbd, label, li, link, ol, p, pre, q, samp, sdfield, small,
-               span, strike, strong, sub, sup, table, thead, tbody, tfoot, td, th, tr, tt, u, ul, var
+               h1, h2, h3, h4, h5, h6, hr, i, img, ins, kbd, label, li, link, meta, ol, p, pre, q, samp, sdfield, small,
+               span, strike, strong, style, sub, sup, table, thead, tbody, tfoot, td, th, tr, title, tt, u, ul, var
                )
 
                ## TAGS DENIED
@@ -116,9 +116,9 @@ RTE.default.proc {
                        var.allowedAttribs = id, title, dir, lang, xml:lang, class
                }
 
-                       ## REMOVE OPEN OFFICE META DATA TAGS, WORD 2003 TAGS AND DEPRECATED HTML TAGS
+                       ## REMOVE OPEN OFFICE META DATA TAGS, WORD 2003 TAGS, LINK, META, STYLE AND TITLE TAGS, AND DEPRECATED HTML TAGS
                        ## We use this rule instead of the denyTags rule so that we can protect custom tags without protecting these unwanted tags.
-               removeTags = center, font, o:p, sdfield, strike, u
+               removeTags = center, font, link, meta, o:p, sdfield, strike, style, title, u
 
                        ## PROTECT CUSTOM TAGS
                keepNonMatchedTags = protect
index 41ab867..d2f180f 100644 (file)
@@ -102,8 +102,8 @@ RTE.default {
        enableWordClean = 1
        removeTrailingBR = 1
        removeComments = 1
-       removeTags = center, font, o:p, sdfield, strike,u
-       removeTagsAndContents = style,script
+       removeTags = center, font, o:p, sdfield, strike, u
+       removeTagsAndContents = link, meta, script, style, title
 
                ## Toolbar options
                ## The TCA configuration may add buttons to the toolbar