You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@roller.apache.org by "Terry Smith (JIRA)" <ji...@apache.org> on 2008/06/17 23:20:58 UTC
[jira] Created: (ROL-1730) Custom MS Word Cleaner

Custom MS Word Cleaner
----------------------

                 Key: ROL-1730
                 URL: https://issues.apache.org/roller/browse/ROL-1730
             Project: Roller
          Issue Type: Improvement
          Components: Weblog Editor
    Affects Versions: 4.0
         Environment: Roller 4.0
            Reporter: Terry Smith
            Assignee: Roller Unassigned
            Priority: Minor


Added the following function to htmlarea.js to clean up content copied and pasted from MS Word:

/**
 * Cleans up HTML that was pasted into the editor from MS Word.
 *
 * Reads the current markup with editor.getInnerHTML(), then:
 *   1. replaces Word's auto-corrected typographic characters with plain
 *      ASCII equivalents,
 *   2. removes HTML comments and Word wrapper tags (html, body, div, meta,
 *      span, xml, del, ins, "<!...>" declarations and o:/v:/w:/x:/p:
 *      namespaced tags) while keeping the content between the tags,
 *   3. strips every attribute from p, ul, li, hr, table, tr and td tags,
 *   4. drops those elements when their only content is "&nbsp;",
 * and finally writes the result back with editor.setHTML().
 *
 * @param {Object} editor  Object exposing getInnerHTML() and setHTML(html).
 */
function customWordCleaner(editor) {
	var html = editor.getInnerHTML();
	var i;

	// [pattern, replacement] pairs for MS Word auto-correct characters.
	// The characters are matched directly by code point, which avoids the
	// deprecated escape()/unescape() round trip.
	var charFixes = [
		[/\u201C/g, '"'],   // left double quote
		[/\u201D/g, '"'],   // right double quote
		[/\u2013/g, '-'],   // en dash
		[/\u2014/g, '-'],   // em dash
		[/\u2018/g, '\''],  // left single quote
		[/\u2019/g, '\''],  // right single quote / apostrophe
		[/\u00BD/g, '1/2'], // one-half symbol
		[/\u2122/g, 'TM'],  // trademark symbol
		[/\u2026/g, '...']  // horizontal ellipsis
	];
	for (i = 0; i < charFixes.length; i++) {
		html = html.replace(charFixes[i][0], charFixes[i][1]);
	}

	// Remove complete comments first. Word's conditional comments contain
	// '>' characters, so stripping them as ordinary "<!...>" tags would stop
	// at the first '>' and leave the rest of the comment in the document.
	html = html.replace(/<!--[\s\S]*?-->/g, '');

	// Strip Word wrapper tags (opening and closing), in any letter case.
	// The lookahead requires the tag name to end there, so that e.g. "del"
	// does not match a longer tag name that merely starts with it.
	html = html.replace(/<\/?(?:(?:html|body|div|meta|span|xml|del|ins)(?=[\s\/>])|!|[ovwxp]:\w+)[^>]*>/gi, '');

	// Drop all attributes from block/table tags. The lookahead keeps tags
	// such as <pre> or <link> from being mistaken for <p> or <li>.
	html = html.replace(/<(\/?)(p|ul|li|hr|table|tr|td)(?=[\s\/>])[^>]*>/gi, '<$1$2>');

	// Remove elements containing only a non-breaking space. The tags were
	// normalised above; the backreference makes sure the closing tag
	// belongs to the same element as the opening tag.
	html = html.replace(/<(p|ul|li|hr|table|tr|td)>&nbsp;<\/\1>/gi, '');

	editor.setHTML(html);
}

.
.
.
  parseTree(this._doc.body);
  //  Add cleaner here.
  customWordCleaner(this);

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.