You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by be...@apache.org on 2008/04/01 22:42:13 UTC

svn commit: r643558 - in /maven/plugin-tools/trunk/maven-plugin-tools-api/src: main/java/org/apache/maven/tools/plugin/util/PluginUtils.java test/java/org/apache/maven/tools/plugin/util/PluginUtilsTest.java

Author: bentmann
Date: Tue Apr  1 13:42:11 2008
New Revision: 643558

URL: http://svn.apache.org/viewvc?rev=643558&view=rev
Log:
[MPLUGIN-96] Handle character encoding properly in makeHtmlValid()

Modified:
    maven/plugin-tools/trunk/maven-plugin-tools-api/src/main/java/org/apache/maven/tools/plugin/util/PluginUtils.java
    maven/plugin-tools/trunk/maven-plugin-tools-api/src/test/java/org/apache/maven/tools/plugin/util/PluginUtilsTest.java

Modified: maven/plugin-tools/trunk/maven-plugin-tools-api/src/main/java/org/apache/maven/tools/plugin/util/PluginUtils.java
URL: http://svn.apache.org/viewvc/maven/plugin-tools/trunk/maven-plugin-tools-api/src/main/java/org/apache/maven/tools/plugin/util/PluginUtils.java?rev=643558&r1=643557&r2=643558&view=diff
==============================================================================
--- maven/plugin-tools/trunk/maven-plugin-tools-api/src/main/java/org/apache/maven/tools/plugin/util/PluginUtils.java (original)
+++ maven/plugin-tools/trunk/maven-plugin-tools-api/src/main/java/org/apache/maven/tools/plugin/util/PluginUtils.java Tue Apr  1 13:42:11 2008
@@ -19,7 +19,10 @@
  * under the License.
  */
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
+import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLClassLoader;
@@ -38,10 +41,9 @@
 import org.codehaus.plexus.component.repository.ComponentDependency;
 import org.codehaus.plexus.util.DirectoryScanner;
 import org.codehaus.plexus.util.FileUtils;
-import org.codehaus.plexus.util.StringInputStream;
-import org.codehaus.plexus.util.StringOutputStream;
 import org.codehaus.plexus.util.StringUtils;
 import org.codehaus.plexus.util.xml.XMLWriter;
+import org.w3c.tidy.Configuration;
 import org.w3c.tidy.Tidy;
 
 /**
@@ -241,29 +243,42 @@
             return "";
         }
 
-        StringOutputStream out = new StringOutputStream();
+        String commentCleaned = decodeJavadocTags( description );
 
         // Using jTidy to clean comment
         Tidy tidy = new Tidy();
         tidy.setDocType( "loose" );
         tidy.setXHTML( true );
         tidy.setXmlOut( true );
+        tidy.setCharEncoding( Configuration.UTF8 );
         tidy.setMakeClean( true );
+        tidy.setNumEntities( true );
+        tidy.setQuoteNbsp( false );
         tidy.setQuiet( true );
         tidy.setShowWarnings( false );
-        tidy.parse( new StringInputStream( decodeJavadocTags( description ) ), out );
+        try
+        {
+            ByteArrayOutputStream out = new ByteArrayOutputStream( commentCleaned.length() + 256 );
+            tidy.parse( new ByteArrayInputStream( commentCleaned.getBytes( "UTF-8" ) ), out );
+            commentCleaned = out.toString("UTF-8");
+        }
+        catch ( UnsupportedEncodingException e )
+        {
+            // cannot happen as every JVM must support UTF-8, see also class javadoc for java.nio.charset.Charset
+        }
 
-        // strip the header/body stuff
-        String LS = System.getProperty( "line.separator" );
-        String commentCleaned = out.toString();
         if ( StringUtils.isEmpty( commentCleaned ) )
         {
             return "";
         }
+
+        // strip the header/body stuff
+        String LS = System.getProperty( "line.separator" );
         int startPos = commentCleaned.indexOf( "<body>" + LS ) + 6 + LS.length();
         int endPos = commentCleaned.indexOf( LS + "</body>" );
+        commentCleaned = commentCleaned.substring( startPos, endPos );
 
-        return commentCleaned.substring( startPos, endPos );
+        return commentCleaned;
     }
 
     /**

Modified: maven/plugin-tools/trunk/maven-plugin-tools-api/src/test/java/org/apache/maven/tools/plugin/util/PluginUtilsTest.java
URL: http://svn.apache.org/viewvc/maven/plugin-tools/trunk/maven-plugin-tools-api/src/test/java/org/apache/maven/tools/plugin/util/PluginUtilsTest.java?rev=643558&r1=643557&r2=643558&view=diff
==============================================================================
--- maven/plugin-tools/trunk/maven-plugin-tools-api/src/test/java/org/apache/maven/tools/plugin/util/PluginUtilsTest.java (original)
+++ maven/plugin-tools/trunk/maven-plugin-tools-api/src/test/java/org/apache/maven/tools/plugin/util/PluginUtilsTest.java Tue Apr  1 13:42:11 2008
@@ -130,8 +130,19 @@
 
         // wrong HTML
         javadoc = "Generates <i>something</i> <b> for the project.";
-        assertEquals( "Generates <i>something</i> <b> for the project.</b>", PluginUtils
-            .makeHtmlValid( javadoc ) );
+        assertEquals( "Generates <i>something</i> <b> for the project.</b>", PluginUtils.makeHtmlValid( javadoc ) );
+
+        // special characters
+        javadoc = "& &amp; < > \u00A0";
+        assertEquals( "&amp; &amp; &lt; &gt; \u00A0", PluginUtils.makeHtmlValid( javadoc ) );
+
+        // non ASCII characters
+        javadoc = "\u00E4 \u00F6 \u00FC \u00DF";
+        assertEquals( javadoc, PluginUtils.makeHtmlValid( javadoc ) );
+
+        // non Latin1 characters
+        javadoc = "\u0130 \u03A3 \u05D0 \u06DE";
+        assertEquals( javadoc, PluginUtils.makeHtmlValid( javadoc ) );
     }
 
     public void testDecodeJavadocTags()