You are viewing a plain text version of this content. The canonical link for it is here.
Posted to doxia-commits@maven.apache.org by vs...@apache.org on 2008/09/12 23:09:56 UTC

svn commit: r694807 - in /maven/doxia/doxia/trunk: doxia-core/src/main/java/org/apache/maven/doxia/parser/ doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/

Author: vsiveton
Date: Fri Sep 12 14:09:55 2008
New Revision: 694807

URL: http://svn.apache.org/viewvc?rev=694807&view=rev
Log:
DOXIA-226: Make XML based parsers better handle whitespace

o first implementation
o take care of ignorable whitespace in xdoc

Modified:
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
    maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java

Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=694807&r1=694806&r2=694807&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Fri Sep 12 14:09:55 2008
@@ -27,6 +27,7 @@
 import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
 
+import org.codehaus.plexus.util.StringUtils;
 import org.codehaus.plexus.util.xml.pull.MXParser;
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
@@ -42,6 +43,12 @@
     extends AbstractParser
     implements XmlMarkup
 {
+    private boolean ignorable;
+
+    private boolean collapsible;
+
+    private boolean trimmable;
+
     /** {@inheritDoc} */
     public void parse( Reader source, Sink sink )
         throws ParseException
@@ -90,7 +97,7 @@
     /**
      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
      */
     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
@@ -112,11 +119,10 @@
         return atts;
     }
 
-
     /**
      * Parse the model from the XmlPullParser into the given sink.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
@@ -138,7 +144,19 @@
             }
             else if ( eventType == XmlPullParser.TEXT )
             {
-                handleText( parser, sink );
+                String text = getText( parser );
+
+                if ( isIgnorableWhitespace() )
+                {
+                    if ( !text.trim().equals( "" ) )
+                    {
+                        handleText( parser, sink );
+                    }
+                }
+                else
+                {
+                    handleText( parser, sink );
+                }
             }
             else if ( eventType == XmlPullParser.CDSECT )
             {
@@ -152,6 +170,10 @@
             {
                 handleEntity( parser, sink );
             }
+            else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
+            {
+                // nop
+            }
 
             try
             {
@@ -167,7 +189,7 @@
     /**
      * Goes through the possible start tags.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
@@ -178,7 +200,7 @@
     /**
      * Goes through the possible end tags.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
@@ -189,7 +211,7 @@
     /**
      * Handles text events.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      */
@@ -199,7 +221,7 @@
     /**
      * Handles CDATA sections.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      */
@@ -209,7 +231,7 @@
     /**
      * Handles comments.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      */
@@ -219,11 +241,110 @@
     /**
      * Handles entities.
      *
-     * @param parser A parser.
+     * @param parser A parser, not null.
      * @param sink the sink to receive the events.
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
      */
     protected abstract void handleEntity( XmlPullParser parser, Sink sink )
         throws XmlPullParserException;
 
+    /**
+     * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
+     * @see #setIgnorableWhitespace(boolean)
+     */
+    protected boolean isIgnorableWhitespace()
+    {
+        return ignorable;
+    }
+
+    /**
+     * Specify that whitespace will be ignored, i.e.:
+     * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
+     * is equivalent to
+     * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
+     *
+     * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
+     */
+    protected void setIgnorableWhitespace( boolean ignorable )
+    {
+        this.ignorable = ignorable;
+    }
+
+    /**
+     * @return <code>true</code> if whitespace in text will be collapsed, <code>false</code> otherwise.
+     * @see #setCollapsibleWhitespace(boolean)
+     */
+    protected boolean isCollapsibleWhitespace()
+    {
+        return collapsible;
+    }
+
+    /**
+     * Specify that whitespace in text will be collapsed, i.e.:
+     * <pre>Text   Text</pre>
+     * is equivalent to
+     * <pre>Text Text</pre>
+     *
+     * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
+     */
+    protected void setCollapsibleWhitespace( boolean collapsible )
+    {
+        this.collapsible = collapsible;
+    }
+
+    /**
+     * @return <code>true</code> if text will be trimmed, <code>false</code> otherwise.
+     * @see #setTrimmableWhitespace(boolean)
+     */
+    protected boolean isTrimmableWhitespace()
+    {
+        return trimmable;
+    }
+
+    /**
+     * Specify that text will be trimmed, i.e.:
+     * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
+     * is equivalent to
+     * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
+     *
+     * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
+     */
+    protected void setTrimmableWhitespace( boolean trimmable )
+    {
+        this.trimmable = trimmable;
+    }
+
+    /**
+     * @param parser A parser, not null.
+     * @return the text from {@link XmlPullParser#getText()}, trimmed and/or collapsed according to the configuration.
+     * @see XmlPullParser#getText()
+     * @see #isCollapsibleWhitespace()
+     * @see #isTrimmableWhitespace()
+     */
+    protected String getText( XmlPullParser parser )
+    {
+        String text = parser.getText();
+
+        if ( isTrimmableWhitespace() )
+        {
+            text = text.trim();
+        }
+
+        if ( isCollapsibleWhitespace() )
+        {
+            StringBuffer newText = new StringBuffer();
+            String[] elts = StringUtils.split( text, " \r\n" );
+            for ( int i = 0; i < elts.length; i++ )
+            {
+                newText.append( elts[i] );
+                if ( ( i + 1 ) < elts.length )
+                {
+                    newText.append( " " );
+                }
+            }
+            text = newText.toString();
+        }
+
+        return text;
+    }
 }

Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=694807&r1=694806&r2=694807&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java Fri Sep 12 14:09:55 2008
@@ -101,7 +101,7 @@
                 verbatim();
             }
 
-            sink.text( parser.getText() );
+            sink.text( getText( parser ) );
         }
         else if ( parser.getName().equals( Tag.H2.toString() ) )
         {
@@ -428,7 +428,7 @@
 
                 if ( isVerbatim() )
                 {
-                    sink.text( parser.getText() );
+                    sink.text( getText( parser ) );
                 }
                 else
                 {
@@ -437,7 +437,7 @@
             }
             else
             {
-                sink.text( parser.getText() );
+                sink.text( getText( parser ) );
             }
         }
         else if ( parser.getName().equals( Tag.P.toString() ) )
@@ -642,7 +642,7 @@
     protected void handleText( XmlPullParser parser, Sink sink )
         throws XmlPullParserException
     {
-        String text = parser.getText();
+        String text = getText( parser );
 
         /*
          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
@@ -672,14 +672,14 @@
     protected void handleCdsect( XmlPullParser parser, Sink sink )
         throws XmlPullParserException
     {
-        sink.text( parser.getText() );
+        sink.text( getText( parser ) );
     }
 
     /** {@inheritDoc} */
     protected void handleComment( XmlPullParser parser, Sink sink )
         throws XmlPullParserException
     {
-        String text = parser.getText();
+        String text = getText( parser );
 
         if ( "PB".equals( text.trim() ) )
         {
@@ -695,7 +695,7 @@
     protected void handleEntity( XmlPullParser parser, Sink sink )
         throws XmlPullParserException
     {
-        String text = parser.getText();
+        String text = getText( parser );
 
         int[] holder = new int[] {0, 0};
         char[] chars = parser.getTextCharacters( holder );

Modified: maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java?rev=694807&r1=694806&r2=694807&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java Fri Sep 12 14:09:55 2008
@@ -91,6 +91,8 @@
 
         Reader tmp = new StringReader( sourceContent );
 
+        setIgnorableWhitespace( true );
+
         super.parse( tmp, sink );
     }
 
@@ -110,7 +112,7 @@
                 verbatim();
             }
 
-            sink.text( parser.getText() );
+            sink.text( getText( parser ) );
         }
         else if ( parser.getName().equals( DOCUMENT_TAG.toString() ) )
         {
@@ -147,7 +149,7 @@
                 sink.anchor( id.toString() );
                 sink.anchor_();
             }
-            
+
             sink.section( Sink.SECTION_LEVEL_1, attribs );
 
             sink.sectionTitle( Sink.SECTION_LEVEL_1, attribs );
@@ -166,7 +168,7 @@
                 sink.anchor( id.toString() );
                 sink.anchor_();
             }
-            
+
             sink.section( Sink.SECTION_LEVEL_2, attribs );
 
             sink.sectionTitle( Sink.SECTION_LEVEL_2, attribs );
@@ -275,7 +277,7 @@
 
                 if ( isVerbatim() )
                 {
-                    sink.text( parser.getText() );
+                    sink.text( getText( parser ) );
                 }
                 else
                 {
@@ -284,7 +286,7 @@
             }
             else
             {
-                sink.text( parser.getText() );
+                sink.text( getText( parser ) );
             }
         }
         else if ( parser.getName().equals( DOCUMENT_TAG.toString() ) )
@@ -399,7 +401,7 @@
     protected void handleCdsect( XmlPullParser parser, Sink sink )
         throws XmlPullParserException
     {
-        String text = parser.getText();
+        String text = getText( parser );
 
         if ( scriptBlock )
         {
@@ -439,5 +441,4 @@
 
         setSectionLevel( newLevel );
     }
-
 }