You are viewing a plain text version of this content. The canonical link for it is here.
Posted to doxia-commits@maven.apache.org by vs...@apache.org on 2008/09/12 23:09:56 UTC
svn commit: r694807 - in /maven/doxia/doxia/trunk:
doxia-core/src/main/java/org/apache/maven/doxia/parser/
doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/
Author: vsiveton
Date: Fri Sep 12 14:09:55 2008
New Revision: 694807
URL: http://svn.apache.org/viewvc?rev=694807&view=rev
Log:
DOXIA-226: Make XML based parsers better handle whitespace
o first implementation
o take care of ignorable whitespace in xdoc
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=694807&r1=694806&r2=694807&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Fri Sep 12 14:09:55 2008
@@ -27,6 +27,7 @@
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventAttributeSet;
+import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
@@ -42,6 +43,12 @@
extends AbstractParser
implements XmlMarkup
{
+ private boolean ignorable;
+
+ private boolean collapsible;
+
+ private boolean trimmable;
+
/** {@inheritDoc} */
public void parse( Reader source, Sink sink )
throws ParseException
@@ -90,7 +97,7 @@
/**
* Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
*/
protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
@@ -112,11 +119,10 @@
return atts;
}
-
/**
* Parse the model from the XmlPullParser into the given sink.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
* @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
@@ -138,7 +144,19 @@
}
else if ( eventType == XmlPullParser.TEXT )
{
- handleText( parser, sink );
+ String text = getText( parser );
+
+ if ( isIgnorableWhitespace() )
+ {
+ if ( !text.trim().equals( "" ) )
+ {
+ handleText( parser, sink );
+ }
+ }
+ else
+ {
+ handleText( parser, sink );
+ }
}
else if ( eventType == XmlPullParser.CDSECT )
{
@@ -152,6 +170,10 @@
{
handleEntity( parser, sink );
}
+ else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
+ {
+ // nop
+ }
try
{
@@ -167,7 +189,7 @@
/**
* Goes through the possible start tags.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
* @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
@@ -178,7 +200,7 @@
/**
* Goes through the possible end tags.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
* @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
@@ -189,7 +211,7 @@
/**
* Handles text events.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
*/
@@ -199,7 +221,7 @@
/**
* Handles CDATA sections.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
*/
@@ -209,7 +231,7 @@
/**
* Handles comments.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
*/
@@ -219,11 +241,110 @@
/**
* Handles entities.
*
- * @param parser A parser.
+ * @param parser A parser, not null.
* @param sink the sink to receive the events.
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
*/
protected abstract void handleEntity( XmlPullParser parser, Sink sink )
throws XmlPullParserException;
+ /**
+ * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
+ * @see #setIgnorableWhitespace(boolean)
+ */
+ protected boolean isIgnorableWhitespace()
+ {
+ return ignorable;
+ }
+
+ /**
+ * Specify that whitespace-only text between elements will be ignored, i.e.:
+ * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
+ * is equivalent to
+ * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
+ *
+ * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
+ */
+ protected void setIgnorableWhitespace( boolean ignorable )
+ {
+ this.ignorable = ignorable;
+ }
+
+ /**
+ * @return <code>true</code> if whitespace in text will be collapsed, <code>false</code> otherwise.
+ * @see #setCollapsibleWhitespace(boolean)
+ */
+ protected boolean isCollapsibleWhitespace()
+ {
+ return collapsible;
+ }
+
+ /**
+ * Specify that runs of whitespace in text will be collapsed to a single space, i.e.:
+ * <pre>Text   Text</pre>
+ * is equivalent to
+ * <pre>Text Text</pre>
+ *
+ * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
+ */
+ protected void setCollapsibleWhitespace( boolean collapsible )
+ {
+ this.collapsible = collapsible;
+ }
+
+ /**
+ * @return <code>true</code> if text will be trimmed, <code>false</code> otherwise.
+ * @see #setTrimmableWhitespace(boolean)
+ */
+ protected boolean isTrimmableWhitespace()
+ {
+ return trimmable;
+ }
+
+ /**
+ * Specify that text will be trimmed, i.e.:
+ * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
+ * is equivalent to
+ * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
+ *
+ * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
+ */
+ protected void setTrimmableWhitespace( boolean trimmable )
+ {
+ this.trimmable = trimmable;
+ }
+
+ /**
+ * @param parser A parser, not null.
+ * @return the text from {@link XmlPullParser#getText()}, trimmed and/or collapsed according to the current configuration.
+ * @see XmlPullParser#getText()
+ * @see #isCollapsibleWhitespace()
+ * @see #isTrimmableWhitespace()
+ */
+ protected String getText( XmlPullParser parser )
+ {
+ String text = parser.getText();
+
+ if ( isTrimmableWhitespace() )
+ {
+ text = text.trim();
+ }
+
+ if ( isCollapsibleWhitespace() )
+ {
+ StringBuffer newText = new StringBuffer();
+ String[] elts = StringUtils.split( text, " \r\n" );
+ for ( int i = 0; i < elts.length; i++ )
+ {
+ newText.append( elts[i] );
+ if ( ( i + 1 ) < elts.length )
+ {
+ newText.append( " " );
+ }
+ }
+ text = newText.toString();
+ }
+
+ return text;
+ }
}
Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=694807&r1=694806&r2=694807&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java Fri Sep 12 14:09:55 2008
@@ -101,7 +101,7 @@
verbatim();
}
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
else if ( parser.getName().equals( Tag.H2.toString() ) )
{
@@ -428,7 +428,7 @@
if ( isVerbatim() )
{
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
else
{
@@ -437,7 +437,7 @@
}
else
{
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
}
else if ( parser.getName().equals( Tag.P.toString() ) )
@@ -642,7 +642,7 @@
protected void handleText( XmlPullParser parser, Sink sink )
throws XmlPullParserException
{
- String text = parser.getText();
+ String text = getText( parser );
/*
* NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
@@ -672,14 +672,14 @@
protected void handleCdsect( XmlPullParser parser, Sink sink )
throws XmlPullParserException
{
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
/** {@inheritDoc} */
protected void handleComment( XmlPullParser parser, Sink sink )
throws XmlPullParserException
{
- String text = parser.getText();
+ String text = getText( parser );
if ( "PB".equals( text.trim() ) )
{
@@ -695,7 +695,7 @@
protected void handleEntity( XmlPullParser parser, Sink sink )
throws XmlPullParserException
{
- String text = parser.getText();
+ String text = getText( parser );
int[] holder = new int[] {0, 0};
char[] chars = parser.getTextCharacters( holder );
Modified: maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java?rev=694807&r1=694806&r2=694807&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java Fri Sep 12 14:09:55 2008
@@ -91,6 +91,8 @@
Reader tmp = new StringReader( sourceContent );
+ setIgnorableWhitespace( true );
+
super.parse( tmp, sink );
}
@@ -110,7 +112,7 @@
verbatim();
}
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
else if ( parser.getName().equals( DOCUMENT_TAG.toString() ) )
{
@@ -147,7 +149,7 @@
sink.anchor( id.toString() );
sink.anchor_();
}
-
+
sink.section( Sink.SECTION_LEVEL_1, attribs );
sink.sectionTitle( Sink.SECTION_LEVEL_1, attribs );
@@ -166,7 +168,7 @@
sink.anchor( id.toString() );
sink.anchor_();
}
-
+
sink.section( Sink.SECTION_LEVEL_2, attribs );
sink.sectionTitle( Sink.SECTION_LEVEL_2, attribs );
@@ -275,7 +277,7 @@
if ( isVerbatim() )
{
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
else
{
@@ -284,7 +286,7 @@
}
else
{
- sink.text( parser.getText() );
+ sink.text( getText( parser ) );
}
}
else if ( parser.getName().equals( DOCUMENT_TAG.toString() ) )
@@ -399,7 +401,7 @@
protected void handleCdsect( XmlPullParser parser, Sink sink )
throws XmlPullParserException
{
- String text = parser.getText();
+ String text = getText( parser );
if ( scriptBlock )
{
@@ -439,5 +441,4 @@
setSectionLevel( newLevel );
}
-
}