You are viewing a plain text version of this content. The canonical link for it is here.
Posted to doxia-commits@maven.apache.org by lt...@apache.org on 2008/02/22 21:21:19 UTC

svn commit: r630310 - in /maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src: main/java/org/apache/maven/doxia/module/docbook/DocBookParser.java test/java/org/apache/maven/doxia/module/docbook/DocBookParserTest.java

Author: ltheussl
Date: Fri Feb 22 12:21:16 2008
New Revision: 630310

URL: http://svn.apache.org/viewvc?rev=630310&view=rev
Log:
[DOXIA-225] DocBookParser swallows significant whitespace
Submitted by: Benjamin Bentmann

Modified:
    maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/main/java/org/apache/maven/doxia/module/docbook/DocBookParser.java
    maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/test/java/org/apache/maven/doxia/module/docbook/DocBookParserTest.java

Modified: maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/main/java/org/apache/maven/doxia/module/docbook/DocBookParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/main/java/org/apache/maven/doxia/module/docbook/DocBookParser.java?rev=630310&r1=630309&r2=630310&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/main/java/org/apache/maven/doxia/module/docbook/DocBookParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/main/java/org/apache/maven/doxia/module/docbook/DocBookParser.java Fri Feb 22 12:21:16 2008
@@ -731,14 +731,26 @@
     {
         String text = parser.getText();
 
-        if ( StringUtils.isNotEmpty( text.trim() ) )
+        /*
+         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
+         * parser so any whitespace that makes it here is significant.
+         */ 
+        if ( StringUtils.isNotEmpty( text ) )
         {
-            // emit separate text events for different lines
-            String[] lines = StringUtils.split( text, EOL );
+            // Emit separate text events for different lines, e.g. the input 
+            // "\nLine1\n\nLine2\n\n" should deliver the event sequence "\n", "Line1\n", "\n", "Line2\n", "\n".
+            // In other words, the concatenation of the text events must deliver the input sequence.
+            // (according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n")
+            String[] lines = text.split( "\n", -1 );
 
-            for ( int i = 0; i < lines.length; i++ )
+            for ( int i = 0; i < lines.length - 1; i++ )
             {
-                sink.text( lines[i] );
+                sink.text( lines[i] + EOL );
+            }
+
+            if ( lines[lines.length - 1].length() > 0 )
+            {
+                sink.text( lines[lines.length - 1] );
             }
         }
     }

Modified: maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/test/java/org/apache/maven/doxia/module/docbook/DocBookParserTest.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/test/java/org/apache/maven/doxia/module/docbook/DocBookParserTest.java?rev=630310&r1=630309&r2=630310&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/test/java/org/apache/maven/doxia/module/docbook/DocBookParserTest.java (original)
+++ maven/doxia/doxia/trunk/doxia-modules/doxia-module-docbook-simple/src/test/java/org/apache/maven/doxia/module/docbook/DocBookParserTest.java Fri Feb 22 12:21:16 2008
@@ -23,10 +23,14 @@
 import java.io.Reader;
 import java.io.Writer;
 
+import java.util.Iterator;
+
 import org.apache.maven.doxia.parser.AbstractParserTest;
 import org.apache.maven.doxia.parser.Parser;
 import org.apache.maven.doxia.parser.ParseException;
 import org.apache.maven.doxia.sink.Sink;
+import org.apache.maven.doxia.sink.SinkEventElement;
+import org.apache.maven.doxia.sink.SinkEventTestingSink;
 
 /**
  * @author <a href="mailto:lars@trieloff.net">Lars Trieloff</a>
@@ -94,5 +98,57 @@
         }
     }
 
+    /** Checks that whitespace between two inline elements reaches the sink as a text event. @throws Exception on failure */
+    public void testSignificantWhiteSpace()
+        throws Exception
+    {
+        // NOTE: the single space between </command> and <emphasis> is significant and must show up as its own text event
+        String text = "<para><command>word</command> <emphasis>word</emphasis></para>";
+
+        SinkEventTestingSink sink = new SinkEventTestingSink();
+
+        parser.parse( text, sink );
+
+        Iterator it = sink.getEventList().iterator();
+
+        assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "bold", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "text", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "bold_", ( (SinkEventElement) it.next() ).getName() );
+
+        SinkEventElement el = (SinkEventElement) it.next();
+        assertEquals( "text", el.getName() );
+        assertEquals( " ",  (String) el.getArgs()[0] );
 
+        assertEquals( "italic", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "text", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "italic_", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() );
+        assertFalse( it.hasNext() );
+
+
+        // same scenario, but with the platform line separator (instead of a space) between the two inline elements
+        String EOL = System.getProperty( "line.separator" );
+        text = "<para><command>word</command>" + EOL + "<emphasis>word</emphasis></para>";
+
+        sink.reset();
+        parser.parse( text, sink );
+        it = sink.getEventList().iterator();
+
+        assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "bold", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "text", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "bold_", ( (SinkEventElement) it.next() ).getName() );
+
+        el = (SinkEventElement) it.next();
+        assertEquals( "text", el.getName() );
+        assertEquals( EOL,  (String) el.getArgs()[0] );
+
+        assertEquals( "italic", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "text", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "italic_", ( (SinkEventElement) it.next() ).getName() );
+        assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() );
+        assertFalse( it.hasNext() );
+
+    }
 }