You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ar...@locus.apache.org on 2000/01/14 21:40:55 UTC
cvs commit: xml-xerces/java/src/org/apache/xml/serialize BaseMarkupSerializer.java ElementState.java HTMLSerializer.java TextSerializer.java XMLSerializer.java

arkin       00/01/14 12:40:55

  Modified:    java/src/org/apache/xml/serialize BaseMarkupSerializer.java
                        ElementState.java HTMLSerializer.java
                        TextSerializer.java XMLSerializer.java
  Log:
  Consecutive CDATA sections are now printed as a single CDATA section.
  Serializer now throws IllegalStateException if no writer has been set.
  Fixed indentation in whitespace-preserving elements.
  
  Revision  Changes    Path
  1.3       +31 -13    xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java
  
  Index: BaseMarkupSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- BaseMarkupSerializer.java	2000/01/14 02:00:53	1.2
  +++ BaseMarkupSerializer.java	2000/01/14 20:40:54	1.3
  @@ -145,7 +145,7 @@
       /**
        * The writer to which the document is written.
        */
  -    private Writer          _writer;
  +    protected Writer          _writer;
   
   
       /**
  @@ -335,6 +335,8 @@
       public void serialize( Element elem )
           throws IOException
       {
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
   	try {
   	    startDocument();
   	} catch ( SAXException except ) { }
  @@ -357,6 +359,8 @@
       public void serialize( DocumentFragment frag )
           throws IOException
       {
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
   	try {
   	    startDocument();
   	} catch ( SAXException except ) { }
  @@ -379,6 +383,8 @@
       public void serialize( Document doc )
           throws IOException
       {
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
   	try {
   	    startDocument();
   	} catch ( SAXException except ) { }
  @@ -397,7 +403,7 @@
   
       public void characters( char[] chars, int start, int length )
       {
  -	characters( new String( chars, start, length ), false, false );
  +	characters( new String( chars, start, length ), false );
       }
   
   
  @@ -506,7 +512,7 @@
   
   	state = getElementState();
   	if ( state != null )
  -	    state.cdata = true;
  +	    state.doCData = true;
       }
   
   
  @@ -516,7 +522,7 @@
   
   	state = getElementState();
   	if ( state != null )
  -	    state.cdata = false;
  +	    state.doCData = false;
       }
   
   
  @@ -699,6 +705,8 @@
       {
   	// Only works if we're going out of DTD mode.
   	if ( _writer == _dtdWriter ) {
  +System.out.println( "Writer " + _writer );
  +System.out.println( "DocWriter " + _docWriter );
   	    _line.append( _text );
   	    _text = new StringBuffer( 20 );
   	    flushLine( false );
  @@ -729,11 +737,13 @@
   	// handled by SAX are serialized directly.
           switch ( node.getNodeType() ) {
   	case Node.TEXT_NODE :
  -	    characters( node.getNodeValue(), false, false );
  +	    characters( node.getNodeValue(), false );
   	    break;
   
   	case Node.CDATA_SECTION_NODE :
  -	    characters( node.getNodeValue(), true, false );
  +	    startCDATA();
  +	    characters( node.getNodeValue(), false );
  +	    endCDATA();
   	    break;
   
   	case Node.COMMENT_NODE :
  @@ -742,6 +752,7 @@
   
   	case Node.ENTITY_REFERENCE_NODE :
   	    // Entity reference printed directly in text, do not break or pause.
  +	    endCDATA();
   	    content();
   	    printText( '&' + node.getNodeName() + ';' );
   	    break;
  @@ -771,7 +782,7 @@
   		    for ( i = 0 ; i < map.getLength() ; ++i ) {
   			entity = (Entity) map.item( i );
   			unparsedEntityDecl( entity.getNodeName(), entity.getPublicId(),
  -				    entity.getSystemId(), entity.getNotationName() );
  +					    entity.getSystemId(), entity.getNotationName() );
   		    }
   		}
   		map = docType.getNotations();
  @@ -818,6 +829,11 @@
   
   	state = getElementState();
   	if ( state != null ) {
  +	    // Need to close CData section first
  +	    if ( state.inCData && ! state.doCData ) {
  +		printText( "]]>" );
  +		state.inCData = false;
  +	    }
   	    // If this is the first content in the element,
   	    // change the state to not-empty and close the
   	    // opening element tag.
  @@ -842,10 +858,9 @@
        * whether the text is printed as CDATA or unescaped.
        *
        * @param text The text to print
  -     * @param cdata True is should print as CDATA
        * @param unescaped True is should print unescaped
        */
  -    protected void characters( String text, boolean cdata, boolean unescaped )
  +    protected void characters( String text, boolean unescaped )
       {
   	ElementState state;
   
  @@ -854,11 +869,10 @@
   	// based on elements listed in the output format (the element
   	// state) or whether we are inside a CDATA section or entity.
   	if ( state != null ) {
  -	    cdata = cdata || state.cdata;
   	    unescaped = unescaped || state.unescaped;
   	}
   
  -	if ( cdata ) {
  +	if ( state != null && ( state.inCData || state.doCData ) ) {
   	    StringBuffer buffer;
   	    int          index;
   	    int          saveIndent;
  @@ -867,13 +881,17 @@
   	    // appearing in the code must be identified and dealt with.
   	    // The contents of a text node is considered space preserving.
   	    buffer = new StringBuffer( text.length() );
  +	    if ( ! state.inCData ) {
  +		buffer.append( "<![CDATA[" );
  +		state.inCData = true;
  +	    }
   	    index = text.indexOf( "]]>" );
   	    while ( index >= 0 ) {
  -		buffer.append( "<![CDATA[" ).append( text.substring( 0, index + 2 ) ).append( "]]>" );
  +		buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
   		text = text.substring( index + 2 );
   		index = text.indexOf( "]]>" );
   	    }
  -	    buffer.append( "<![CDATA[" ).append( text ).append( "]]>" );
  +	    buffer.append( text );
   	    saveIndent = _nextIndent;
   	    _nextIndent = 0;
   	    printText( buffer, true );
  
  
  
  1.2       +7 -1      xml-xerces/java/src/org/apache/xml/serialize/ElementState.java
  
  Index: ElementState.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/ElementState.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- ElementState.java	1999/11/23 22:29:52	1.1
  +++ ElementState.java	2000/01/14 20:40:54	1.2
  @@ -100,7 +100,7 @@
        * True if textual content of current element should be
        * serialized as CDATA section.
        */
  -    boolean cdata;
  +    boolean doCData;
   
   
       /**
  @@ -108,6 +108,12 @@
        * serialized as raw characters (unescaped).
        */
       boolean unescaped;
  +
  +
  +    /**
  +     * True while inside CData and printing text as CData.
  +     */
  +    boolean inCData;
   
   
   }
  
  
  
  1.5       +30 -14    xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java
  
  Index: HTMLSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- HTMLSerializer.java	2000/01/03 19:38:50	1.4
  +++ HTMLSerializer.java	2000/01/14 20:40:54	1.5
  @@ -212,6 +212,8 @@
       {
   	// Do nothing for HTML/XHTML, browser might not respond
   	// well to <?xml ...?>
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
       }
   
   
  @@ -223,6 +225,9 @@
   	String       name;
   	String       value;
   
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
  +
   	state = getElementState();
   	if ( state == null ) {
   	    // If this is the root element handle it differently.
  @@ -273,7 +278,7 @@
   		} else {
   		    // HTML: Empty values print as attribute name, no value.
   		    // HTML: URI attributes will print unescaped
  -		    if ( value == null )
  +		    if ( value == null || value.length() == 0 )
   			printText( name );
   		    else if ( HTMLdtd.isURI( tagName, name ) )
   			printText( name + "=\"" + escapeURI( value ) + '"' );
  @@ -295,12 +300,13 @@
   	// unescaped (HTML).
   	if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
   	     tagName.equalsIgnoreCase( "STYLE" ) ) {
  -	    if ( _xhtml )
  +	    if ( _xhtml ) {
   		// XHTML: Print contents as CDATA section
  -		getElementState().cdata = true;
  -	    else
  +		getElementState().doCData = true;
  +	    } else {
   		// HTML: Print contents unescaped
   		getElementState().unescaped = true;
  +	    }
   	}
       }
   
  @@ -315,11 +321,15 @@
   	unindent();
   	state = getElementState();
   	if ( _xhtml) {
  -	    if ( state.empty )
  +	    if ( state.empty ) {
   		printText( " />" );
  -	    else
  +	    } else {
  +		// Must leave CData section first
  +		if ( state.inCData )
  +		    printText( "]]>" );
   		// XHTML: element names are lower case, DOM will be different
   		printText( "</" + tagName.toLowerCase() + ">" );
  +	    }
   	} else {
   	    if ( state.empty )
   		printText( ">" );
  @@ -332,6 +342,9 @@
   		if ( ! tagName.equalsIgnoreCase( "A" )  && _format.getIndenting() &&
   		     ! state.preserveSpace && state.afterElement )
   		    breakLine();
  +		// Must leave CData section first (Illegal in HTML, but still)
  +		if ( state.inCData )
  +		    printText( "]]>" );
   		printText( "</" + tagName + ">" );
   	    }
   	}
  @@ -496,7 +509,7 @@
   		    } else {
   			// HTML: Empty values print as attribute name, no value.
   			// HTML: URI attributes will print unescaped
  -			if ( value == null )
  +			if ( value == null || value.length() == 0 )
   			    printText( name );
   			else if ( HTMLdtd.isURI( tagName, name ) )
   			    printText( name + "=\"" + escapeURI( value ) + '"' );
  @@ -521,12 +534,13 @@
   	    // unescaped (HTML).
   	    if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
   		 tagName.equalsIgnoreCase( "STYLE" ) ) {
  -		if ( _xhtml )
  +		if ( _xhtml ) {
   		    // XHTML: Print contents as CDATA section
  -		    getElementState().cdata = true;
  -		else
  +		    getElementState().doCData = true;
  +		} else {
   		    // HTML: Print contents unescaped
   		    getElementState().unescaped = true;
  +		}
   	    }
   
   	    child = elem.getFirstChild();
  @@ -552,7 +566,8 @@
       }
   
   
  -    protected void characters( String text, boolean cdata, boolean unescaped )
  +    /*
  +    protected void characters( String text, boolean unescaped )
       {
   	ElementState state;
   
  @@ -567,8 +582,9 @@
   	    else
   		super.characters( text, false, true );
   	} else
  -	    super.characters( text, cdata, unescaped );
  +	    super.characters( text, unescaped );
       }
  +    */
   
   
       protected String getEntityRef( char ch )
  @@ -581,8 +597,8 @@
       {
   	int index;
   
  -	// Apparently Netscape doesn't like if we escape the URI
  -	// using %nn, so we leave it as is, just remove any quotes.
  +	// XXX  Apparently Netscape doesn't like if we escape the URI
  +	//      using %nn, so we leave it as is, just remove any quotes.
   	index = uri.indexOf( "\"" );
   	if ( index >= 0 )
   	    return uri.substring( 0, index );
  
  
  
  1.3       +16 -16    xml-xerces/java/src/org/apache/xml/serialize/TextSerializer.java
  
  Index: TextSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/TextSerializer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TextSerializer.java	2000/01/03 20:45:18	1.2
  +++ TextSerializer.java	2000/01/14 20:40:54	1.3
  @@ -86,12 +86,6 @@
    * will not throw an exception directly, but only throw it
    * at the end of serializing (either DOM or SAX's {@link
    * org.xml.sax.DocumentHandler#endDocument}.
  - * <p>
  - * For elements that are not specified as whitespace preserving,
  - * the serializer will potentially break long text lines at space
  - * boundaries, indent lines, and serialize elements on separate
  - * lines. Line terminators will be regarded as spaces, and
  - * spaces at beginning of line will be stripped.
    *
    *
    * @version
  @@ -202,8 +196,6 @@
   	// Do not change the current element state yet.
   	// This only happens in endElement().
   
  -	indent();
  -
   	// Ignore all other attributes of the element, only printing
   	// its contents.
   
  @@ -221,7 +213,6 @@
   	// Works much like content() with additions for closing
   	// an element. Note the different checks for the closed
   	// element's state and the parent element's state.
  -	unindent();
   	state = getElementState();
   	// Leave the element state and update that of the parent
   	// (if we're not root) to not empty and after element.
  @@ -251,10 +242,22 @@
       {
       }
   
  +
  +    public void characters( char[] chars, int start, int length )
  +    {
  +	characters( new String( chars, start, length ), false );
  +    }
  +
   
  -    protected void characters( String text, boolean cdata, boolean unescaped )
  +    protected void characters( String text, boolean unescaped )
       {
  -	super.characters( text, false, true );
  +	ElementState state;
  +
  +	state = content();
  +	if ( state != null ) {
  +	    state.doCData = state.inCData = false;
  +	}
  +	printText( text, true );
       }
   
   
  @@ -314,8 +317,6 @@
   	// Do not change the current element state yet.
   	// This only happens in endElement().
   
  -	indent();
  -
   	// Ignore all other attributes of the element, only printing
   	// its contents.
   
  @@ -332,7 +333,6 @@
   	    }
   	    endElement( tagName );
   	} else {
  -	    unindent();
   	    if ( state != null ) {
   		// After element but parent element is no longer empty.
   		state.afterElement = true;
  @@ -354,11 +354,11 @@
   	// handled by SAX are serialized directly.
           switch ( node.getNodeType() ) {
   	case Node.TEXT_NODE :
  -	    characters( node.getNodeValue(), false, true );
  +	    characters( node.getNodeValue(), true );
   	    break;
   
   	case Node.CDATA_SECTION_NODE :
  -	    characters( node.getNodeValue(), false, true );
  +	    characters( node.getNodeValue(), true );
   	    break;
   
   	case Node.COMMENT_NODE :
  
  
  
  1.5       +10 -2     xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
  
  Index: XMLSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLSerializer.java	2000/01/03 19:38:50	1.4
  +++ XMLSerializer.java	2000/01/14 20:40:55	1.5
  @@ -176,6 +176,8 @@
   
       public void startDocument()
       {
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
   	// Nothing to do here. All the magic happens in startDocument(String)
       }
   
  @@ -188,6 +190,9 @@
   	String       name;
   	String       value;
   
  +	if ( _writer == null )
  +	    throw new IllegalStateException( "No writer supplied for serializer" );
  +
   	state = getElementState();
   	if ( state == null ) {
   	    // If this is the root element handle it differently.
  @@ -244,7 +249,7 @@
   	// with the tag name and space preserving.
   	// We still do not change the curent element state.
   	state = enterElementState( tagName, preserveSpace );
  -	state.cdata = _format.isCDataElement( tagName );
  +	state.doCData = _format.isCDataElement( tagName );
   	state.unescaped = _format.isNonEscapingElement( tagName );
       }
   
  @@ -261,6 +266,9 @@
   	if ( state.empty ) {
   	    printText( "/>" );
   	} else {
  +	    // Must leave CData section first
  +	    if ( state.inCData )
  +		printText( "]]>" );
   	    // This element is not empty and that last content was
   	    // another element, so print a line break before that
   	    // last element and this element's closing tag.
  @@ -457,7 +465,7 @@
   	    // Enter an element state, and serialize the children
   	    // one by one. Finally, end the element.
   	    state = enterElementState( tagName, preserveSpace );
  -	    state.cdata = _format.isCDataElement( tagName );
  +	    state.doCData = _format.isCDataElement( tagName );
   	    state.unescaped = _format.isNonEscapingElement( tagName );
   	    child = elem.getFirstChild();
   	    while ( child != null ) {