You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ar...@locus.apache.org on 2000/01/14 21:40:55 UTC
cvs commit: xml-xerces/java/src/org/apache/xml/serialize BaseMarkupSerializer.java ElementState.java HTMLSerializer.java TextSerializer.java XMLSerializer.java
arkin 00/01/14 12:40:55
Modified: java/src/org/apache/xml/serialize BaseMarkupSerializer.java
ElementState.java HTMLSerializer.java
TextSerializer.java XMLSerializer.java
Log:
Multiple CDATA sections now print as one.
Serializer now complains about not setting the writer.
Fixed indentation in whitespace preserving elements.
Revision Changes Path
1.3 +31 -13 xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java
Index: BaseMarkupSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- BaseMarkupSerializer.java 2000/01/14 02:00:53 1.2
+++ BaseMarkupSerializer.java 2000/01/14 20:40:54 1.3
@@ -145,7 +145,7 @@
/**
* The writer to which the document is written.
*/
- private Writer _writer;
+ protected Writer _writer;
/**
@@ -335,6 +335,8 @@
public void serialize( Element elem )
throws IOException
{
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
try {
startDocument();
} catch ( SAXException except ) { }
@@ -357,6 +359,8 @@
public void serialize( DocumentFragment frag )
throws IOException
{
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
try {
startDocument();
} catch ( SAXException except ) { }
@@ -379,6 +383,8 @@
public void serialize( Document doc )
throws IOException
{
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
try {
startDocument();
} catch ( SAXException except ) { }
@@ -397,7 +403,7 @@
public void characters( char[] chars, int start, int length )
{
- characters( new String( chars, start, length ), false, false );
+ characters( new String( chars, start, length ), false );
}
@@ -506,7 +512,7 @@
state = getElementState();
if ( state != null )
- state.cdata = true;
+ state.doCData = true;
}
@@ -516,7 +522,7 @@
state = getElementState();
if ( state != null )
- state.cdata = false;
+ state.doCData = false;
}
@@ -699,6 +705,8 @@
{
// Only works if we're going out of DTD mode.
if ( _writer == _dtdWriter ) {
+System.out.println( "Writer " + _writer );
+System.out.println( "DocWriter " + _docWriter );
_line.append( _text );
_text = new StringBuffer( 20 );
flushLine( false );
@@ -729,11 +737,13 @@
// handled by SAX are serialized directly.
switch ( node.getNodeType() ) {
case Node.TEXT_NODE :
- characters( node.getNodeValue(), false, false );
+ characters( node.getNodeValue(), false );
break;
case Node.CDATA_SECTION_NODE :
- characters( node.getNodeValue(), true, false );
+ startCDATA();
+ characters( node.getNodeValue(), false );
+ endCDATA();
break;
case Node.COMMENT_NODE :
@@ -742,6 +752,7 @@
case Node.ENTITY_REFERENCE_NODE :
// Entity reference printed directly in text, do not break or pause.
+ endCDATA();
content();
printText( '&' + node.getNodeName() + ';' );
break;
@@ -771,7 +782,7 @@
for ( i = 0 ; i < map.getLength() ; ++i ) {
entity = (Entity) map.item( i );
unparsedEntityDecl( entity.getNodeName(), entity.getPublicId(),
- entity.getSystemId(), entity.getNotationName() );
+ entity.getSystemId(), entity.getNotationName() );
}
}
map = docType.getNotations();
@@ -818,6 +829,11 @@
state = getElementState();
if ( state != null ) {
+ // Need to close CData section first
+ if ( state.inCData && ! state.doCData ) {
+ printText( "]]>" );
+ state.inCData = false;
+ }
// If this is the first content in the element,
// change the state to not-empty and close the
// opening element tag.
@@ -842,10 +858,9 @@
* whether the text is printed as CDATA or unescaped.
*
* @param text The text to print
- * @param cdata True is should print as CDATA
* @param unescaped True if should print unescaped
*/
- protected void characters( String text, boolean cdata, boolean unescaped )
+ protected void characters( String text, boolean unescaped )
{
ElementState state;
@@ -854,11 +869,10 @@
// based on elements listed in the output format (the element
// state) or whether we are inside a CDATA section or entity.
if ( state != null ) {
- cdata = cdata || state.cdata;
unescaped = unescaped || state.unescaped;
}
- if ( cdata ) {
+ if ( state != null && ( state.inCData || state.doCData ) ) {
StringBuffer buffer;
int index;
int saveIndent;
@@ -867,13 +881,17 @@
// appearing in the code must be identified and dealt with.
// The contents of a text node is considered space preserving.
buffer = new StringBuffer( text.length() );
+ if ( ! state.inCData ) {
+ buffer.append( "<![CDATA[" );
+ state.inCData = true;
+ }
index = text.indexOf( "]]>" );
while ( index >= 0 ) {
- buffer.append( "<![CDATA[" ).append( text.substring( 0, index + 2 ) ).append( "]]>" );
+ buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
text = text.substring( index + 2 );
index = text.indexOf( "]]>" );
}
- buffer.append( "<![CDATA[" ).append( text ).append( "]]>" );
+ buffer.append( text );
saveIndent = _nextIndent;
_nextIndent = 0;
printText( buffer, true );
1.2 +7 -1 xml-xerces/java/src/org/apache/xml/serialize/ElementState.java
Index: ElementState.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/ElementState.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- ElementState.java 1999/11/23 22:29:52 1.1
+++ ElementState.java 2000/01/14 20:40:54 1.2
@@ -100,7 +100,7 @@
* True if textual content of current element should be
* serialized as CDATA section.
*/
- boolean cdata;
+ boolean doCData;
/**
@@ -108,6 +108,12 @@
* serialized as raw characters (unescaped).
*/
boolean unescaped;
+
+
+ /**
+ * True while inside CData and printing text as CData.
+ */
+ boolean inCData;
}
1.5 +30 -14 xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java
Index: HTMLSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- HTMLSerializer.java 2000/01/03 19:38:50 1.4
+++ HTMLSerializer.java 2000/01/14 20:40:54 1.5
@@ -212,6 +212,8 @@
{
// Do nothing for HTML/XHTML, browser might not respond
// well to <?xml ...?>
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
}
@@ -223,6 +225,9 @@
String name;
String value;
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
+
state = getElementState();
if ( state == null ) {
// If this is the root element handle it differently.
@@ -273,7 +278,7 @@
} else {
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
- if ( value == null )
+ if ( value == null || value.length() == 0 )
printText( name );
else if ( HTMLdtd.isURI( tagName, name ) )
printText( name + "=\"" + escapeURI( value ) + '"' );
@@ -295,12 +300,13 @@
// unescaped (HTML).
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
tagName.equalsIgnoreCase( "STYLE" ) ) {
- if ( _xhtml )
+ if ( _xhtml ) {
// XHTML: Print contents as CDATA section
- getElementState().cdata = true;
- else
+ getElementState().doCData = true;
+ } else {
// HTML: Print contents unescaped
getElementState().unescaped = true;
+ }
}
}
@@ -315,11 +321,15 @@
unindent();
state = getElementState();
if ( _xhtml) {
- if ( state.empty )
+ if ( state.empty ) {
printText( " />" );
- else
+ } else {
+ // Must leave CData section first
+ if ( state.inCData )
+ printText( "]]>" );
// XHTML: element names are lower case, DOM will be different
printText( "</" + tagName.toLowerCase() + ">" );
+ }
} else {
if ( state.empty )
printText( ">" );
@@ -332,6 +342,9 @@
if ( ! tagName.equalsIgnoreCase( "A" ) && _format.getIndenting() &&
! state.preserveSpace && state.afterElement )
breakLine();
+ // Must leave CData section first (Illegal in HTML, but still)
+ if ( state.inCData )
+ printText( "]]>" );
printText( "</" + tagName + ">" );
}
}
@@ -496,7 +509,7 @@
} else {
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
- if ( value == null )
+ if ( value == null || value.length() == 0 )
printText( name );
else if ( HTMLdtd.isURI( tagName, name ) )
printText( name + "=\"" + escapeURI( value ) + '"' );
@@ -521,12 +534,13 @@
// unescaped (HTML).
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
tagName.equalsIgnoreCase( "STYLE" ) ) {
- if ( _xhtml )
+ if ( _xhtml ) {
// XHTML: Print contents as CDATA section
- getElementState().cdata = true;
- else
+ getElementState().doCData = true;
+ } else {
// HTML: Print contents unescaped
getElementState().unescaped = true;
+ }
}
child = elem.getFirstChild();
@@ -552,7 +566,8 @@
}
- protected void characters( String text, boolean cdata, boolean unescaped )
+ /*
+ protected void characters( String text, boolean unescaped )
{
ElementState state;
@@ -567,8 +582,9 @@
else
super.characters( text, false, true );
} else
- super.characters( text, cdata, unescaped );
+ super.characters( text, unescaped );
}
+ */
protected String getEntityRef( char ch )
@@ -581,8 +597,8 @@
{
int index;
- // Apparently Netscape doesn't like if we escape the URI
- // using %nn, so we leave it as is, just remove any quotes.
+ // XXX Apparently Netscape doesn't like if we escape the URI
+ // using %nn, so we leave it as is, just remove any quotes.
index = uri.indexOf( "\"" );
if ( index >= 0 )
return uri.substring( 0, index );
1.3 +16 -16 xml-xerces/java/src/org/apache/xml/serialize/TextSerializer.java
Index: TextSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/TextSerializer.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- TextSerializer.java 2000/01/03 20:45:18 1.2
+++ TextSerializer.java 2000/01/14 20:40:54 1.3
@@ -86,12 +86,6 @@
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's {@link
* org.xml.sax.DocumentHandler#endDocument}.
- * <p>
- * For elements that are not specified as whitespace preserving,
- * the serializer will potentially break long text lines at space
- * boundaries, indent lines, and serialize elements on separate
- * lines. Line terminators will be regarded as spaces, and
- * spaces at beginning of line will be stripped.
*
*
* @version
@@ -202,8 +196,6 @@
// Do not change the current element state yet.
// This only happens in endElement().
- indent();
-
// Ignore all other attributes of the element, only printing
// its contents.
@@ -221,7 +213,6 @@
// Works much like content() with additions for closing
// an element. Note the different checks for the closed
// element's state and the parent element's state.
- unindent();
state = getElementState();
// Leave the element state and update that of the parent
// (if we're not root) to not empty and after element.
@@ -251,10 +242,22 @@
{
}
+
+ public void characters( char[] chars, int start, int length )
+ {
+ characters( new String( chars, start, length ), false );
+ }
+
- protected void characters( String text, boolean cdata, boolean unescaped )
+ protected void characters( String text, boolean unescaped )
{
- super.characters( text, false, true );
+ ElementState state;
+
+ state = content();
+ if ( state != null ) {
+ state.doCData = state.inCData = false;
+ }
+ printText( text, true );
}
@@ -314,8 +317,6 @@
// Do not change the current element state yet.
// This only happens in endElement().
- indent();
-
// Ignore all other attributes of the element, only printing
// its contents.
@@ -332,7 +333,6 @@
}
endElement( tagName );
} else {
- unindent();
if ( state != null ) {
// After element but parent element is no longer empty.
state.afterElement = true;
@@ -354,11 +354,11 @@
// handled by SAX are serialized directly.
switch ( node.getNodeType() ) {
case Node.TEXT_NODE :
- characters( node.getNodeValue(), false, true );
+ characters( node.getNodeValue(), true );
break;
case Node.CDATA_SECTION_NODE :
- characters( node.getNodeValue(), false, true );
+ characters( node.getNodeValue(), true );
break;
case Node.COMMENT_NODE :
1.5 +10 -2 xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
Index: XMLSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- XMLSerializer.java 2000/01/03 19:38:50 1.4
+++ XMLSerializer.java 2000/01/14 20:40:55 1.5
@@ -176,6 +176,8 @@
public void startDocument()
{
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
// Nothing to do here. All the magic happens in startDocument(String)
}
@@ -188,6 +190,9 @@
String name;
String value;
+ if ( _writer == null )
+ throw new IllegalStateException( "No writer supplied for serializer" );
+
state = getElementState();
if ( state == null ) {
// If this is the root element handle it differently.
@@ -244,7 +249,7 @@
// with the tag name and space preserving.
// We still do not change the current element state.
state = enterElementState( tagName, preserveSpace );
- state.cdata = _format.isCDataElement( tagName );
+ state.doCData = _format.isCDataElement( tagName );
state.unescaped = _format.isNonEscapingElement( tagName );
}
@@ -261,6 +266,9 @@
if ( state.empty ) {
printText( "/>" );
} else {
+ // Must leave CData section first
+ if ( state.inCData )
+ printText( "]]>" );
// This element is not empty and that last content was
// another element, so print a line break before that
// last element and this element's closing tag.
@@ -457,7 +465,7 @@
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
state = enterElementState( tagName, preserveSpace );
- state.cdata = _format.isCDataElement( tagName );
+ state.doCData = _format.isCDataElement( tagName );
state.unescaped = _format.isNonEscapingElement( tagName );
child = elem.getFirstChild();
while ( child != null ) {