You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by tc...@apache.org on 2003/11/19 02:48:12 UTC
cvs commit: cocoon-2.1/src/test/org/apache/cocoon/xml AbstractXMLTestCase.java SaxBufferTestCase.java
tcurdt 2003/11/18 17:48:12
Modified: src/java/org/apache/cocoon/components/sax
XMLByteStreamCompiler.java
XMLByteStreamInterpreter.java
src/test/org/apache/cocoon/components/sax
XMLByteStreamCompilerInterpreterTestCase.java
src/test/org/apache/cocoon/xml AbstractXMLTestCase.java
SaxBufferTestCase.java
Log:
added support for character events longer than 0x7FFF,
removed some array creation and made some further optimizations
Revision Changes Path
1.4 +128 -68 cocoon-2.1/src/java/org/apache/cocoon/components/sax/XMLByteStreamCompiler.java
Index: XMLByteStreamCompiler.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/java/org/apache/cocoon/components/sax/XMLByteStreamCompiler.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- XMLByteStreamCompiler.java 7 Nov 2003 14:28:27 -0000 1.3
+++ XMLByteStreamCompiler.java 19 Nov 2003 01:48:12 -0000 1.4
@@ -63,14 +63,14 @@
*
* @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
* @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
+ * @author <a href="mailto:tcurdt@apache.org">Torsten Curdt</a>
* @version CVS $Id$
*/
-public final class XMLByteStreamCompiler
-implements XMLSerializer, Recyclable {
+public final class XMLByteStreamCompiler implements XMLSerializer, Recyclable {
private HashMap map;
- private int count;
+ private int mapCount;
/** The buffer for the compile xml byte stream. */
private byte buf[];
@@ -78,15 +78,19 @@
/** The number of valid bytes in the buffer. */
private int bufCount;
+ private int bufCountAverage;
+
+
public XMLByteStreamCompiler() {
this.map = new HashMap();
+ this.bufCountAverage = 2000;
this.initOutput();
}
private void initOutput() {
- this.count = 0;
+ this.mapCount = 0;
this.map.clear();
- this.buf = new byte[2000];
+ this.buf = new byte[bufCountAverage];
this.buf[0] = (byte)'C';
this.buf[1] = (byte)'X';
this.buf[2] = (byte)'M';
@@ -97,6 +101,7 @@
}
public void recycle() {
+ bufCountAverage = (bufCountAverage + bufCount) / 2;
this.initOutput();
}
@@ -121,12 +126,18 @@
public Object getSAXFragment() {
- if ( this.bufCount == 6) { // no event arrived yet
+ if (this.bufCount == 6) { // no event arrived yet
return null;
}
+ /*
+ TC:
+ not necessary since we create a new buffer on each recycle()
+
byte newbuf[] = new byte[this.bufCount];
System.arraycopy(this.buf, 0, newbuf, 0, this.bufCount);
return newbuf;
+ */
+ return buf;
}
public void startDocument() throws SAXException {
@@ -137,8 +148,7 @@
this.writeEvent(END_DOCUMENT);
}
- public void startPrefixMapping(java.lang.String prefix, java.lang.String uri)
- throws SAXException {
+ public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) throws SAXException {
this.writeEvent(START_PREFIX_MAPPING);
this.writeString(prefix);
this.writeString(uri);
@@ -149,8 +159,7 @@
this.writeString(prefix);
}
- public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
- throws SAXException {
+ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
int length = atts.getLength();
this.writeEvent(START_ELEMENT);
this.writeAttributes(length);
@@ -166,28 +175,24 @@
this.writeString(qName);
}
- public void endElement(String namespaceURI, String localName, String qName)
- throws SAXException {
+ public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
this.writeEvent(END_ELEMENT);
this.writeString((namespaceURI == null ? "" : namespaceURI));
this.writeString(localName);
this.writeString(qName);
}
- public void characters(char[] ch, int start, int length)
- throws SAXException {
+ public void characters(char[] ch, int start, int length) throws SAXException {
this.writeEvent(CHARACTERS);
this.writeChars(ch, start, length);
}
- public void ignorableWhitespace(char[] ch, int start, int length)
- throws SAXException {
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
this.writeEvent(IGNORABLE_WHITESPACE);
this.writeChars(ch, start, length);
}
- public void processingInstruction(String target, String data)
- throws SAXException {
+ public void processingInstruction(String target, String data) throws SAXException {
this.writeEvent(PROCESSING_INSTRUCTION);
this.writeString(target);
this.writeString(data);
@@ -215,8 +220,7 @@
/**
* SAX Event Handling: LexicalHandler
*/
- public void startDTD(String name, String publicId, String systemId)
- throws SAXException {
+ public void startDTD(String name, String publicId, String systemId) throws SAXException {
this.writeEvent(START_DTD);
this.writeString(name);
this.writeString(publicId!=null?publicId:"");
@@ -264,8 +268,7 @@
/**
* SAX Event Handling: LexicalHandler
*/
- public void comment(char ary[], int start, int length)
- throws SAXException {
+ public void comment(char ary[], int start, int length) throws SAXException {
try {
this.writeEvent(COMMENT);
this.writeChars(ary, start, length);
@@ -274,96 +277,153 @@
}
}
- public final void writeEvent(int event) throws SAXException {
+ public final void writeEvent( final int event) throws SAXException {
this.write(event);
}
- public final void writeAttributes(int attributes) throws SAXException {
+ public final void writeAttributes( final int attributes) throws SAXException {
+ if (attributes > 0xFFFF) throw new SAXException("Too many attributes");
this.write((attributes >>> 8) & 0xFF);
this.write((attributes >>> 0) & 0xFF);
}
- public final void writeString(String str) throws SAXException {
+ public final void writeString( final String str) throws SAXException {
Integer index = (Integer) map.get(str);
if (index == null) {
+ map.put(str, new Integer(mapCount++));
int length = str.length();
- map.put(str, new Integer(count++));
this.writeChars(str.toCharArray(), 0, length);
- } else {
+ }
+ else {
int i = index.intValue();
+
+ if (i > 0xFFFF) throw new SAXException("Index too large");
+
this.write(((i >>> 8) & 0xFF) | 0x80);
this.write((i >>> 0) & 0xFF);
}
}
- public final void writeChars(char[] ch, int start, int length)
- throws SAXException {
+ public final void writeChars( final char[] ch, final int start, final int length) throws SAXException {
int utflen = 0;
- int c, count = 0;
+ int c;
for (int i = 0; i < length; i++) {
c = ch[i + start];
if ((c >= 0x0001) && (c <= 0x007F)) {
utflen++;
- } else if (c > 0x07FF) {
+ }
+ else if (c > 0x07FF) {
utflen += 3;
- } else {
+ }
+ else {
utflen += 2;
}
}
- if (utflen > 0x00007FFF) {
- // handling "UTFDataFormatException: String cannot be longer than 32k."
- int split = length / 2;
- writeChars(ch, start, length - split);
- writeEvent(CHARACTERS);
- writeChars(ch, start + length - split, split);
- return;
+ if (utflen >= 0x00007FFF) {
+ assure(bufCount + utflen + 6);
+
+ buf[bufCount++] = (byte)0x7F;
+ buf[bufCount++] = (byte)0xFF;
+
+ buf[bufCount++] = (byte) ((utflen >>> 24) & 0xFF);
+ buf[bufCount++] = (byte) ((utflen >>> 16) & 0xFF);
+ buf[bufCount++] = (byte) ((utflen >>> 8) & 0xFF);
+ buf[bufCount++] = (byte) ((utflen >>> 0) & 0xFF);
+ }
+ else {
+ assure(bufCount + utflen + 2);
+
+ buf[bufCount++] = (byte) ((utflen >>> 8) & 0xFF);
+ buf[bufCount++] = (byte) ((utflen >>> 0) & 0xFF);
}
- byte[] bytearr = new byte[utflen+2];
- bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
- bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
for (int i = 0; i < length; i++) {
c = ch[i + start];
if ((c >= 0x0001) && (c <= 0x007F)) {
- bytearr[count++] = (byte) c;
- } else if (c > 0x07FF) {
- bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
- bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
- bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
- } else {
- bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
- bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ buf[bufCount++] = (byte) c;
+ }
+ else if (c > 0x07FF) {
+ buf[bufCount++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
+ buf[bufCount++] = (byte) (0x80 | ((c >> 6) & 0x3F));
+ buf[bufCount++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ }
+ else {
+ buf[bufCount++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
+ buf[bufCount++] = (byte) (0x80 | ((c >> 0) & 0x3F));
}
}
- this.write(bytearr);
- }
- private void write(byte[] b) {
- int len = b.length;
- if (len == 0) return;
- int newcount = this.bufCount + len;
- if (newcount > this.buf.length) {
- byte newbuf[] = new byte[Math.max(this.buf.length << 1, newcount)];
- System.arraycopy(this.buf, 0, newbuf, 0, this.bufCount);
- this.buf = newbuf;
+/*
+ if (length == 0) return;
+
+ assure( (int) (buf.length + length * utfRatioAverage) );
+
+ int utflentotal = 0;
+
+ bufCount += 2;
+ int bufStart = bufCount;
+
+ for (int i = 0; i < length; i++) {
+ int c = ch[i + start];
+ int l = bufCount-bufStart;
+
+ if (l+3 >= 0x7FFF) {
+ buf[bufStart-2] = (byte) ((l >>> 8) & 0xFF);
+ buf[bufStart-1] = (byte) ((l >>> 0) & 0xFF);
+ utflentotal += l;
+ bufCount += 2;
+ bufStart = bufCount;
+ }
+
+ if ((c >= 0x0001) && (c <= 0x007F)) {
+ assure(bufCount+1);
+ buf[bufCount++] = (byte)c;
+ }
+ else if (c > 0x07FF) {
+ assure(bufCount+3);
+ buf[bufCount++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
+ buf[bufCount++] = (byte) (0x80 | ((c >> 6) & 0x3F));
+ buf[bufCount++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ }
+ else {
+ assure(bufCount+2);
+ buf[bufCount++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
+ buf[bufCount++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ }
}
- System.arraycopy(b, 0, this.buf, this.bufCount, len);
+
+ int l = bufCount-bufStart;
+ buf[bufStart-2] = (byte) ((l >>> 8) & 0xFF);
+ buf[bufStart-1] = (byte) ((l >>> 0) & 0xFF);
+ utflentotal += l;
+
+ utfRatioAverage = (utfRatioAverage + (utflentotal / length) / 2);
+*/
+ }
+
+ private void write( final byte[] b ) {
+ int newcount = this.bufCount + b.length;
+ assure(newcount);
+ System.arraycopy(b, 0, this.buf, this.bufCount, b.length);
this.bufCount = newcount;
}
- private void write(int b) {
+ private void write( final int b ) {
int newcount = this.bufCount + 1;
- if (newcount > this.buf.length) {
- byte newbuf[] = new byte[Math.max(this.buf.length << 1, newcount)];
- System.arraycopy(this.buf, 0, newbuf, 0, this.bufCount);
- this.buf = newbuf;
- }
+ assure(newcount);
this.buf[this.bufCount] = (byte)b;
this.bufCount = newcount;
}
+ private void assure( final int size ) {
+ if (size > this.buf.length) {
+ byte newbuf[] = new byte[Math.max(this.buf.length << 1, size)];
+ System.arraycopy(this.buf, 0, newbuf, 0, this.bufCount);
+ this.buf = newbuf;
+ }
+ }
}
1.5 +26 -13 cocoon-2.1/src/java/org/apache/cocoon/components/sax/XMLByteStreamInterpreter.java
Index: XMLByteStreamInterpreter.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/java/org/apache/cocoon/components/sax/XMLByteStreamInterpreter.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- XMLByteStreamInterpreter.java 24 Sep 2003 21:26:51 -0000 1.4
+++ XMLByteStreamInterpreter.java 19 Nov 2003 01:48:12 -0000 1.5
@@ -62,12 +62,11 @@
*
* @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
* @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
+ * @author <a href="mailto:tcurdt@apache.org">Torsten Curdt</a>
* @version CVS $Id$
*/
-public final class XMLByteStreamInterpreter
-extends AbstractXMLProducer
-implements XMLDeserializer, Recyclable {
+public final class XMLByteStreamInterpreter extends AbstractXMLProducer implements XMLDeserializer, Recyclable {
private static final int START_DOCUMENT = 0;
private static final int END_DOCUMENT = 1;
@@ -98,8 +97,7 @@
this.input = null;
}
- public void deserialize(Object saxFragment)
- throws SAXException {
+ public void deserialize(Object saxFragment) throws SAXException {
if (!(saxFragment instanceof byte[])) {
throw new SAXException("XMLDeserializer needs byte array for deserialization.");
}
@@ -224,12 +222,16 @@
}
private String readString() throws SAXException {
- int length = this.readLength();
+ int length = this.readWord();
int index = length & 0x00007FFF;
if (length >= 0x00008000) {
return (String) list.get(index);
- } else {
- char[] chars = this.readChars(index);
+ }
+ else {
+ if (length == 0x00007FFF) {
+ length = this.readLong();
+ }
+ char[] chars = this.readChars(length);
int len = chars.length;
if (len > 0) {
while (chars[len-1]==0) len--;
@@ -250,7 +252,11 @@
* at the end
*/
private char[] readChars() throws SAXException {
- return this.readChars(this.readLength());
+ int length = this.readWord();
+ if (length == 0x00007FFF) {
+ length = this.readLong();
+ }
+ return this.readChars(length);
}
private int read() throws SAXException {
@@ -302,8 +308,7 @@
return str;
}
- private void readBytes(byte[] b)
- throws SAXException {
+ private void readBytes(byte[] b) throws SAXException {
if (this.currentPos + b.length > this.input.length) {
// TC:
// >= prevents getting the last byte
@@ -319,9 +324,17 @@
this.currentPos += b.length;
}
- private int readLength() throws SAXException {
+ private int readWord() throws SAXException {
int ch1 = this.read();
int ch2 = this.read();
return ((ch1 << 8) + (ch2 << 0));
+ }
+
+ private int readLong() throws SAXException {
+ int ch1 = this.read();
+ int ch2 = this.read();
+ int ch3 = this.read();
+ int ch4 = this.read();
+ return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
}
}
1.2 +8 -7 cocoon-2.1/src/test/org/apache/cocoon/components/sax/XMLByteStreamCompilerInterpreterTestCase.java
Index: XMLByteStreamCompilerInterpreterTestCase.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/test/org/apache/cocoon/components/sax/XMLByteStreamCompilerInterpreterTestCase.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- XMLByteStreamCompilerInterpreterTestCase.java 17 Nov 2003 01:01:02 -0000 1.1
+++ XMLByteStreamCompilerInterpreterTestCase.java 19 Nov 2003 01:48:12 -0000 1.2
@@ -63,6 +63,7 @@
* Testcase for XMLByteStreamCompiler and Interpreter
*
* @author <a href="mailto:tcurdt@apache.org">Torsten Curdt</a>
+ * @version
*/
public final class XMLByteStreamCompilerInterpreterTestCase extends AbstractXMLTestCase {
@@ -73,11 +74,11 @@
public void testCompareDOM() throws Exception {
// reference
DOMBuilder in = new DOMBuilder();
- generateSAX(in);
+ generateLargeSAX(in);
// capture events
XMLByteStreamCompiler xmlc = new XMLByteStreamCompiler();
- generateSAX(xmlc);
+ generateLargeSAX(xmlc);
// recall events and build a DOM from it
XMLByteStreamInterpreter xmli = new XMLByteStreamInterpreter();
@@ -92,7 +93,7 @@
public void testCompareByteArray() throws Exception {
// capture events
XMLByteStreamCompiler sa = new XMLByteStreamCompiler();
- generateSAX(sa);
+ generateLargeSAX(sa);
// serialize events
byte[] aa = (byte[]) sa.getSAXFragment();
@@ -116,12 +117,12 @@
public void testStressLoop() throws Exception {
XMLByteStreamCompiler xmlc = new XMLByteStreamCompiler();
- long loop = 50000;
+ long loop = 10000;
// simply consume documents
long start = System.currentTimeMillis();
for(int i=0;i<loop;i++) {
- generateSAX(xmlc);
+ generateSmallSAX(xmlc);
xmlc.recycle();
}
long stop = System.currentTimeMillis();
@@ -132,7 +133,7 @@
public void testCompareToParsing() throws Exception {
DOMBuilder in = new DOMBuilder();
- generateSAX(in);
+ generateSmallSAX(in);
SAXParserFactory pfactory = SAXParserFactory.newInstance();
SAXParser p = pfactory.newSAXParser();
@@ -142,7 +143,7 @@
ByteArrayInputStream bis = new ByteArrayInputStream(generateByteArray());
- long loop = 50000;
+ long loop = 10000;
// parse documents
long start = System.currentTimeMillis();
1.2 +20 -2 cocoon-2.1/src/test/org/apache/cocoon/xml/AbstractXMLTestCase.java
Index: AbstractXMLTestCase.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/test/org/apache/cocoon/xml/AbstractXMLTestCase.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- AbstractXMLTestCase.java 17 Nov 2003 01:01:02 -0000 1.1
+++ AbstractXMLTestCase.java 19 Nov 2003 01:48:12 -0000 1.2
@@ -69,14 +69,32 @@
* general functions for XML related Testcases
*
* @author <a href="mailto:tcurdt@apache.org">Torsten Curdt</a>
+ * @version
*/
public abstract class AbstractXMLTestCase extends XMLTestCase {
+
public AbstractXMLTestCase(String s) {
super(s);
}
- protected void generateSAX( ContentHandler consumer ) throws SAXException {
+ protected void generateLargeSAX( ContentHandler consumer ) throws SAXException {
+ AttributesImpl atts = new AttributesImpl();
+
+ final int size = 65000;
+ char[] large = new char[size];
+ for(int i=0;i<size;i++) {
+ large[i] = 'x';
+ }
+
+ consumer.startDocument();
+ consumer.startElement("", "root", "root", atts);
+ consumer.characters(large,0,size);
+ consumer.endElement("", "root", "root");
+ consumer.endDocument();
+ }
+
+ protected void generateSmallSAX( ContentHandler consumer ) throws SAXException {
AttributesImpl atts = new AttributesImpl();
consumer.startDocument();
@@ -88,7 +106,7 @@
protected byte[] generateByteArray() throws Exception {
DOMBuilder in = new DOMBuilder();
- generateSAX(in);
+ generateSmallSAX(in);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
TransformerFactory tFactory = TransformerFactory.newInstance();
Transformer t = tFactory.newTransformer();
1.3 +7 -6 cocoon-2.1/src/test/org/apache/cocoon/xml/SaxBufferTestCase.java
Index: SaxBufferTestCase.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/test/org/apache/cocoon/xml/SaxBufferTestCase.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- SaxBufferTestCase.java 18 Nov 2003 16:02:40 -0000 1.2
+++ SaxBufferTestCase.java 19 Nov 2003 01:48:12 -0000 1.3
@@ -63,6 +63,7 @@
* Testcase for SaxBuffer
*
* @author <a href="mailto:tcurdt@apache.org">Torsten Curdt</a>
+ * @version
*/
public final class SaxBufferTestCase extends AbstractXMLTestCase {
@@ -72,10 +73,10 @@
public void testCompareDOM() throws Exception {
DOMBuilder in = new DOMBuilder();
- generateSAX(in);
+ generateLargeSAX(in);
SaxBuffer sb = new SaxBuffer();
- generateSAX(sb);
+ generateLargeSAX(sb);
DOMBuilder out = new DOMBuilder();
sb.toSAX(out);
@@ -86,12 +87,12 @@
public void testStressLoop() throws Exception {
SaxBuffer sb = new SaxBuffer();
- long loop = 50000;
+ long loop = 10000;
// simply consume documents
long start = System.currentTimeMillis();
for(int i=0;i<loop;i++) {
- generateSAX(sb);
+ generateSmallSAX(sb);
sb.recycle();
}
long stop = System.currentTimeMillis();
@@ -102,7 +103,7 @@
public void testCompareToParsing() throws Exception {
DOMBuilder in = new DOMBuilder();
- generateSAX(in);
+ generateSmallSAX(in);
SAXParserFactory pfactory = SAXParserFactory.newInstance();
SAXParser p = pfactory.newSAXParser();
@@ -112,7 +113,7 @@
DefaultHandlerWrapper wrapper = new DefaultHandlerWrapper(b);
ByteArrayInputStream bis = new ByteArrayInputStream(generateByteArray());
- long loop = 50000;
+ long loop = 10000;
long start = System.currentTimeMillis();
for(int i=0;i<loop;i++) {