You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2002/01/05 00:27:50 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/impl/io UCSReader.java
neilg 02/01/04 15:27:50
Modified: java/docs releases.xml
java/src/org/apache/xerces/impl XMLEntityManager.java
java/src/org/apache/xerces/impl/msg XMLMessages.properties
java/src/org/apache/xerces/util EncodingMap.java
Added: java/src/org/apache/xerces/impl/io UCSReader.java
Log:
implemented support for UCS-2 and UCS-4 encodings by the use of a custom Reader class. Still need to revisit performance issues related to reading input character-by-character when that is not necessary.
Revision Changes Path
1.96 +5 -1 xml-xerces/java/docs/releases.xml
Index: releases.xml
===================================================================
RCS file: /home/cvs/xml-xerces/java/docs/releases.xml,v
retrieving revision 1.95
retrieving revision 1.96
diff -u -r1.95 -r1.96
--- releases.xml 22 Dec 2001 23:25:48 -0000 1.95
+++ releases.xml 4 Jan 2002 23:27:49 -0000 1.96
@@ -1,11 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
-<!-- $Id: releases.xml,v 1.95 2001/12/22 23:25:48 andyc Exp $ -->
+<!-- $Id: releases.xml,v 1.96 2002/01/04 23:27:49 neilg Exp $ -->
<!DOCTYPE releases SYSTEM 'dtd/releases.dtd'>
<releases>
<release version='NOT YET RELEASED'>
<desc>
</desc>
<changes>
+ <add>
+ <note>Implemented support for UCS-4 and UCS-2 encodings.</note>
+ <submitter name='Neil Graham'/>
+ </add>
<add>
<note>Added internal subset string to DOM.</note>
<submitter name='Andy Clark'/>
1.16 +99 -37 xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java
Index: XMLEntityManager.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- XMLEntityManager.java 18 Dec 2001 16:58:45 -0000 1.15
+++ XMLEntityManager.java 4 Jan 2002 23:27:49 -0000 1.16
@@ -71,6 +71,7 @@
import org.apache.xerces.impl.XMLErrorReporter;
import org.apache.xerces.impl.io.ASCIIReader;
+import org.apache.xerces.impl.io.UCSReader;
import org.apache.xerces.impl.io.UTF8Reader;
import org.apache.xerces.impl.msg.XMLMessageFormatter;
@@ -112,7 +113,7 @@
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
*
- * @version $Id: XMLEntityManager.java,v 1.15 2001/12/18 16:58:45 neilg Exp $
+ * @version $Id: XMLEntityManager.java,v 1.16 2002/01/04 23:27:49 neilg Exp $
*/
public class XMLEntityManager
implements XMLComponent, XMLEntityResolver {
@@ -703,6 +704,7 @@
final String systemId = xmlInputSource.getSystemId();
String baseSystemId = xmlInputSource.getBaseSystemId();
String encoding = xmlInputSource.getEncoding();
+ Boolean isBigEndian = null;
// create reader
InputStream stream = null;
@@ -728,7 +730,9 @@
b4[count] = (byte)stream.read();
}
if (count == 4) {
- encoding = getEncodingName(b4, count);
+ Object [] encodingDesc = getEncodingName(b4, count);
+ encoding = (String)(encodingDesc[0]);
+ isBigEndian = (Boolean)(encodingDesc[1]);
// removed use of pushback inputstream--neilg
/*****
@@ -765,18 +769,18 @@
// indirection to get at the underlying bytes. -Ac
// create reader from input stream
- reader = createReader(new RewindableInputStream(pbstream), encoding);
+ reader = createReader(new RewindableInputStream(pbstream), encoding, isBigEndian);
******/
- reader = createReader(stream, encoding);
+ reader = createReader(stream, encoding, isBigEndian);
}
else {
- reader = createReader(stream, encoding);
+ reader = createReader(stream, encoding, isBigEndian);
}
}
// use specified encoding
else {
- reader = createReader(stream, encoding);
+ reader = createReader(stream, encoding, isBigEndian);
}
// read one character at a time so we don't jump too far
@@ -1136,15 +1140,18 @@
/**
* Returns the IANA encoding name that is auto-detected from
- * the bytes specified.
+ * the bytes specified, with the endian-ness of that encoding where appropriate.
*
* @param b4 The first four bytes of the input.
* @param count The number of bytes actually read.
+ * @return a 2-element array: the first element, an IANA-encoding string,
+ * the second element a Boolean which is true iff the document is big endian, false
+ * if it's little-endian, and null if the distinction isn't relevant.
*/
- protected String getEncodingName(byte[] b4, int count) {
+ protected Object[] getEncodingName(byte[] b4, int count) {
if (count < 2) {
- return "UTF-8";
+ return new Object[]{"UTF-8", null};
}
// UTF-16, with BOM
@@ -1152,72 +1159,72 @@
int b1 = b4[1] & 0xFF;
if (b0 == 0xFE && b1 == 0xFF) {
// UTF-16, big-endian
- return "UTF-16";
+ return new Object [] {"UTF-16BE", new Boolean(true)};
}
if (b0 == 0xFF && b1 == 0xFE) {
// UTF-16, little-endian
- return "UTF-16";
+ return new Object [] {"UTF-16LE", new Boolean(false)};
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 3) {
- return "UTF-8";
+ return new Object [] {"UTF-8", null};
}
// UTF-8 with a BOM
int b2 = b4[2] & 0xFF;
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
- return "UTF-8";
+ return new Object [] {"UTF-8", null};
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 4) {
- return "UTF-8";
+ return new Object [] {"UTF-8", null};
}
// other encodings
int b3 = b4[3] & 0xFF;
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
// UCS-4, big endian (1234)
- // REVISIT: What should this be?
- return "UnicodeBig";
+ return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
// UCS-4, little endian (4321)
- // REVISIT: What should this be?
- return "UnicodeLittleUnmarked";
+ return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
}
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
// UCS-4, unusual octet order (2143)
// REVISIT: What should this be?
- return "UnicodeBigUnmarked";
+ return new Object [] {"ISO-10646-UCS-4", null};
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
// UCS-4, unusual octect order (3412)
// REVISIT: What should this be?
- return "UnicodeLittleUnmarked";
+ return new Object [] {"ISO-10646-UCS-4", null};
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
// UTF-16, big-endian, no BOM
+ // (or could turn out to be UCS-2...
// REVISIT: What should this be?
- return "UnicodeBig";
+ return new Object [] {"UTF-16BE", new Boolean(true)};
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
// UTF-16, little-endian, no BOM
- return "UnicodeLittle";
+ // (or could turn out to be UCS-2...
+ return new Object [] {"UTF-16LE", new Boolean(false)};
}
if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
// EBCDIC
// a la xerces1, return CP037 instead of EBCDIC here
- return "CP037";
+ return new Object [] {"CP037", null};
}
// default encoding
- return "UTF-8";
+ return new Object [] {"UTF-8", null};
- } // getEncodingName(byte[],int):String
+ } // getEncodingName(byte[],int):Object[]
/**
* Creates a reader capable of reading the given input stream in
@@ -1229,10 +1236,13 @@
* Java encoding names are allowed, then the
* encoding name may be a Java encoding name;
* otherwise, it is an ianaEncoding name.
+ * @param isBigEndian For encodings (like UCS-4), whose names cannot
+ * specify a byte order, this tells whether the order is bigEndian. null means
+ * unknown or not relevant.
*
* @return Returns a reader.
*/
- protected Reader createReader(InputStream inputStream, String encoding)
+ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
throws IOException {
// normalize encoding name
@@ -1254,6 +1264,36 @@
}
return new ASCIIReader(inputStream, fBufferSize);
}
+ if(ENCODING.equals("ISO-10646-UCS-4")) {
+ if(isBigEndian != null) {
+ boolean isBE = isBigEndian.booleanValue();
+ if(isBE) {
+ return new UCSReader(inputStream, UCSReader.UCS4BE);
+ } else {
+ return new UCSReader(inputStream, UCSReader.UCS4LE);
+ }
+ } else {
+ fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
+ "EncodingByteOrderUnsupported",
+ new Object[] { encoding },
+ XMLErrorReporter.SEVERITY_FATAL_ERROR);
+ }
+ }
+ if(ENCODING.equals("ISO-10646-UCS-2")) {
+ if(isBigEndian != null) { // sould never happen with this encoding...
+ boolean isBE = isBigEndian.booleanValue();
+ if(isBE) {
+ return new UCSReader(inputStream, UCSReader.UCS2BE);
+ } else {
+ return new UCSReader(inputStream, UCSReader.UCS2LE);
+ }
+ } else {
+ fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
+ "EncodingByteOrderUnsupported",
+ new Object[] { encoding },
+ XMLErrorReporter.SEVERITY_FATAL_ERROR);
+ }
+ }
// check for valid name
boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
@@ -1297,7 +1337,7 @@
}
return new InputStreamReader(inputStream, javaEncoding);
- } // createReader(InputStream,String):
+ } // createReader(InputStream,String, Boolean): Reader
//
// Protected static methods
@@ -1781,14 +1821,38 @@
// a single char! -Ac
if (fCurrentEntity.encoding == null ||
!fCurrentEntity.encoding.equals(encoding)) {
+ // UTF-16 is a bit of a special case. If the encoding is UTF-16,
+ // and we know the endian-ness, we shouldn't change readers.
+ // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
+ // the endian-ness from the encoding we presently have.
+ if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
+ String ENCODING = encoding.toUpperCase();
+ if(ENCODING.equals("UTF-16")) return;
+ if(ENCODING.equals("ISO-10646-UCS-4")) {
+ if(fCurrentEntity.encoding.equals("UTF-16BE")) {
+ fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
+ } else {
+ fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
+ }
+ return;
+ }
+ if(ENCODING.equals("ISO-10646-UCS-2")) {
+ if(fCurrentEntity.encoding.equals("UTF-16BE")) {
+ fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
+ } else {
+ fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
+ }
+ return;
+ }
+ }
// wrap a new reader around the input stream, changing
// the encoding
if (DEBUG_ENCODINGS) {
System.out.println("$$$ creating new reader from stream: "+
- fCurrentEntity.stream);
+ fCurrentEntity.stream);
}
//fCurrentEntity.stream.reset();
- fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding);
+ fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
} else {
if (DEBUG_ENCODINGS)
System.out.println("$$$ reusing old reader on stream");
@@ -3155,10 +3219,9 @@
}
public int read() throws IOException {
- int b;
+ int b = 0;
if (fOffset < fLength) {
- System.err.println("REturned buffered: " + (char)(fData[fOffset++] & 0xFF));
- return fData[fOffset++] & 0xFF;
+ return fData[fOffset++] & 0xff;
}
if (fOffset == fEndOffset) {
return -1;
@@ -3175,7 +3238,7 @@
}
fData[fLength++] = (byte)b;
fOffset++;
- return b;
+ return b & 0xff;
}
public int read(byte[] b, int off, int len) throws IOException {
@@ -3188,13 +3251,12 @@
if(fCurrentEntity.mayReadChunks) {
return fInputStream.read(b, off, len);
}
- b[off] = (byte)read();
- if(b[off] == -1) {
+ int returnedVal = read();
+ if(returnedVal == -1) {
fEndOffset = fOffset;
return -1;
}
- byte [] c = new byte[off];
- System.arraycopy(b,0,c,0,off);
+ b[off] = (byte)returnedVal;
return 1;
}
if (len < bytesLeft) {
1.5 +1 -0 xml-xerces/java/src/org/apache/xerces/impl/msg/XMLMessages.properties
Index: XMLMessages.properties
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/msg/XMLMessages.properties,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- XMLMessages.properties 18 Oct 2001 04:42:40 -0000 1.4
+++ XMLMessages.properties 4 Jan 2002 23:27:49 -0000 1.5
@@ -111,6 +111,7 @@
PINotInOneEntity = The processing instruction must be entirely contained within the same parsed entity.
# 4.3.3 Character Encoding in Entities
EncodingDeclInvalid = Invalid encoding name \"{0}\".
+ EncodingByteOrderUnsupported = Given byte order for encoding \"{0}\" is not supported.
# DTD Messages
# 2.2 Characters
1.4 +5 -1 xml-xerces/java/src/org/apache/xerces/util/EncodingMap.java
Index: EncodingMap.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/EncodingMap.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- EncodingMap.java 11 Dec 2001 21:57:38 -0000 1.3
+++ EncodingMap.java 4 Jan 2002 23:27:49 -0000 1.4
@@ -507,7 +507,7 @@
* @author Stubs generated by DesignDoc on Wed Jun 07 11:58:44 PDT 2000
* @author Andy Clark, IBM
*
- * @version $Id: EncodingMap.java,v 1.3 2001/12/11 21:57:38 neilg Exp $
+ * @version $Id: EncodingMap.java,v 1.4 2002/01/04 23:27:49 neilg Exp $
*/
public class EncodingMap {
@@ -647,6 +647,8 @@
fIANA2JavaMap.put("CP367", "ASCII");
fIANA2JavaMap.put("UTF-8", "UTF8");
fIANA2JavaMap.put("UTF-16", "Unicode");
+ fIANA2JavaMap.put("UTF-16BE", "UnicodeBig");
+ fIANA2JavaMap.put("UTF-16LE", "UnicodeLittle");
// REVISIT:
// j:CNS11643 -> EUC-TW?
@@ -686,6 +688,8 @@
fJava2IANAMap.put("SJIS", "SHIFT_JIS");
fJava2IANAMap.put("UTF8", "UTF-8");
fJava2IANAMap.put("Unicode", "UTF-16");
+ fJava2IANAMap.put("UnicodeBig", "UTF-16BE");
+ fJava2IANAMap.put("UnicodeLittle", "UTF-16LE");
fJava2IANAMap.put("JIS0201", "X0201");
fJava2IANAMap.put("JIS0208", "X0208");
fJava2IANAMap.put("JIS0212", "ISO-IR-159");
1.1 xml-xerces/java/src/org/apache/xerces/impl/io/UCSReader.java
Index: UCSReader.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.impl.io;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
/**
* Reader for UCS-2 and UCS-4 encodings.
* (i.e., encodings from ISO-10646-UCS-(2|4)).
*
* @author Neil Graham, IBM
*
* @version $Id: UCSReader.java,v 1.1 2002/01/04 23:27:49 neilg Exp $
*/
public class UCSReader extends Reader {

    //
    // Constants
    //

    /**
     * Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Encoding selector: UCS-2, least significant byte first. */
    public static final short UCS2LE = 1;

    /** Encoding selector: UCS-2, most significant byte first. */
    public static final short UCS2BE = 2;

    /** Encoding selector: UCS-4, least significant byte first. */
    public static final short UCS4LE = 4;

    /** Encoding selector: UCS-4, most significant byte first. */
    public static final short UCS4BE = 8;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer used by the bulk read to stage undecoded bytes. */
    protected byte[] fBuffer;

    /**
     * What kind of data we're dealing with: one of UCS2LE, UCS2BE,
     * UCS4LE or UCS4BE. Any value of 4 or more is treated as UCS-4,
     * anything smaller as UCS-2.
     */
    protected short fEncoding;

    //
    // Constructors
    //

    /**
     * Constructs a UCS reader from the specified input stream
     * using the default buffer size. The endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs a UCS reader from the specified input stream
     * and buffer size. The endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param size The byte buffer size.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        fInputStream = inputStream;
        fBuffer = new byte[size];
        fEncoding = encoding;
    } // <init>(InputStream,int,short)

    //
    // Reader methods
    //

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     * <p>
     * The value returned is the decoded 2- or 4-byte unit itself; for UCS-4
     * input it is not narrowed to 16 bits.
     *
     * @return The character read, or -1 if the end of the stream was
     * reached before a complete character could be read.
     *
     * @exception IOException If an I/O error occurs
     */
    public int read() throws IOException {
        // End of stream has to be detected on the raw return value: a data
        // byte of 0xFF is perfectly legal (e.g. U+00FF) and must not be
        // mistaken for -1.
        int b0 = fInputStream.read();
        if (b0 == -1) {
            return -1;
        }
        int b1 = fInputStream.read();
        if (b1 == -1) {
            return -1;
        }
        if (fEncoding >= 4) {
            int b2 = fInputStream.read();
            if (b2 == -1) {
                return -1;
            }
            int b3 = fInputStream.read();
            if (b3 == -1) {
                return -1;
            }
            if (fEncoding == UCS4BE) {
                return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
            }
            return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
        }
        // UCS-2
        if (fEncoding == UCS2BE) {
            return (b0 << 8) + b1;
        }
        return (b1 << 8) + b0;
    } // read():int

    /**
     * Read characters into a portion of an array. This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     * <p>
     * If the input ends in the middle of a character, the missing bytes are
     * treated as zeros.
     *
     * @param ch Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     * stream has been reached
     *
     * @exception IOException If an I/O error occurs
     */
    public int read(char ch[], int offset, int length) throws IOException {
        final int shift = charShift();
        final int width = 1 << shift;

        // Work in whole characters so that (a) completing a partial
        // character below can never run off the end of fBuffer and
        // (b) "length << shift" cannot overflow for very large lengths.
        int maxChars = fBuffer.length >> shift;
        if (maxChars == 0) {
            // The buffer cannot hold even one character; decode through
            // the unbuffered single-character path instead.
            if (length <= 0) {
                return 0;
            }
            int single = read();
            if (single == -1) {
                return -1;
            }
            ch[offset] = (char) single;
            return 1;
        }
        if (length < maxChars) {
            maxChars = length;
        }
        int byteLength = maxChars << shift;

        int count = fInputStream.read(fBuffer, 0, byteLength);
        if (count == -1) {
            return -1;
        }

        // The stream may hand back a partial character; top it up so that
        // count is a multiple of the character width.
        int remainder = count & (width - 1);
        if (remainder != 0) {
            int end = count + (width - remainder);
            while (count < end) {
                int next = fInputStream.read();
                if (next == -1) {
                    // end of input; something likely went wrong!
                    // Pad the rest of the character with nulls.
                    while (count < end) {
                        fBuffer[count++] = 0;
                    }
                    break;
                }
                fBuffer[count++] = (byte) next;
            }
        }

        // now count is a multiple of the right number of bytes
        int numChars = count >> shift;
        int curPos = 0;
        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[curPos++] & 0xff;
            int b1 = fBuffer[curPos++] & 0xff;
            if (fEncoding >= 4) {
                int b2 = fBuffer[curPos++] & 0xff;
                int b3 = fBuffer[curPos++] & 0xff;
                // NOTE(review): the cast keeps only the low 16 bits, so
                // UCS-4 values above 0xFFFF are truncated here rather than
                // turned into surrogate pairs.
                if (fEncoding == UCS4BE) {
                    ch[offset + i] = (char) ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
                } else {
                    ch[offset + i] = (char) ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
                }
            } else { // UCS-2
                if (fEncoding == UCS2BE) {
                    ch[offset + i] = (char) ((b0 << 8) + b1);
                } else {
                    ch[offset + i] = (char) ((b1 << 8) + b0);
                }
            }
        }
        return numChars;
    } // read(char[],int,int)

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     * <p>
     * If the underlying stream stops in the middle of a character, the rest
     * of that character is consumed as well (and counted as skipped) so that
     * subsequent reads stay aligned on character boundaries.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IOException If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        // Shifting instead of multiplying/dividing by the character width.
        final int shift = charShift();
        final int mask = (1 << shift) - 1;

        long bytesSkipped = fInputStream.skip(n << shift);
        long partial = bytesSkipped & mask;
        if (partial == 0) {
            return bytesSkipped >> shift;
        }
        // Swallow the remaining bytes of the half-skipped character.
        for (long i = partial; i <= mask; i++) {
            if (fInputStream.read() == -1) {
                break;
            }
        }
        return (bytesSkipped >> shift) + 1;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise. Note that returning false does not guarantee that the
     * next read will block. This implementation always returns false.
     *
     * @exception IOException If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return Whatever the underlying input stream reports.
     */
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream. Subsequent calls to reset()
     * will attempt to reposition the stream to this point. Not all
     * character-input streams support the mark() operation.
     *
     * @param readAheadLimit Limit on the number of characters that may be
     * read while still preserving the mark. After
     * reading this many characters, attempting to
     * reset the stream may fail.
     *
     * @exception IOException If the stream does not support mark(),
     * or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        // The limit is given in characters but the underlying stream counts
        // bytes, so scale it (saturating rather than overflowing).
        final int shift = charShift();
        int byteLimit;
        if (readAheadLimit > (Integer.MAX_VALUE >> shift)) {
            byteLimit = Integer.MAX_VALUE;
        } else {
            byteLimit = readAheadLimit << shift;
        }
        fInputStream.mark(byteLimit);
    } // mark(int)

    /**
     * Reset the stream. If the stream has been marked, then attempt to
     * reposition it at the mark. If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point. Not all
     * character-input streams support the reset() operation, and some support
     * reset() without supporting mark().
     *
     * @exception IOException If the stream has not been marked,
     * or if the mark has been invalidated,
     * or if the stream does not support reset(),
     * or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream. Once a stream has been closed, further read(),
     * ready(), mark(), or reset() invocations will throw an IOException.
     * Closing a previously-closed stream, however, has no effect.
     *
     * @exception IOException If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

    //
    // Private methods
    //

    /**
     * Returns log2 of the number of bytes per character for the current
     * encoding: 2 for UCS-4 (4 bytes), 1 for UCS-2 (2 bytes).
     */
    private int charShift() {
        return (fEncoding >= 4) ? 2 : 1;
    } // charShift():int

} // class UCSReader
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org