You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by er...@locus.apache.org on 2000/07/31 22:42:30 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/readers UTF8Recognizer.java
ericye 00/07/31 13:42:30
Modified: java/src/org/apache/xerces/readers UTF8Recognizer.java
Log:
Add a UTF-8 BOM check: if the input begins with the byte sequence {0xEF, 0xBB, 0xBF}, skip past it.
Revision Changes Path
1.5 +19 -0 xml-xerces/java/src/org/apache/xerces/readers/UTF8Recognizer.java
Index: UTF8Recognizer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/readers/UTF8Recognizer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- UTF8Recognizer.java 2000/05/17 18:32:42 1.4
+++ UTF8Recognizer.java 2000/07/31 20:42:30 1.5
@@ -71,6 +71,7 @@
* @version
*/
final class UTF8Recognizer extends XMLDeclRecognizer {
+ private byte[] fUTF8BOM = {(byte)0xEF, (byte)0xBB, (byte)0xBF};
//
//
//
@@ -83,6 +84,24 @@
boolean xmlDecl,
boolean allowJavaEncodingName) throws Exception {
XMLEntityHandler.EntityReader reader = null;
+
+ // Check whether the data starts with a UTF-8 BOM; if it does, skip past it.
+ boolean seeBOM = false;
+ byte bom0 = data.byteAt(0);
+ if (bom0 == fUTF8BOM[0]) {
+ byte bom1 = data.byteAt(1);
+ if (bom1 == fUTF8BOM[1]) {
+ byte bom2 = data.byteAt(2);
+ if (bom2 == fUTF8BOM[2]) {
+ seeBOM = true;
+ }
+ }
+ }
+ if (seeBOM) {
+ // Reading into fUTF8BOM is harmless: the three bytes just matched are identical to its contents.
+ data.read(fUTF8BOM, 0, 3);
+ }
+
byte b0 = data.byteAt(0);
boolean debug = false;