You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@xerces.apache.org by er...@locus.apache.org on 2000/07/31 22:42:30 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/readers UTF8Recognizer.java

ericye      00/07/31 13:42:30

  Modified:    java/src/org/apache/xerces/readers UTF8Recognizer.java
  Log:
  Add a UTF-8 BOM check: if the input begins with the byte sequence {0xEF, 0xBB, 0xBF}, skip past it.
  
  Revision  Changes    Path
  1.5       +19 -0     xml-xerces/java/src/org/apache/xerces/readers/UTF8Recognizer.java
  
  Index: UTF8Recognizer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/readers/UTF8Recognizer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- UTF8Recognizer.java	2000/05/17 18:32:42	1.4
  +++ UTF8Recognizer.java	2000/07/31 20:42:30	1.5
  @@ -71,6 +71,7 @@
    * @version
    */
   final class UTF8Recognizer extends XMLDeclRecognizer {
  +    private byte[] fUTF8BOM = {(byte)0xEF, (byte)0xBB, (byte)0xBF};
       //
       //
       //
  @@ -83,6 +84,24 @@
                                                      boolean xmlDecl,
                                                      boolean allowJavaEncodingName) throws Exception {
           XMLEntityHandler.EntityReader reader = null;
  +
  +        // Check whether the input begins with a UTF-8 BOM; if it does, skip past it.
  +        boolean seeBOM = false;
  +        byte bom0 = data.byteAt(0);
  +        if (bom0 == fUTF8BOM[0]) {
  +            byte bom1 = data.byteAt(1);
  +            if (bom1 == fUTF8BOM[1]) {
  +                byte bom2 = data.byteAt(2);
  +                if (bom2 == fUTF8BOM[2]) {
  +                    seeBOM = true;
  +                }
  +            }
  +        }
  +        if (seeBOM) {
  +            // Consume the three BOM bytes. Reading them into fUTF8BOM is harmless: they were just matched against it, so its contents stay the same.
  +            data.read(fUTF8BOM, 0, 3);
  +        }
  +
           byte b0 = data.byteAt(0);
           boolean debug = false;