You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2006/08/26 01:15:59 UTC

svn commit: r437010 - /incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java

Author: jmsnell
Date: Fri Aug 25 16:15:58 2006
New Revision: 437010

URL: http://svn.apache.org/viewvc?rev=437010&view=rev
Log:
A few minor cleanups to make the code a bit easier to manage

Modified:
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java

Modified: incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java?rev=437010&r1=437009&r2=437010&view=diff
==============================================================================
--- incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java (original)
+++ incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java Fri Aug 25 16:15:58 2006
@@ -31,6 +31,18 @@
 public class FOMSniffingInputStream 
   extends FilterInputStream {
 
+  private static final byte[] UTF32be  = new byte[] {0x00,0x00,0xFFFFFFFE,0xFFFFFFFF}; // BOM 00 00 FE FF (0xFFFFFFFE is the int -2, i.e. byte 0xFE)
+  private static final byte[] UTF32le  = new byte[] {0xFFFFFFFF,0xFFFFFFFE,0x00,0x00}; // BOM FF FE 00 00
+  private static final byte[] INVALID1 = new byte[] {0xFFFFFFFE,0xFFFFFFFF,0x00,0x00}; // FE FF 00 00: detectEncoding returns null for this
+  private static final byte[] INVALID2 = new byte[] {0x00,0x00,0xFFFFFFFF,0xFFFFFFFE}; // 00 00 FF FE: detectEncoding returns null for this
+  private static final byte[] UTF16be  = new byte[] {0xFFFFFFFE,0xFFFFFFFF}; // BOM FE FF
+  private static final byte[] UTF16le  = new byte[] {0xFFFFFFFF,0xFFFFFFFE}; // BOM FF FE
+  private static final byte[] UTF8     = new byte[] {0xFFFFFFEF,0xFFFFFFBB,0xFFFFFFBF}; // BOM EF BB BF
+  private static final byte[] UTF32be2 = new byte[] {0x00,0x00,0x00,0x3C}; // no BOM: '<' encoded as UTF-32BE
+  private static final byte[] UTF32le2 = new byte[] {0x3C,0x00,0x00,0x00}; // no BOM: '<' encoded as UTF-32LE
+  private static final byte[] UTF16be2 = new byte[] {0x00,0x3C,0x00,0x3F}; // no BOM: "<?" encoded as UTF-16BE
+  private static final byte[] UTF16le2 = new byte[] {0x3C,0x00,0x3F,0x00}; // no BOM: "<?" encoded as UTF-16LE
+  
   private String encoding = null;
   private boolean bomset = false;
   
@@ -51,41 +63,46 @@
     return encoding;
   }
   
+  private boolean equals(byte[] a1, int start, int len, byte[] a2) { // true when a1[start .. start+len) equals a2[0 .. len)
+    for (int i = 0; i < len; i++) { // count-based bound: the previous "n < start + (len - start)" reduced to "n < len" and was only right for start == 0
+      if (a1[start + i] != a2[i]) return false; // caller must ensure start+len <= a1.length and len <= a2.length
+    }
+    return true;
+  }
+  
   private String detectEncoding() throws IOException {
     PeekAheadInputStream pin = (PeekAheadInputStream) this.in;
     byte[] bom = new byte[4];
     pin.peek(bom);
     String charset = null;
-    if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFFFFFFFE && bom[3] == 0xFFFFFFFF) {
+    if (equals(bom,0,4,UTF32be)) {
       bomset = true;
       return "utf-32be";
-    } else if (bom[0] == 0xFFFFFFFF && bom[1] == 0xFFFFFFFE && bom[2] == 0x00 && bom[3] == 0x00) {
+    } else if (equals(bom,0,4,UTF32le)) {
       bomset = true;
       return "utf-32le";
-    } else if ((bom[0] == 0xFFFFFFFE && bom[1] == 0xFFFFFFFF && bom[2] == 0x00 && bom[3] == 0x00) ||
-               (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFFFFFFFF && bom[3] == 0xFFFFFFFE)) {
+    } else if ((equals(bom,0,4,INVALID1)) ||
+               (equals(bom,0,4,INVALID2))) {
       bomset = true;
       return null;
-    } else if (bom[0] == 0xFFFFFFFE && bom[1] == 0xFFFFFFFF) {
+    } else if (equals(bom,0,2,UTF16be)) {
       bomset = true;
       return "utf-16be";
-    } else if (bom[0] == 0xFFFFFFFF && bom[1] == 0xFFFFFFFE) {
+    } else if (equals(bom,0,2,UTF16le)) {
       bomset = true;
       return "utf-16le";
-    } else if (bom[0] == 0xFFFFFFEF && bom[1] == 0xFFFFFFBB && bom[2] == 0xFFFFFFBF)  {
+    } else if (equals(bom,0,3,UTF8))  {
       bomset = true;
       return "utf-8";
-    } else if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0x00 && bom[3] == 0x3C) {
+    } else if (equals(bom,0,4,UTF32be2)) {
       charset = "utf-32be";
-    } else if (bom[0] == 0x3C && bom[1] == 0x00 && bom[2] == 0x00 && bom[3] == 0x00) {
+    } else if (equals(bom,0,4,UTF32le2)) {
       charset = "utf-32le";
-    } else if (bom[0] == 0x00 && bom[1] == 0x3C && bom[2] == 0x00 && bom[3] == 0x3F) {
+    } else if (equals(bom,0,4,UTF16be2)) {
       charset = "utf-16be";
-    } else if (bom[0] == 0x3C && bom[1] == 0x00 && bom[2] == 0x3F && bom[3] == 0x00) {
+    } else if (equals(bom,0,4,UTF16le2)) {
       charset = "utf-16le";
-    } else if (bom[0] == 0x4C && bom[1] == 0x6F && bom[2] == 0xA7 && bom[3] == 0x94) {
-      charset = "edbdic";
-    } 
+    }
     bomset = false;
     try { 
       byte[] p = new byte[200];