You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2006/07/15 08:24:35 UTC

svn commit: r422148 - in /incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util: FOMSniffingInputStream.java PeekAheadInputStream.java

Author: jmsnell
Date: Fri Jul 14 23:24:34 2006
New Revision: 422148

URL: http://svn.apache.org/viewvc?rev=422148&view=rev
Log:
Changing from a BufferedInputStream to a PushbackInputStream allows the sniffer to work
with the StAX reference implementation (and likely others).  To make things a bit easier,
I'm using a PeekAheadInputStream impl that, while incurring a minor additional performance cost,
allows the PushbackInputStream to have a dynamically sized internal buffer that is deallocated
as soon as it is consumed.

Added:
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/PeekAheadInputStream.java
Modified:
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java

Modified: incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java?rev=422148&r1=422147&r2=422148&view=diff
==============================================================================
--- incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java (original)
+++ incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java Fri Jul 14 23:24:34 2006
@@ -17,7 +17,6 @@
 */
 package org.apache.abdera.parser.stax.util;
 
-import java.io.BufferedInputStream;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -36,10 +35,12 @@
   private boolean bomset = false;
   
   public FOMSniffingInputStream(InputStream in) {
-    super(new BufferedInputStream(in));
+    super(new PeekAheadInputStream(in,4));
     try {
       encoding = detectEncoding();
-    } catch (IOException e) {}
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
   }
 
   public boolean isBomSet() {
@@ -51,11 +52,9 @@
   }
   
   private String detectEncoding() throws IOException {
-    BufferedInputStream pin = (BufferedInputStream) this.in;
+    PeekAheadInputStream pin = (PeekAheadInputStream) this.in;
     byte[] bom = new byte[4];
-    pin.mark(pin.available());
-    pin.read(bom);
-    pin.reset();  
+    pin.peek(bom);
     String charset = null;
     if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFFFFFFFE && bom[3] == 0xFFFFFFFF) {
       bomset = true;
@@ -88,19 +87,15 @@
       charset = "edbdic";
     } 
     bomset = false;
-    try {
+    try { 
+      byte[] p = new byte[200];
+      pin.peek(p);
       XMLStreamReader xmlreader = 
-        XMLInputFactory.newInstance().createXMLStreamReader(pin);
+        XMLInputFactory.newInstance().createXMLStreamReader(
+          new java.io.ByteArrayInputStream(p));
       String cs = xmlreader.getCharacterEncodingScheme();
       if (cs != null) charset = cs;
-    } catch (Exception e) {
-    } finally {
-      try {
-        pin.reset();
-      } catch (Exception ex) {
-        ex.printStackTrace();
-      }
-    }
+    } catch (Exception e) {}
     return charset;
   }
   

Added: incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/PeekAheadInputStream.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/PeekAheadInputStream.java?rev=422148&view=auto
==============================================================================
--- incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/PeekAheadInputStream.java (added)
+++ incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/PeekAheadInputStream.java Fri Jul 14 23:24:34 2006
@@ -0,0 +1,182 @@
+package org.apache.abdera.parser.stax.util;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+/**
+ * A version of PushbackInputStream that provides methods for peeking ahead
+ * in the stream (equivalent to read() followed by an appropriate unread())
+ * and a dynamically (de)allocated pushback buffer.  Whenever the buffer is
+ * consumed completely, it is shrunk back to its initial size.  Whenever
+ * bytes are unread, the buffer is automatically sized up to fit the number
+ * of bytes being unread.
+ */
+public class PeekAheadInputStream
+  extends PushbackInputStream {
+
+  /** Smallest size the pushback buffer is ever shrunk to. */
+  private final int origsize;
+
+  /**
+   * Creates a stream with a one-byte initial pushback buffer.
+   *
+   * @param in the stream to read from
+   */
+  public PeekAheadInputStream(InputStream in) {
+    this(in, 1);
+  }
+
+  /**
+   * Creates a stream with the given initial pushback buffer size.
+   *
+   * @param in the stream to read from
+   * @param initialSize initial (and minimum) pushback buffer size; must be positive
+   */
+  public PeekAheadInputStream(InputStream in, int initialSize) {
+    super(in,initialSize);
+    this.origsize = initialSize;
+  }
+
+  /**
+   * Discards any pushed-back bytes and resets the buffer to its initial size.
+   *
+   * @return the length of the buffer that was discarded
+   */
+  public int clear() {
+    int m = buf.length;
+    buf = new byte[origsize];
+    pos = origsize;
+    return m;
+  }
+
+  /**
+   * Drops the already-consumed front of the pushback buffer, never going
+   * below the initial size.  Pending pushed-back bytes are preserved.
+   *
+   * @return the number of bytes by which the buffer length was reduced
+   */
+  public int shrink() {
+    if (pos == 0) return 0; // buffer is completely full, nothing to reclaim
+    byte[] old = buf;
+    int pending = old.length - pos;
+    int size = Math.max(pending, origsize);
+    int start = size - pending;
+    buf = new byte[size];
+    System.arraycopy(old, pos, buf, start, pending);
+    pos = start;
+    return old.length - size;
+  }
+
+  /**
+   * Grows the buffer at the front so that at least <code>needed</code> bytes
+   * can be pushed back.  Does nothing if there is already enough room.
+   */
+  private void ensureRoom(int needed) {
+    if (buf == null || needed <= pos) return; // closed (super reports it) or enough room
+    int extra = needed - pos;
+    byte[] old = buf;
+    buf = new byte[old.length + extra];
+    System.arraycopy(old, 0, buf, extra, old.length);
+    pos += extra;
+  }
+
+  /**
+   * Pushes back a range of bytes, growing the buffer first if it is too small.
+   *
+   * @throws IOException if the superclass rejects the push back (e.g. stream closed)
+   */
+  @Override
+  public void unread(byte[] b, int off, int len) throws IOException {
+    ensureRoom(len);
+    super.unread(b, off, len);
+  }
+
+  /**
+   * Pushes back a single byte, growing the buffer first if it is full.
+   *
+   * @throws IOException if the superclass rejects the push back (e.g. stream closed)
+   */
+  @Override
+  public void unread(int b) throws IOException {
+    ensureRoom(1);
+    super.unread(b);
+  }
+
+  /**
+   * Returns the next byte without consuming it.
+   *
+   * @return the next byte, or -1 at end of stream (nothing is pushed back then)
+   */
+  public int peek() throws IOException {
+    int m = read();
+    if (m != -1) unread(m);
+    return m;
+  }
+
+  /**
+   * Fills as much of the array as a single read allows without consuming it.
+   *
+   * @return the number of bytes peeked, or -1 at end of stream
+   */
+  public int peek(byte[] b) throws IOException {
+    return peek(b, 0, b.length);
+  }
+
+  /**
+   * Reads up to <code>len</code> bytes into <code>b</code> and pushes back
+   * exactly the bytes that were read, so the stream position is unchanged.
+   * Like read(), this may deliver fewer than <code>len</code> bytes.
+   *
+   * @return the number of bytes peeked, or -1 at end of stream
+   */
+  public int peek(byte[] b, int off, int len) throws IOException {
+    int r = read(b, off, len);
+    // Only unread what was really read; unreading len would inject stale bytes
+    if (r > 0) unread(b, off, r);
+    return r;
+  }
+
+  /** Releases a grown buffer once every pushed-back byte has been consumed. */
+  private void shrinkIfDrained() {
+    if (pos >= buf.length && buf.length > origsize) shrink();
+  }
+
+  @Override
+  public int read() throws IOException {
+    int m = super.read();
+    shrinkIfDrained();
+    return m;
+  }
+
+  @Override
+  public int read(byte[] b, int off, int len) throws IOException {
+    int r = super.read(b, off, len);
+    shrinkIfDrained();
+    return r;
+  }
+
+  @Override
+  public long skip(long n) throws IOException {
+    long r = super.skip(n);
+    shrinkIfDrained();
+    return r;
+  }
+}