You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/03/25 15:31:11 UTC

svn commit: r1085399 - /tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java

Author: jukka
Date: Fri Mar 25 14:31:11 2011
New Revision: 1085399

URL: http://svn.apache.org/viewvc?rev=1085399&view=rev
Log:
TIKA-160: Support encryption formats

Add a LookaheadInputStream class that will make it easier to do
conditional parsing or type detection with formats that read input
as blocks or need to mark/reset a given stream at various locations.

Added:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java   (with props)

Added: tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java?rev=1085399&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java Fri Mar 25 14:31:11 2011
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Stream wrapper that makes it easy to read up to n bytes ahead from
+ * a stream that supports the mark feature. This class insulates the
+ * underlying stream from things like possible mark(), reset() and close()
+ * calls by external components that might otherwise invalidate the marked
+ * state of a stream.
+ * <p>
+ * The recommended usage pattern of this class is:
+ * <pre>
+ *     InputStream lookahead = new LookaheadInputStream(stream, n);
+ *     try {
+ *         processStream(lookahead);
+ *     } finally {
+ *         lookahead.close();
+ *     }
+ * </pre>
+ * <p>
+ * This usage pattern guarantees that only up to n bytes from the original
+ * stream can ever be read, and that the stream will have been marked and
+ * then reset to its original state once the above code block exits. No
+ * code in the fictional processStream() method can affect the state of
+ * the original stream.
+ *
+ * @since Apache Tika 1.0
+ */
+public class LookaheadInputStream extends InputStream {
+
+    /** Underlying stream; set to {@code null} once {@link #close()} has reset it. */
+    private InputStream stream;
+
+    /** Holds the bytes read ahead so far; its length is the lookahead limit. */
+    private final byte[] buffer;
+
+    /** Number of valid bytes currently stored in {@link #buffer}. */
+    private int buffered = 0;
+
+    /** Index in {@link #buffer} of the next byte to hand out. */
+    private int position = 0;
+
+    /** Value of {@link #position} saved by the latest {@link #mark(int)} call. */
+    private int mark = 0;
+
+    /**
+     * Creates a lookahead wrapper for the given stream and marks the stream
+     * so that {@link #close()} can later reset it.
+     *
+     * @param stream input stream, expected to support the mark feature
+     * @param n maximum number of bytes that can be read ahead
+     */
+    public LookaheadInputStream(InputStream stream, int n) {
+        this.stream = stream;
+        // The buffer must hold n bytes: fill() never reads past
+        // buffer.length, so a smaller buffer would cut the lookahead short.
+        this.buffer = new byte[n];
+        stream.mark(n);
+    }
+
+    /**
+     * Resets the underlying stream to the mark set by the constructor and
+     * releases the reference to it. The underlying stream itself is not
+     * closed. Calling this method again has no further effect.
+     *
+     * @throws IOException if the underlying stream can not be reset
+     */
+    @Override
+    public void close() throws IOException {
+        if (stream != null) {
+            stream.reset();
+            stream = null;
+        }
+    }
+
+    /**
+     * Reads more bytes from the underlying stream into the buffer, but only
+     * when no unread bytes remain, the buffer still has room and the
+     * underlying stream has not been released yet. When the underlying
+     * stream reports end of stream, it is reset and released via
+     * {@link #close()}.
+     *
+     * @throws IOException if reading or resetting the underlying stream fails
+     */
+    private void fill() throws IOException {
+        if (available() == 0 && buffered < buffer.length && stream != null) {
+            int n = stream.read(buffer, buffered, buffer.length - buffered);
+            if (n != -1) {
+                buffered += n;
+            } else {
+                close();
+            }
+        }
+    }
+
+    /**
+     * Reads the next byte of lookahead data.
+     *
+     * @return the next byte as a value from 0 to 255, or -1 if no more
+     *         bytes are available within the lookahead limit
+     * @throws IOException if the underlying stream fails
+     */
+    @Override
+    public int read() throws IOException {
+        fill();
+        if (buffered > position) {
+            return 0xff & buffer[position++];
+        } else {
+            return -1;
+        }
+    }
+
+    /**
+     * Copies up to {@code len} bytes of lookahead data into the given array.
+     * At most the bytes that are currently buffered are returned, so the
+     * result may be smaller than {@code len}.
+     *
+     * @param b destination array
+     * @param off offset in {@code b} of the first byte to write
+     * @param len maximum number of bytes to copy
+     * @return number of bytes copied, or -1 if no more bytes are available
+     *         within the lookahead limit
+     * @throws IOException if the underlying stream fails
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        fill();
+        if (buffered > position) {
+            len = Math.min(len, buffered - position);
+            System.arraycopy(buffer, position, b, off, len);
+            position += len;
+            return len;
+        } else {
+            return -1;
+        }
+    }
+
+    /**
+     * Skips over bytes that are already buffered. This method never reads
+     * from the underlying stream, so at most {@link #available()} bytes
+     * are skipped.
+     *
+     * @param n number of bytes to skip; zero or negative skips nothing
+     * @return number of bytes actually skipped, never negative
+     */
+    @Override
+    public long skip(long n) {
+        // Clamp to [0, available()]: a negative request must not move the
+        // read position backwards (possibly below zero).
+        long skipped = Math.max(0, Math.min(n, available()));
+        position += (int) skipped;
+        return skipped;
+    }
+
+    /**
+     * Returns the number of buffered bytes that have not been read yet.
+     *
+     * @return count of bytes readable without touching the underlying stream
+     */
+    @Override
+    public int available() {
+        return buffered - position;
+    }
+
+    /**
+     * Reports that this wrapper supports {@link #mark(int)} and
+     * {@link #reset()}.
+     *
+     * @return always {@code true}
+     */
+    @Override
+    public boolean markSupported() {
+        return true;
+    }
+
+    /**
+     * Remembers the current read position within the lookahead buffer.
+     * The underlying stream is not touched.
+     *
+     * @param readlimit ignored
+     */
+    @Override
+    public synchronized void mark(int readlimit) {
+        mark = position;
+    }
+
+    /**
+     * Moves the read position back to the one saved by the latest
+     * {@link #mark(int)} call, or to the start of the lookahead buffer if
+     * mark was never called. The underlying stream is not touched.
+     */
+    @Override
+    public synchronized void reset() {
+        position = mark;
+    }
+
+}

Propchange: tika/trunk/tika-core/src/main/java/org/apache/tika/io/LookaheadInputStream.java
------------------------------------------------------------------------------
    svn:executable = *