You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/27 16:48:23 UTC

[1/2] tika git commit: TIKA-2023 -- clean up RTFParser to use EndianUtils

Repository: tika
Updated Branches:
  refs/heads/master e6c2839c0 -> 7db0ab628


TIKA-2023 -- clean up RTFParser to use EndianUtils


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c1cea20e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c1cea20e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c1cea20e

Branch: refs/heads/master
Commit: c1cea20efb5ed28cfd3fe889120d7ddbef9966a8
Parents: e6c2839
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 27 12:46:47 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 27 12:46:47 2016 -0400

----------------------------------------------------------------------
 .../java/org/apache/tika/io/EndianUtils.java    | 20 ++++++++++
 .../org/apache/tika/io/EndianUtilsTest.java     | 19 +++++++++
 .../tika/parser/rtf/RTFEmbObjHandler.java       |  7 +---
 .../tika/parser/rtf/RTFObjDataParser.java       | 41 ++++++++------------
 .../apache/tika/parser/rtf/TextExtractor.java   | 11 +-----
 5 files changed, 60 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c1cea20e/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
index 1e33986..3416f55 100644
--- a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
@@ -70,6 +70,26 @@ public class EndianUtils {
       return (ch1 << 8) + (ch2 << 0);
    }
 
+
+   /**
+    * Get a LE unsigned int value from an InputStream
+    *
+    * @param  stream the InputStream from which the int is to be read
+    * @return                              the int (32-bit) value
+    * @exception  IOException              will be propagated back to the caller
+    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
+    */
+   public static long readUIntLE(InputStream stream) throws IOException, BufferUnderrunException {
+      int ch1 = stream.read();
+      int ch2 = stream.read();
+      int ch3 = stream.read();
+      int ch4 = stream.read();
+      if ((ch1 | ch2 | ch3 | ch4) < 0) {
+         throw new BufferUnderrunException();
+      }
+      return ((ch4 << 24) + (ch3<<16) + (ch2 << 8) + (ch1 << 0)) & 0x00FFFFFFFFl;
+   }
+
    /**
     * Get a LE int value from an InputStream
     *

http://git-wip-us.apache.org/repos/asf/tika/blob/c1cea20e/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
index 7f10cdd..8f481c3 100644
--- a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
@@ -18,6 +18,7 @@
 package org.apache.tika.io;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
 import java.io.ByteArrayInputStream;
 
@@ -37,4 +38,22 @@ public class EndianUtilsTest {
         data = new byte[] { (byte)0xac, (byte)0xbe, 0x17 };
         assertEquals((long)728855, EndianUtils.readUE7(new ByteArrayInputStream(data)));
     }
+
+    @Test
+    public void testReadUIntLE() throws Exception {
+        byte[] data = new byte[] {(byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00 };
+        assertEquals((long) 8, EndianUtils.readUIntLE(new ByteArrayInputStream(data)));
+
+        data = new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF };
+        assertEquals(4294967295L, EndianUtils.readUIntLE(new ByteArrayInputStream(data)));
+
+        data = new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF  };
+        try {
+            EndianUtils.readUIntLE(new ByteArrayInputStream(data));
+            fail("Should have thrown exception");
+        } catch (EndianUtils.BufferUnderrunException e) {
+
+        }
+
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/c1cea20e/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
index 183df62..1334906 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
@@ -157,11 +158,7 @@ class RTFEmbObjHandler {
         }
 
         byte[] bytes = new byte[len];
-        int bytesRead = is.read(bytes);
-        if (bytesRead < len) {
-            throw new TikaException("unexpected end of file: need " + len +
-                    " bytes of binary data, found " + (len - bytesRead));
-        }
+        IOUtils.readFully(is, bytes);
         os.write(bytes);
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c1cea20e/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
index cc9d62f..147d2e8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
@@ -36,6 +36,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.Ole10Native;
 import org.apache.poi.poifs.filesystem.Ole10NativeException;
 import org.apache.poi.util.IOUtils;
+import org.apache.tika.io.EndianUtils;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.RTFMetadata;
@@ -48,10 +49,6 @@ import org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType;
  */
 class RTFObjDataParser {
 
-    private final static int[] INT_LE_POWS = new int[]{
-            1, 256, 65536, 16777216
-    };
-
     private final static String WIN_ASCII = "WINDOWS-1252";
 
     /**
@@ -195,7 +192,12 @@ class RTFObjDataParser {
 
         //should we add this to the metadata?
         readAnsiString(is); //iconFilePath
-        readUShort(is); //iconIndex
+        try {
+            //iconIndex
+            EndianUtils.readUShortBE(is);
+        } catch (EndianUtils.BufferUnderrunException e) {
+            throw new IOException(e);
+        }
         int type = readUShort(is); //type
 
         //1 is link, 3 is embedded object
@@ -209,7 +211,7 @@ class RTFObjDataParser {
         String ansiFilePath = readAnsiString(is); //filePath
         long bytesLen = readUInt(is);
         byte[] objBytes = initByteArray(bytesLen);
-        is.read(objBytes);
+        IOUtils.readFully(is, objBytes);
         StringBuilder unicodeFilePath = new StringBuilder();
 
         try {
@@ -248,24 +250,19 @@ class RTFObjDataParser {
 
 
     private int readUShort(InputStream is) throws IOException {
-        int lo = is.read();
-        int hi = is.read() * 256;
-        if (lo == -1 || hi == -1) {
-            throw new IOException("Hit end of stream before reading little endian unsigned short.");
+        try {
+            return EndianUtils.readUShortLE(is);
+        } catch (EndianUtils.BufferUnderrunException e) {
+            throw new IOException(e);
         }
-        return hi + lo;
     }
 
     private long readUInt(InputStream is) throws IOException {
-        long sum = 0;
-        for (int i = 0; i < 4; i++) {
-            int v = is.read();
-            if (v == -1) {
-                throw new IOException("Hit end of stream before finishing little endian unsigned int.");
-            }
-            sum += v * (long) INT_LE_POWS[i];
+        try {
+            return EndianUtils.readUIntLE(is);
+        } catch (EndianUtils.BufferUnderrunException e) {
+            throw new IOException(e);
         }
-        return sum;
     }
 
     private String readAnsiString(InputStream is) throws IOException {
@@ -296,11 +293,7 @@ class RTFObjDataParser {
     private byte[] readBytes(InputStream is, long len) throws IOException {
         //initByteArray tests for "reading of too many bytes"
         byte[] bytes = initByteArray(len);
-        int read = is.read(bytes);
-        if (read != len) {
-            throw new IOException("Hit end of stream before reading all bytes");
-        }
-
+        IOUtils.readFully(is, bytes);
         return bytes;
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c1cea20e/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
index eeb58ce..6c86765 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
@@ -33,6 +33,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.TimeZone;
 
+import org.apache.commons.io.IOUtils;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Office;
@@ -952,15 +953,7 @@ final class TextExtractor {
                         embObjHandler.reset();
                     }
                 } else {
-                    int bytesToRead = param;
-                    byte[] tmpArray = new byte[Math.min(1024, bytesToRead)];
-                    while (bytesToRead > 0) {
-                        int r = in.read(tmpArray, 0, Math.min(bytesToRead, tmpArray.length));
-                        if (r < 0) {
-                            throw new TikaException("unexpected end of file: need " + param + " bytes of binary data, found " + (param - bytesToRead));
-                        }
-                        bytesToRead -= r;
-                    }
+                    IOUtils.skipFully(in, param);
                 }
             } else {
                 // log some warning?


[2/2] tika git commit: TIKA-2023 -- clean up newlines and indenting

Posted by ta...@apache.org.
TIKA-2023 -- clean up newlines and indenting


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/7db0ab62
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/7db0ab62
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/7db0ab62

Branch: refs/heads/master
Commit: 7db0ab6287b2c795ccd7a6383d344434e79611ae
Parents: c1cea20
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 27 12:48:14 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 27 12:48:14 2016 -0400

----------------------------------------------------------------------
 .../java/org/apache/tika/io/EndianUtils.java    | 831 ++++++++++---------
 1 file changed, 421 insertions(+), 410 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/7db0ab62/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
index 3416f55..2ab85b3 100644
--- a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
@@ -25,418 +25,429 @@ import org.apache.tika.exception.TikaException;
  * General Endian Related Utilties.
  * <p>
  * This class provides static utility methods for input/output operations
- *  on numbers in Big and Little Endian formats.
+ * on numbers in Big and Little Endian formats.
  * <p>
  * Origin of code: Based on the version in POI
  */
 public class EndianUtils {
-   /**
-    * Get a LE short value from an InputStream
-    *
-    * @param  stream the InputStream from which the short is to be read
-    * @return                              the short (16-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static short readShortLE(InputStream stream) throws IOException, BufferUnderrunException {
-      return (short) readUShortLE(stream);
-   }
-   /**
-    * Get a BE short value from an InputStream
-    *
-    * @param  stream the InputStream from which the short is to be read
-    * @return                              the short (16-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static short readShortBE(InputStream stream) throws IOException, BufferUnderrunException {
-      return (short) readUShortBE(stream);
-   }
-
-   public static int readUShortLE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      if ((ch1 | ch2) < 0) {
-         throw new BufferUnderrunException();
-      }
-      return (ch2 << 8) + (ch1 << 0);
-   }
-   public static int readUShortBE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      if ((ch1 | ch2) < 0) {
-         throw new BufferUnderrunException();
-      }
-      return (ch1 << 8) + (ch2 << 0);
-   }
-
-
-   /**
-    * Get a LE unsigned int value from an InputStream
-    *
-    * @param  stream the InputStream from which the int is to be read
-    * @return                              the int (32-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static long readUIntLE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      int ch3 = stream.read();
-      int ch4 = stream.read();
-      if ((ch1 | ch2 | ch3 | ch4) < 0) {
-         throw new BufferUnderrunException();
-      }
-      return ((ch4 << 24) + (ch3<<16) + (ch2 << 8) + (ch1 << 0)) & 0x00FFFFFFFFl;
-   }
-
-   /**
-    * Get a LE int value from an InputStream
-    *
-    * @param  stream the InputStream from which the int is to be read
-    * @return                              the int (32-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static int readIntLE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      int ch3 = stream.read();
-      int ch4 = stream.read();
-      if ((ch1 | ch2 | ch3 | ch4) < 0) {
-         throw new BufferUnderrunException();
-      }
-      return (ch4 << 24) + (ch3<<16) + (ch2 << 8) + (ch1 << 0);
-   }
-   /**
-    * Get a BE int value from an InputStream
-    *
-    * @param  stream the InputStream from which the int is to be read
-    * @return                              the int (32-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static int readIntBE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      int ch3 = stream.read();
-      int ch4 = stream.read();
-      if ((ch1 | ch2 | ch3 | ch4) < 0) {
-         throw new BufferUnderrunException();
-      }
-      return (ch1 << 24) + (ch2<<16) + (ch3 << 8) + (ch4 << 0);
-   }
-
-   /**
-    * Get a LE long value from an InputStream
-    *
-    * @param  stream the InputStream from which the long is to be read
-    * @return                              the long (64-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static long readLongLE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      int ch3 = stream.read();
-      int ch4 = stream.read();
-      int ch5 = stream.read();
-      int ch6 = stream.read();
-      int ch7 = stream.read();
-      int ch8 = stream.read();
-      if ((ch1 | ch2 | ch3 | ch4 | ch5 | ch6 | ch7 | ch8) < 0) {
-         throw new BufferUnderrunException();
-      }
-
-      return
-      ((long)ch8 << 56) +
-      ((long)ch7 << 48) +
-      ((long)ch6 << 40) +
-      ((long)ch5 << 32) +
-      ((long)ch4 << 24) + // cast to long to preserve bit 31 (sign bit for ints)
-      (ch3 << 16) +
-      (ch2 <<  8) +
-      (ch1 <<  0);
-   }
-   /**
-    * Get a NE long value from an InputStream
-    *
-    * @param  stream the InputStream from which the long is to be read
-    * @return                              the long (64-bit) value
-    * @exception  IOException              will be propagated back to the caller
-    * @exception  BufferUnderrunException  if the stream cannot provide enough bytes
-    */
-   public static long readLongBE(InputStream stream) throws IOException, BufferUnderrunException {
-      int ch1 = stream.read();
-      int ch2 = stream.read();
-      int ch3 = stream.read();
-      int ch4 = stream.read();
-      int ch5 = stream.read();
-      int ch6 = stream.read();
-      int ch7 = stream.read();
-      int ch8 = stream.read();
-      if ((ch1 | ch2 | ch3 | ch4 | ch5 | ch6 | ch7 | ch8) < 0) {
-         throw new BufferUnderrunException();
-      }
-
-      return
-      ((long)ch1 << 56) +
-      ((long)ch2 << 48) +
-      ((long)ch3 << 40) +
-      ((long)ch4 << 32) +
-      ((long)ch5 << 24) + // cast to long to preserve bit 31 (sign bit for ints)
-      (ch6 << 16) +
-      (ch7 <<  8) +
-      (ch8 <<  0);
-   }
-   
-   /**
-    * Gets the integer value that is stored in UTF-8 like fashion, in Big Endian
-    *   but with the high bit on each number indicating if it continues or not
-    */
-   public static long readUE7(InputStream stream) throws IOException {
-       int i;
-       long v = 0;
-       while ((i = stream.read()) >= 0) {
-           v = v << 7;
-           if ((i & 128) == 128) {
-               // Continues
-               v += (i&127);
-           } else {
-               // Last value
-               v += i;
-               break;
-           }
-       }
-       return v;
-   }
-   
-   
-   /**
-    * Get a LE short value from the beginning of a byte array
-    *
-    *@param  data  the byte array
-    *@return       the short (16-bit) value
-    */
-   public static short getShortLE(byte[] data) {
-      return getShortLE(data, 0);
-   }
-   /**
-    * Get a LE short value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the short (16-bit) value
-    */
-   public static short getShortLE(byte[] data, int offset) {
-      return (short)getUShortLE(data, offset);
-   }
-
-   /**
-    * Get a LE unsigned short value from the beginning of a byte array
-    *
-    *@param  data  the byte array
-    *@return       the unsigned short (16-bit) value in an int
-    */
-   public static int getUShortLE(byte[] data) {
-      return getUShortLE(data, 0);
-   }
-   /**
-    * Get a LE unsigned short value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the unsigned short (16-bit) value in an integer
-    */
-   public static int getUShortLE(byte[] data, int offset) {
-      int b0 = data[offset] & 0xFF;
-      int b1 = data[offset+1] & 0xFF;
-      return (b1 << 8) + (b0 << 0);
-   }
-   
-   /**
-    * Get a BE short value from the beginning of a byte array
-    *
-    *@param  data  the byte array
-    *@return       the short (16-bit) value
-    */
-   public static short getShortBE(byte[] data) {
-      return getShortBE(data, 0);
-   }
-   /**
-    * Get a BE short value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the short (16-bit) value
-    */
-   public static short getShortBE(byte[] data, int offset) {
-      return (short)getUShortBE(data, offset);
-   }
-
-   /**
-    * Get a BE unsigned short value from the beginning of a byte array
-    *
-    *@param  data  the byte array
-    *@return       the unsigned short (16-bit) value in an int
-    */
-   public static int getUShortBE(byte[] data) {
-      return getUShortBE(data, 0);
-   }
-   /**
-    * Get a BE unsigned short value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the unsigned short (16-bit) value in an integer
-    */
-   public static int getUShortBE(byte[] data, int offset) {
-      int b0 = data[offset] & 0xFF;
-      int b1 = data[offset+1] & 0xFF;
-      return (b0 << 8) + (b1 << 0);
-   }
-
-   /**
-    * Get a LE int value from the beginning of a byte array
-    *
-    *@param  data  the byte array
-    *@return the int (32-bit) value
-    */
-   public static int getIntLE(byte[] data) {
-       return getIntLE(data, 0);
-   }
-   /**
-    * Get a LE int value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the int (32-bit) value
-    */
-   public static int getIntLE(byte[] data, int offset) {
-       int i=offset;
-       int b0 = data[i++] & 0xFF;
-       int b1 = data[i++] & 0xFF;
-       int b2 = data[i++] & 0xFF;
-       int b3 = data[i++] & 0xFF;
-       return (b3 << 24) + (b2 << 16) + (b1 << 8) + (b0 << 0);
-   }
-
-   /**
-    * Get a BE int value from the beginning of a byte array
-    *
-    *@param  data  the byte array
-    *@return the int (32-bit) value
-    */
-   public static int getIntBE(byte[] data) {
-       return getIntBE(data, 0);
-   }
-   /**
-    * Get a BE int value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the int (32-bit) value
-    */
-   public static int getIntBE(byte[] data, int offset) {
-       int i=offset;
-       int b0 = data[i++] & 0xFF;
-       int b1 = data[i++] & 0xFF;
-       int b2 = data[i++] & 0xFF;
-       int b3 = data[i++] & 0xFF;
-       return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0);
-   }
-
-   /**
-    * Get a LE unsigned int value from a byte array
-    *
-    *@param  data    the byte array
-    *@return         the unsigned int (32-bit) value in a long
-    */
-   public static long getUIntLE(byte[] data) {
-       return getUIntLE(data,0);
-   }
-   /**
-    * Get a LE unsigned int value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the unsigned int (32-bit) value in a long
-    */
-   public static long getUIntLE(byte[] data, int offset) {
-       long retNum = getIntLE(data, offset);
-       return retNum & 0x00FFFFFFFFl;
-   }
-
-   /**
-    * Get a BE unsigned int value from a byte array
-    *
-    *@param  data    the byte array
-    *@return         the unsigned int (32-bit) value in a long
-    */
-   public static long getUIntBE(byte[] data) {
-       return getUIntBE(data,0);
-   }
-   /**
-    * Get a BE unsigned int value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the unsigned int (32-bit) value in a long
-    */
-   public static long getUIntBE(byte[] data, int offset) {
-       long retNum = getIntBE(data, offset);
-       return retNum & 0x00FFFFFFFFl;
-   }
-
-   /**
-    * Get a LE long value from a byte array
-    *
-    *@param  data    the byte array
-    *@param  offset  a starting offset into the byte array
-    *@return         the long (64-bit) value
-    */
-   public static long getLongLE(byte[] data, int offset) {
-      long result = 0;
-
-      for (int j = offset + LONG_SIZE - 1; j >= offset; j--) {
-         result <<= 8;
-         result |= 0xff & data[j];
-      }
-      return result;
-   }
-   private static final int LONG_SIZE = 8;
-
-   
-   /**
-    *  Convert an 'unsigned' byte to an integer. ie, don't carry across the
-    *  sign.
-    *
-    * @param  b  Description of the Parameter
-    * @return    Description of the Return Value
-    */
-   public static int ubyteToInt(byte b) {
-      return b & 0xFF;
-   }
-
-   /**
-    * get the unsigned value of a byte.
-    * 
-    * @param data
-    *            the byte array.
-    * @param offset
-    *            a starting offset into the byte array.
-    * @return the unsigned value of the byte as a 16 bit short
-    */
-   public static short getUByte( byte[] data, int offset )
-   {
-      return (short) ( data[offset] & 0xFF );
-   }
-   
-   
-   public static class BufferUnderrunException extends TikaException {
-      private static final long serialVersionUID = 8358288231138076276L;
-      public BufferUnderrunException() {
-         super("Insufficient data left in stream for required read");
-      }
-   }
+    /**
+     * Get a LE short value from an InputStream
+     *
+     * @param stream the InputStream from which the short is to be read
+     * @return the short (16-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static short readShortLE(InputStream stream) throws IOException, BufferUnderrunException {
+        return (short) readUShortLE(stream);
+    }
+
+    /**
+     * Get a BE short value from an InputStream
+     *
+     * @param stream the InputStream from which the short is to be read
+     * @return the short (16-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static short readShortBE(InputStream stream) throws IOException, BufferUnderrunException {
+        return (short) readUShortBE(stream);
+    }
+
+    public static int readUShortLE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        if ((ch1 | ch2) < 0) {
+            throw new BufferUnderrunException();
+        }
+        return (ch2 << 8) + (ch1 << 0);
+    }
+
+    public static int readUShortBE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        if ((ch1 | ch2) < 0) {
+            throw new BufferUnderrunException();
+        }
+        return (ch1 << 8) + (ch2 << 0);
+    }
+
+
+    /**
+     * Get a LE unsigned int value from an InputStream
+     *
+     * @param stream the InputStream from which the int is to be read
+     * @return the unsigned int (32-bit) value in a long
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static long readUIntLE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4) < 0) {
+            throw new BufferUnderrunException();
+        }
+        return ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0)) & 0x00FFFFFFFFl;
+    }
+
+    /**
+     * Get a LE int value from an InputStream
+     *
+     * @param stream the InputStream from which the int is to be read
+     * @return the int (32-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static int readIntLE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4) < 0) {
+            throw new BufferUnderrunException();
+        }
+        return (ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0);
+    }
+
+    /**
+     * Get a BE int value from an InputStream
+     *
+     * @param stream the InputStream from which the int is to be read
+     * @return the int (32-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static int readIntBE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4) < 0) {
+            throw new BufferUnderrunException();
+        }
+        return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0);
+    }
+
+    /**
+     * Get a LE long value from an InputStream
+     *
+     * @param stream the InputStream from which the long is to be read
+     * @return the long (64-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static long readLongLE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        int ch5 = stream.read();
+        int ch6 = stream.read();
+        int ch7 = stream.read();
+        int ch8 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4 | ch5 | ch6 | ch7 | ch8) < 0) {
+            throw new BufferUnderrunException();
+        }
+
+        return
+                ((long) ch8 << 56) +
+                        ((long) ch7 << 48) +
+                        ((long) ch6 << 40) +
+                        ((long) ch5 << 32) +
+                        ((long) ch4 << 24) + // cast to long to preserve bit 31 (sign bit for ints)
+                        (ch3 << 16) +
+                        (ch2 << 8) +
+                        (ch1 << 0);
+    }
+
+    /**
+     * Get a BE long value from an InputStream
+     *
+     * @param stream the InputStream from which the long is to be read
+     * @return the long (64-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static long readLongBE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        int ch5 = stream.read();
+        int ch6 = stream.read();
+        int ch7 = stream.read();
+        int ch8 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4 | ch5 | ch6 | ch7 | ch8) < 0) {
+            throw new BufferUnderrunException();
+        }
+
+        return
+                ((long) ch1 << 56) +
+                        ((long) ch2 << 48) +
+                        ((long) ch3 << 40) +
+                        ((long) ch4 << 32) +
+                        ((long) ch5 << 24) + // cast to long to preserve bit 31 (sign bit for ints)
+                        (ch6 << 16) +
+                        (ch7 << 8) +
+                        (ch8 << 0);
+    }
+
+    /**
+     * Reads a variable-length integer stored most significant group first, with
+     * seven payload bits per byte. A byte whose high bit is set is followed by
+     * another byte; the first byte with a clear high bit ends the value.
+     * If the stream ends before such a byte is seen, the value accumulated so
+     * far is returned (0 for an empty stream).
+     *
+     * @param stream the InputStream to read from
+     * @return the decoded value
+     * @throws IOException will be propagated back to the caller
+     */
+    public static long readUE7(InputStream stream) throws IOException {
+        long value = 0;
+        for (int octet = stream.read(); octet >= 0; octet = stream.read()) {
+            // Make room for the next group, then append its seven payload bits.
+            value = (value << 7) + (octet & 0x7F);
+            if ((octet & 0x80) == 0) {
+                // High bit clear: this was the last byte of the value.
+                break;
+            }
+        }
+        return value;
+    }
+
+
+    /**
+     * Get a LE short value from the beginning of a byte array.
+     * Equivalent to calling {@link #getShortLE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the short (16-bit) value
+     */
+    public static short getShortLE(byte[] data) {
+        return getShortLE(data, 0);
+    }
+
+    /**
+     * Get a LE short value from a byte array.
+     * The two bytes are read as an unsigned value by
+     * {@link #getUShortLE(byte[], int)} and then narrowed to a short, so
+     * values above 32767 come back negative.
+     *
+     * @param data   the byte array
+     * @param offset a starting offset into the byte array
+     * @return the short (16-bit) value
+     */
+    public static short getShortLE(byte[] data, int offset) {
+        return (short) getUShortLE(data, offset);
+    }
+
+    /**
+     * Get a LE unsigned short value from the beginning of a byte array.
+     * Equivalent to calling {@link #getUShortLE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the unsigned short (16-bit) value in an int
+     */
+    public static int getUShortLE(byte[] data) {
+        return getUShortLE(data, 0);
+    }
+
+    /**
+     * Reads two bytes from a byte array as a little-endian unsigned short:
+     * the byte at {@code offset} is the low byte, the one after it the high byte.
+     *
+     * @param data   the byte array
+     * @param offset index of the first of the two bytes
+     * @return the unsigned 16-bit value, held in an int
+     */
+    public static int getUShortLE(byte[] data, int offset) {
+        // Mask each byte so a negative byte value does not sign-extend.
+        final int low = data[offset] & 0xFF;
+        final int high = data[offset + 1] & 0xFF;
+        return (high << 8) | low;
+    }
+
+    /**
+     * Get a BE short value from the beginning of a byte array.
+     * Equivalent to calling {@link #getShortBE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the short (16-bit) value
+     */
+    public static short getShortBE(byte[] data) {
+        return getShortBE(data, 0);
+    }
+
+    /**
+     * Get a BE short value from a byte array.
+     * The two bytes are read as an unsigned value by
+     * {@link #getUShortBE(byte[], int)} and then narrowed to a short, so
+     * values above 32767 come back negative.
+     *
+     * @param data   the byte array
+     * @param offset a starting offset into the byte array
+     * @return the short (16-bit) value
+     */
+    public static short getShortBE(byte[] data, int offset) {
+        return (short) getUShortBE(data, offset);
+    }
+
+    /**
+     * Get a BE unsigned short value from the beginning of a byte array.
+     * Equivalent to calling {@link #getUShortBE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the unsigned short (16-bit) value in an int
+     */
+    public static int getUShortBE(byte[] data) {
+        return getUShortBE(data, 0);
+    }
+
+    /**
+     * Reads two bytes from a byte array as a big-endian unsigned short:
+     * the byte at {@code offset} is the high byte, the one after it the low byte.
+     *
+     * @param data   the byte array
+     * @param offset index of the first of the two bytes
+     * @return the unsigned 16-bit value, held in an int
+     */
+    public static int getUShortBE(byte[] data, int offset) {
+        // Mask each byte so a negative byte value does not sign-extend.
+        final int high = data[offset] & 0xFF;
+        final int low = data[offset + 1] & 0xFF;
+        return (high << 8) | low;
+    }
+
+    /**
+     * Get a LE int value from the beginning of a byte array.
+     * Equivalent to calling {@link #getIntLE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the int (32-bit) value
+     */
+    public static int getIntLE(byte[] data) {
+        return getIntLE(data, 0);
+    }
+
+    /**
+     * Reads four bytes from a byte array as a little-endian int: the byte at
+     * {@code offset} is the least significant, the byte at {@code offset + 3}
+     * the most significant.
+     *
+     * @param data   the byte array
+     * @param offset index of the first of the four bytes
+     * @return the int (32-bit) value
+     */
+    public static int getIntLE(byte[] data, int offset) {
+        int value = 0;
+        // Bytes are visited in ascending index order; byte k lands at bit position 8 * k.
+        for (int k = 0; k < 4; k++) {
+            final int octet = data[offset + k] & 0xFF;
+            value |= octet << (8 * k);
+        }
+        return value;
+    }
+
+    /**
+     * Get a BE int value from the beginning of a byte array.
+     * Equivalent to calling {@link #getIntBE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the int (32-bit) value
+     */
+    public static int getIntBE(byte[] data) {
+        return getIntBE(data, 0);
+    }
+
+    /**
+     * Reads four bytes from a byte array as a big-endian int: the byte at
+     * {@code offset} is the most significant, the byte at {@code offset + 3}
+     * the least significant.
+     *
+     * @param data   the byte array
+     * @param offset index of the first of the four bytes
+     * @return the int (32-bit) value
+     */
+    public static int getIntBE(byte[] data, int offset) {
+        int value = 0;
+        // Bytes are visited in ascending index order; each one pushes the earlier ones up by 8 bits.
+        for (int k = 0; k < 4; k++) {
+            final int octet = data[offset + k] & 0xFF;
+            value = (value << 8) | octet;
+        }
+        return value;
+    }
+
+    /**
+     * Get a LE unsigned int value from the beginning of a byte array.
+     * Equivalent to calling {@link #getUIntLE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the unsigned int (32-bit) value in a long
+     */
+    public static long getUIntLE(byte[] data) {
+        return getUIntLE(data, 0);
+    }
+
+    /**
+     * Reads four bytes from a byte array as a little-endian unsigned int.
+     * The bytes are decoded by {@link #getIntLE(byte[], int)} and the result
+     * is widened to a long without carrying the sign across.
+     *
+     * @param data   the byte array
+     * @param offset a starting offset into the byte array
+     * @return the unsigned int (32-bit) value in a long
+     */
+    public static long getUIntLE(byte[] data, int offset) {
+        final long signExtended = getIntLE(data, offset);
+        // Keep only the low 32 bits so a negative int maps to its unsigned value.
+        return signExtended & 0xFFFFFFFFL;
+    }
+
+    /**
+     * Get a BE unsigned int value from the beginning of a byte array.
+     * Equivalent to calling {@link #getUIntBE(byte[], int)} with an offset of 0.
+     *
+     * @param data the byte array
+     * @return the unsigned int (32-bit) value in a long
+     */
+    public static long getUIntBE(byte[] data) {
+        return getUIntBE(data, 0);
+    }
+
+    /**
+     * Reads four bytes from a byte array as a big-endian unsigned int.
+     * The bytes are decoded by {@link #getIntBE(byte[], int)} and the result
+     * is widened to a long without carrying the sign across.
+     *
+     * @param data   the byte array
+     * @param offset a starting offset into the byte array
+     * @return the unsigned int (32-bit) value in a long
+     */
+    public static long getUIntBE(byte[] data, int offset) {
+        final long signExtended = getIntBE(data, offset);
+        // Keep only the low 32 bits so a negative int maps to its unsigned value.
+        return signExtended & 0xFFFFFFFFL;
+    }
+
+    /**
+     * Reads eight bytes from a byte array as a little-endian long: the byte at
+     * {@code offset} is the least significant, the byte at {@code offset + 7}
+     * the most significant.
+     *
+     * @param data   the byte array
+     * @param offset index of the first of the eight bytes
+     * @return the long (64-bit) value
+     */
+    public static long getLongLE(byte[] data, int offset) {
+        long value = 0L;
+        int index = offset + LONG_SIZE;
+        // Start at the most significant byte (the last of the eight) and walk back to offset.
+        while (--index >= offset) {
+            value = (value << 8) | (data[index] & 0xFF);
+        }
+        return value;
+    }
+
+    /** Number of bytes that make up a long. */
+    private static final int LONG_SIZE = 8;
+
+
+    /**
+     * Interprets a byte as an unsigned quantity, i.e. the sign is not carried
+     * across when widening to an int.
+     *
+     * @param b the byte to convert
+     * @return the value of {@code b} in the range 0 to 255
+     */
+    public static int ubyteToInt(byte b) {
+        final int widened = b;
+        // Widening sign-extends; the mask drops everything above the low eight bits.
+        return widened & 0xFF;
+    }
+
+    /**
+     * Reads a single byte from a byte array as an unsigned value.
+     *
+     * @param data   the byte array.
+     * @param offset index of the byte to read.
+     * @return the value of the byte in the range 0 to 255, held in a short
+     */
+    public static short getUByte(byte[] data, int offset) {
+        // Mask first so a negative byte does not sign-extend, then narrow to short.
+        final int unsigned = data[offset] & 0xFF;
+        return (short) unsigned;
+    }
+
+
+    /**
+     * Thrown by the stream-reading methods when the stream ends before all
+     * the bytes needed for a value have been read.
+     */
+    public static class BufferUnderrunException extends TikaException {
+        private static final long serialVersionUID = 8358288231138076276L;
+
+        /** Fixed detail message used for every instance. */
+        private static final String MESSAGE = "Insufficient data left in stream for required read";
+
+        /**
+         * Creates the exception with its fixed detail message.
+         */
+        public BufferUnderrunException() {
+            super(MESSAGE);
+        }
+    }
 }