You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 00:54:50 UTC

[2/5] tika git commit: TIKA-2022 - clean up -- make entries private, move more into EndianUtils

TIKA-2022 - clean up -- make entries private, move more into EndianUtils


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c84855f6
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c84855f6
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c84855f6

Branch: refs/heads/2.x
Commit: c84855f6757c714a9fdcec55ca14b628a107642e
Parents: 865c45c
Author: tballison <ta...@mitre.org>
Authored: Tue Jun 28 20:13:01 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Tue Jun 28 20:13:01 2016 -0400

----------------------------------------------------------------------
 .../java/org/apache/tika/io/EndianUtils.java    | 19 +++++++++++
 .../org/apache/tika/io/EndianUtilsTest.java     | 16 +++++++++
 .../parser/apple/AppleSingleFileParser.java     | 35 ++++++++++----------
 3 files changed, 52 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c84855f6/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
index 2ab85b3..05da5e0 100644
--- a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
@@ -93,6 +93,25 @@ public class EndianUtils {
     }
 
     /**
+     * Get a BE (big-endian) unsigned int value from an InputStream. The value
+     * is returned as a long so the whole unsigned 32-bit range stays non-negative.
+     * @param stream the InputStream from which the int is to be read
+     * @return the unsigned int (32-bit) value, in the range 0 to 4294967295
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if the stream cannot provide enough bytes
+     */
+    public static long readUIntBE(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4) < 0) {
+            throw new BufferUnderrunException();
+        }
+        return ((long) ch1 << 24) | (ch2 << 16) | (ch3 << 8) | ch4; // widen first: no sign bit to mask
+    }
+
+    /**
      * Get a LE int value from an InputStream
      *
      * @param stream the InputStream from which the int is to be read

http://git-wip-us.apache.org/repos/asf/tika/blob/c84855f6/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
index 8f481c3..50084d2 100644
--- a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
@@ -54,6 +54,22 @@ public class EndianUtilsTest {
         } catch (EndianUtils.BufferUnderrunException e) {
 
         }
+    }
+
+    @Test
+    public void testReadUIntBE() throws Exception {
+        byte[] data = new byte[] {(byte)0x00, (byte)0x00, (byte)0x00, (byte)0x08 };
+        assertEquals((long) 8, EndianUtils.readUIntBE(new ByteArrayInputStream(data)));
 
+        data = new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xF0 };
+        assertEquals(4294967280L, EndianUtils.readUIntBE(new ByteArrayInputStream(data)));
+        // only three bytes available: the big-endian reader itself must report the underrun
+        data = new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF  };
+        try {
+            EndianUtils.readUIntBE(new ByteArrayInputStream(data));
+            fail("Should have thrown exception");
+        } catch (EndianUtils.BufferUnderrunException e) {
+            // expected
+        }
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/c84855f6/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
index ffb5759..3f548ca 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
@@ -50,21 +50,21 @@ public class AppleSingleFileParser extends AbstractParser {
     /**
      * Entry types
      */
-    public static final int DATA_FORK = 1;
-    public static final int RESOURCE_FORK = 2;
-    public static final int REAL_NAME = 3;
-    public static final int COMMENT = 4;
-    public static final int ICON_BW = 5;
-    public static final int ICON_COLOR = 6;
+    private static final int DATA_FORK = 1;
+    private static final int RESOURCE_FORK = 2;
+    private static final int REAL_NAME = 3;
+    private static final int COMMENT = 4;
+    private static final int ICON_BW = 5;
+    private static final int ICON_COLOR = 6;
     //7?!
-    public static final int FILE_DATES_INFO = 8;
-    public static final int FINDER_INFO = 9;
-    public static final int MACINTOSH_FILE_INFO = 10;
-    public static final int PRODOS_FILE_INFO = 11;
-    public static final int MSDOS_FILE_INFO = 12;
-    public static final int SHORT_NAME = 13;
-    public static final int AFP_FILE_INFO = 14;
-    public static final int DIRECTORY_ID = 15;
+    private static final int FILE_DATES_INFO = 8;
+    private static final int FINDER_INFO = 9;
+    private static final int MACINTOSH_FILE_INFO = 10;
+    private static final int PRODOS_FILE_INFO = 11;
+    private static final int MSDOS_FILE_INFO = 12;
+    private static final int SHORT_NAME = 13;
+    private static final int AFP_FILE_INFO = 14;
+    private static final int DIRECTORY_ID = 15;
 
     private static final Set<MediaType> SUPPORTED_TYPES =
             Collections.singleton(MediaType.application("applefile"));
@@ -94,7 +94,6 @@ public class AppleSingleFileParser extends AbstractParser {
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
         xhtml.startDocument();
         if (contentFieldInfo != null) {
-            System.out.println(contentFieldInfo.offset + " "+bytesRead);
             long diff = contentFieldInfo.offset-bytesRead;
             IOUtils.skipFully(stream, diff);
             if (ex.shouldParseEmbedded(embeddedMetadata)) {
@@ -153,9 +152,9 @@ public class AppleSingleFileParser extends AbstractParser {
             //convert 32-bit unsigned ints to longs
             fieldInfoList.add(
                     new FieldInfo(
-                            EndianUtils.readIntBE(stream) & 0x00000000ffffffffL, //entry id
-                            EndianUtils.readIntBE(stream) & 0x00000000ffffffffL, //offset
-                            EndianUtils.readIntBE(stream) & 0x00000000ffffffffL  //length
+                            EndianUtils.readUIntBE(stream), //entry id
+                            EndianUtils.readUIntBE(stream), //offset
+                            EndianUtils.readUIntBE(stream) //length
                     )
             );
         }