You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 00:54:50 UTC
[2/5] tika git commit: TIKA-2022 - clean up -- make entries private,
move more into EndianUtils
TIKA-2022 - clean up -- make entries private, move more into EndianUtils
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c84855f6
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c84855f6
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c84855f6
Branch: refs/heads/2.x
Commit: c84855f6757c714a9fdcec55ca14b628a107642e
Parents: 865c45c
Author: tballison <ta...@mitre.org>
Authored: Tue Jun 28 20:13:01 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Tue Jun 28 20:13:01 2016 -0400
----------------------------------------------------------------------
.../java/org/apache/tika/io/EndianUtils.java | 19 +++++++++++
.../org/apache/tika/io/EndianUtilsTest.java | 16 +++++++++
.../parser/apple/AppleSingleFileParser.java | 35 ++++++++++----------
3 files changed, 52 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/c84855f6/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
index 2ab85b3..05da5e0 100644
--- a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
@@ -93,6 +93,25 @@ public class EndianUtils {
}
/**
+ * Get a BE unsigned int value from an InputStream, widened to a long
+ *
+ * @param stream the InputStream from which the int is to be read
+ * @return the unsigned int (32-bit) value as a long, in the range 0 to 4294967295
+ * @throws IOException will be propagated back to the caller
+ * @throws BufferUnderrunException if the stream cannot provide enough bytes
+ */
+ public static long readUIntBE(InputStream stream) throws IOException, BufferUnderrunException {
+ int ch1 = stream.read();
+ int ch2 = stream.read();
+ int ch3 = stream.read();
+ int ch4 = stream.read();
+ if ((ch1 | ch2 | ch3 | ch4) < 0) {
+ throw new BufferUnderrunException();
+ }
+ return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)) & 0x00FFFFFFFFL;
+ }
+
+ /**
* Get a LE int value from an InputStream
*
* @param stream the InputStream from which the int is to be read
http://git-wip-us.apache.org/repos/asf/tika/blob/c84855f6/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
index 8f481c3..50084d2 100644
--- a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
@@ -54,6 +54,22 @@ public class EndianUtilsTest {
} catch (EndianUtils.BufferUnderrunException e) {
}
+ }
+
+ @Test
+ public void testReadUIntBE() throws Exception {
+ byte[] data = new byte[] {(byte)0x00, (byte)0x00, (byte)0x00, (byte)0x08 };
+ assertEquals((long) 8, EndianUtils.readUIntBE(new ByteArrayInputStream(data)));
+ data = new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xF0 };
+ assertEquals(4294967280L, EndianUtils.readUIntBE(new ByteArrayInputStream(data)));
+
+ data = new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF };
+ try {
+ EndianUtils.readUIntBE(new ByteArrayInputStream(data));
+ fail("Should have thrown exception");
+ } catch (EndianUtils.BufferUnderrunException e) {
+ // expected: only three of the four required bytes are available
+ }
}
}
http://git-wip-us.apache.org/repos/asf/tika/blob/c84855f6/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
index ffb5759..3f548ca 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
@@ -50,21 +50,21 @@ public class AppleSingleFileParser extends AbstractParser {
/**
* Entry types
*/
- public static final int DATA_FORK = 1;
- public static final int RESOURCE_FORK = 2;
- public static final int REAL_NAME = 3;
- public static final int COMMENT = 4;
- public static final int ICON_BW = 5;
- public static final int ICON_COLOR = 6;
+ private static final int DATA_FORK = 1;
+ private static final int RESOURCE_FORK = 2;
+ private static final int REAL_NAME = 3;
+ private static final int COMMENT = 4;
+ private static final int ICON_BW = 5;
+ private static final int ICON_COLOR = 6;
//7?!
- public static final int FILE_DATES_INFO = 8;
- public static final int FINDER_INFO = 9;
- public static final int MACINTOSH_FILE_INFO = 10;
- public static final int PRODOS_FILE_INFO = 11;
- public static final int MSDOS_FILE_INFO = 12;
- public static final int SHORT_NAME = 13;
- public static final int AFP_FILE_INFO = 14;
- public static final int DIRECTORY_ID = 15;
+ private static final int FILE_DATES_INFO = 8;
+ private static final int FINDER_INFO = 9;
+ private static final int MACINTOSH_FILE_INFO = 10;
+ private static final int PRODOS_FILE_INFO = 11;
+ private static final int MSDOS_FILE_INFO = 12;
+ private static final int SHORT_NAME = 13;
+ private static final int AFP_FILE_INFO = 14;
+ private static final int DIRECTORY_ID = 15;
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("applefile"));
@@ -94,7 +94,6 @@ public class AppleSingleFileParser extends AbstractParser {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
if (contentFieldInfo != null) {
- System.out.println(contentFieldInfo.offset + " "+bytesRead);
long diff = contentFieldInfo.offset-bytesRead;
IOUtils.skipFully(stream, diff);
if (ex.shouldParseEmbedded(embeddedMetadata)) {
@@ -153,9 +152,9 @@ public class AppleSingleFileParser extends AbstractParser {
//convert 32-bit unsigned ints to longs
fieldInfoList.add(
new FieldInfo(
- EndianUtils.readIntBE(stream) & 0x00000000ffffffffL, //entry id
- EndianUtils.readIntBE(stream) & 0x00000000ffffffffL, //offset
- EndianUtils.readIntBE(stream) & 0x00000000ffffffffL //length
+ EndianUtils.readUIntBE(stream), //entry id
+ EndianUtils.readUIntBE(stream), //offset
+ EndianUtils.readUIntBE(stream) //length
)
);
}