You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2022/04/28 10:27:47 UTC

[tika] branch main updated: PDP-11 style "Middle Endian" 32 bit read util, as used in the DGN file format

This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new f33d8930e PDP-11 style "Middle Endian" 32 bit read util, as used in the DGN file format
f33d8930e is described below

commit f33d8930e660e61fb04f9232cd7fb6a96cdacdf3
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu Apr 28 11:27:36 2022 +0100

    PDP-11 style "Middle Endian" 32 bit read util, as used in the DGN file format
---
 .../src/main/java/org/apache/tika/io/EndianUtils.java | 19 +++++++++++++++++++
 .../test/java/org/apache/tika/io/EndianUtilsTest.java | 18 ++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
index c09eadceb..242dd8c74 100644
--- a/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
@@ -152,6 +152,25 @@ public class EndianUtils {
         return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4);
     }
 
+    /**
+     * Get a PDP-11 style Middle Endian int value from an InputStream.
+     * Four bytes are read; from most to least significant they are the 2nd, 1st, 4th and 3rd.
+     * @param stream the InputStream from which the int is to be read
+     * @return the int (32-bit) value
+     * @throws IOException             will be propagated back to the caller
+     * @throws BufferUnderrunException if any of the four reads returns a negative value
+     */
+    public static int readIntME(InputStream stream) throws IOException, BufferUnderrunException {
+        int ch1 = stream.read();
+        int ch2 = stream.read();
+        int ch3 = stream.read();
+        int ch4 = stream.read();
+        if ((ch1 | ch2 | ch3 | ch4) < 0) { // a negative read() result makes the OR negative
+            throw new BufferUnderrunException();
+        }
+        return (ch2 << 24) + (ch1 << 16) + (ch4 << 8) + (ch3); // bytes swapped in each 16-bit half
+    }
+
     /**
      * Get a LE long value from an InputStream
      *
diff --git a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
index 8ead23218..906870e73 100644
--- a/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/EndianUtilsTest.java
@@ -72,4 +72,22 @@ public class EndianUtilsTest {
             //swallow
         }
     }
+
+    @Test
+    public void testReadIntME() throws Exception {
+        // Example from https://yamm.finance/wiki/Endianness.html#mwAiw
+        byte[] data = new byte[]{(byte) 0x0b, (byte) 0x0a, (byte) 0x0d, (byte) 0x0c};
+        assertEquals(0x0a0b0c0d, EndianUtils.readIntME(new ByteArrayInputStream(data)));

+        data = new byte[]{(byte) 0xFE, (byte) 0xFF, (byte) 0xFC, (byte) 0xFD}; // high bits set
+        assertEquals(0xfffefdfc, EndianUtils.readIntME(new ByteArrayInputStream(data)));

+        data = new byte[]{(byte) 0xFF, (byte) 0xFF, (byte) 0xFF}; // only three bytes supplied
+        try {
+            EndianUtils.readIntME(new ByteArrayInputStream(data));
+            fail("Should have thrown exception");
+        } catch (EndianUtils.BufferUnderrunException e) {
+            // expected: the fourth read finds no byte
+        }
+    }
 }