You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/02/03 16:55:41 UTC

[avro] branch branch-1.10 updated: AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)

This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch branch-1.10
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.10 by this push:
     new 2204341  AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
2204341 is described below

commit 2204341be6ba156669b094fb738325edd675fd19
Author: Andrew Olson <93...@users.noreply.github.com>
AuthorDate: Wed Feb 3 10:43:12 2021 -0600

    AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
    
    * AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader
    
    * AVRO-2944: Fix unit test code formatting
    
    Co-authored-by: Andrew Olson <ao...@cerner.com>
---
 .../java/org/apache/avro/file/DataFileReader.java  | 11 +++-
 .../java/org/apache/avro/TestDataFileReader.java   | 70 ++++++++++++++++++++++
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
index f027852..7a23535 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.avro.file;
 
+import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.File;
@@ -58,7 +59,15 @@ public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D
     // read magic header
     byte[] magic = new byte[MAGIC.length];
     in.seek(0);
-    for (int c = 0; c < magic.length; c += in.read(magic, c, magic.length - c)) {
+    int offset = 0;
+    int length = magic.length;
+    while (length > 0) {
+      int bytesRead = in.read(magic, offset, length);
+      if (bytesRead < 0)
+        throw new EOFException("Unexpected EOF with " + length + " bytes remaining to read");
+
+      length -= bytesRead;
+      offset += bytesRead;
     }
     in.seek(0);
 
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index c222685..3e2dd6b 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -21,8 +21,10 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.EOFException;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
 import java.lang.management.OperatingSystemMXBean;
@@ -131,6 +133,55 @@ public class TestDataFileReader {
     };
   }
 
+  @Test(expected = EOFException.class)
+  // another regression test for bug AVRO-2944, testing EOF case
+  public void testInputStreamEOF() throws IOException {
+    // AVRO-2944 describes a hang/failure when reading an Avro file while performing
+    // the magic header check. This potentially happens with a defective input stream
+    // where a -1 value is unexpectedly returned from a read.
+    Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+        .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": "
+            + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}");
+    File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile();
+    try (DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
+      w.create(legacySchema, f);
+      w.flush();
+    }
+
+    // Should throw an EOFException
+    DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>());
+  }
+
+  private SeekableInput eofInputStream(File f) throws IOException {
+    SeekableFileInput input = new SeekableFileInput(f);
+    return new SeekableInput() {
+      @Override
+      public void close() throws IOException {
+        input.close();
+      }
+
+      @Override
+      public void seek(long p) throws IOException {
+        input.seek(p);
+      }
+
+      @Override
+      public long tell() throws IOException {
+        return input.tell();
+      }
+
+      @Override
+      public long length() throws IOException {
+        return input.length();
+      }
+
+      @Override
+      public int read(byte[] b, int off, int len) throws IOException {
+        return -1;
+      }
+    };
+  }
+
   @Test
   public void testIgnoreSchemaValidationOnRead() throws IOException {
     // This schema has an accent in the name and the default for the field doesn't
@@ -153,4 +204,23 @@ public class TestDataFileReader {
     }
   }
 
+  @Test(expected = InvalidAvroMagicException.class)
+  public void testInvalidMagicLength() throws IOException {
+    File f = Files.createTempFile("testInvalidMagicLength", ".avro").toFile();
+    try (FileWriter w = new FileWriter(f)) {
+      w.write("-");
+    }
+
+    DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>());
+  }
+
+  @Test(expected = InvalidAvroMagicException.class)
+  public void testInvalidMagicBytes() throws IOException {
+    File f = Files.createTempFile("testInvalidMagicBytes", ".avro").toFile();
+    try (FileWriter w = new FileWriter(f)) {
+      w.write("invalid");
+    }
+
+    DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>());
+  }
 }