You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/02/03 16:55:41 UTC
[avro] branch branch-1.10 updated: AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch branch-1.10
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/branch-1.10 by this push:
new 2204341 AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
2204341 is described below
commit 2204341be6ba156669b094fb738325edd675fd19
Author: Andrew Olson <93...@users.noreply.github.com>
AuthorDate: Wed Feb 3 10:43:12 2021 -0600
AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
* AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader
* AVRO-2944: Fix unit test code formatting
Co-authored-by: Andrew Olson <ao...@cerner.com>
---
.../java/org/apache/avro/file/DataFileReader.java | 11 +++-
.../java/org/apache/avro/TestDataFileReader.java | 70 ++++++++++++++++++++++
2 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
index f027852..7a23535 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.avro.file;
+import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.File;
@@ -58,7 +59,15 @@ public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D
// read magic header
byte[] magic = new byte[MAGIC.length];
in.seek(0);
- for (int c = 0; c < magic.length; c += in.read(magic, c, magic.length - c)) {
+ int offset = 0;
+ int length = magic.length;
+ while (length > 0) {
+ int bytesRead = in.read(magic, offset, length);
+ if (bytesRead < 0)
+ throw new EOFException("Unexpected EOF with " + length + " bytes remaining to read");
+
+ length -= bytesRead;
+ offset += bytesRead;
}
in.seek(0);
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index c222685..3e2dd6b 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -21,8 +21,10 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileWriter;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
@@ -131,6 +133,55 @@ public class TestDataFileReader {
};
}
+ @Test(expected = EOFException.class)
+ // Another regression test for bug AVRO-2944, covering the EOF case.
+ public void testInputStreamEOF() throws IOException {
+ // AVRO-2944 describes a hang/failure while reading an Avro file during the
+ // magic header check. This can happen with a defective input stream that
+ // unexpectedly returns -1 from a read.
+ Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+ .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": "
+ + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}");
+ File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile();
+ try (DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
+ w.create(legacySchema, f); // only create() and flush() are called; no records are appended
+ w.flush();
+ }
+
+ // eofInputStream(f) returns -1 from every read, so this should throw an EOFException
+ DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>());
+ }
+
+ private SeekableInput eofInputStream(File f) throws IOException { // wraps f in a SeekableInput whose read() always reports EOF
+ SeekableFileInput input = new SeekableFileInput(f); // real file-backed input; every method except read() delegates to it
+ return new SeekableInput() {
+ @Override
+ public void close() throws IOException {
+ input.close();
+ }
+
+ @Override
+ public void seek(long p) throws IOException {
+ input.seek(p);
+ }
+
+ @Override
+ public long tell() throws IOException {
+ return input.tell();
+ }
+
+ @Override
+ public long length() throws IOException {
+ return input.length();
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ return -1; // simulate the defective stream: signal end-of-stream without touching the underlying file
+ }
+ };
+ }
+
@Test
public void testIgnoreSchemaValidationOnRead() throws IOException {
// This schema has an accent in the name and the default for the field doesn't
@@ -153,4 +204,23 @@ public class TestDataFileReader {
}
}
+ @Test(expected = InvalidAvroMagicException.class)
+ public void testInvalidMagicLength() throws IOException {
+ File f = Files.createTempFile("testInvalidMagicLength", ".avro").toFile();
+ try (FileWriter w = new FileWriter(f)) {
+ w.write("-"); // the file holds a single character and nothing else
+ }
+
+ DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>()); // expected to fail with InvalidAvroMagicException
+ }
+
+ @Test(expected = InvalidAvroMagicException.class)
+ public void testInvalidMagicBytes() throws IOException {
+ File f = Files.createTempFile("testInvalidMagicBytes", ".avro").toFile();
+ try (FileWriter w = new FileWriter(f)) {
+ w.write("invalid"); // plain text written where an Avro file would be expected
+ }
+
+ DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>()); // expected to fail with InvalidAvroMagicException
+ }
}