You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2020/10/14 09:35:20 UTC
[avro] branch master updated: AVRO-2817: Do not validate schema defaults in file (#966)
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 328c539 AVRO-2817: Do not validate schema defaults in file (#966)
328c539 is described below
commit 328c539afc77da347ec52be1e112a6a7c371143b
Author: RyanSkraba <ry...@skraba.com>
AuthorDate: Wed Oct 14 11:35:05 2020 +0200
AVRO-2817: Do not validate schema defaults in file (#966)
* AVRO-2817: Do not validate schema defaults in file.
* AVRO-2817: Add missing imports
---
.../java/org/apache/avro/file/DataFileStream.java | 9 ++++---
.../java/org/apache/avro/TestDataFileReader.java | 29 ++++++++++++++++++++++
2 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
index 4b415ec..8d26971 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
@@ -43,7 +43,7 @@ import org.apache.avro.io.DatumReader;
/**
* Streaming access to files written by {@link DataFileWriter}. Use
* {@link DataFileReader} for file-based input.
- *
+ *
* @see DataFileWriter
*/
public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
@@ -127,7 +127,8 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
// finalize the header
header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
- header.schema = new Schema.Parser().setValidate(false).parse(getMetaString(DataFileConstants.SCHEMA));
+ header.schema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+ .parse(getMetaString(DataFileConstants.SCHEMA));
this.codec = resolveCodec();
reader.setSchema(header.schema);
}
@@ -226,7 +227,7 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
/**
* Read the next datum in the file.
- *
+ *
* @throws NoSuchElementException if no more remain in the file.
*/
@Override
@@ -240,7 +241,7 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
/**
* Read the next datum from the file.
- *
+ *
* @param reuse an instance to reuse.
* @throws NoSuchElementException if no more remain in the file.
*/
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index d4c184c..a4e1043 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -17,15 +17,22 @@
*/
package org.apache.avro;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
import org.junit.Test;
import com.sun.management.UnixOperatingSystemMXBean;
@@ -62,4 +69,26 @@ public class TestDataFileReader {
return 0;
}
+ @Test
+ public void testIgnoreSchemaValidationOnRead() throws IOException {
+ // This schema has an accent in the name and the default for the field doesn't
+ // match the first type in the union. A Java SDK in the past could create a file
+ // containing this schema.
+ Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+ .parse("{\"type\": \"record\", \"name\": \"InvalidAccëntWithInvalidNull\", \"fields\": "
+ + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}");
+
+ // Create a file with the legacy schema.
+ File f = Files.createTempFile("testIgnoreSchemaValidationOnRead", ".avro").toFile();
+ try (DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
+ w.create(legacySchema, f);
+ w.flush();
+ }
+
+ // This should not throw an exception.
+ try (DataFileStream<Void> r = new DataFileStream<>(new FileInputStream(f), new GenericDatumReader<>())) {
+ assertEquals(legacySchema, r.getSchema());
+ }
+ }
+
}