You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@avro.apache.org by rs...@apache.org on 2020/10/14 09:35:20 UTC

[avro] branch master updated: AVRO-2817: Do not validate schema defaults in file (#966)

This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 328c539  AVRO-2817: Do not validate schema defaults in file (#966)
328c539 is described below

commit 328c539afc77da347ec52be1e112a6a7c371143b
Author: RyanSkraba <ry...@skraba.com>
AuthorDate: Wed Oct 14 11:35:05 2020 +0200

    AVRO-2817: Do not validate schema defaults in file (#966)
    
    * AVRO-2817: Do not validate schema defaults in file.
    
    * AVRO-2817: Add missing imports
---
 .../java/org/apache/avro/file/DataFileStream.java  |  9 ++++---
 .../java/org/apache/avro/TestDataFileReader.java   | 29 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
index 4b415ec..8d26971 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
@@ -43,7 +43,7 @@ import org.apache.avro.io.DatumReader;
 /**
  * Streaming access to files written by {@link DataFileWriter}. Use
  * {@link DataFileReader} for file-based input.
- * 
+ *
  * @see DataFileWriter
  */
 public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
@@ -127,7 +127,8 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
 
     // finalize the header
     header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
-    header.schema = new Schema.Parser().setValidate(false).parse(getMetaString(DataFileConstants.SCHEMA));
+    header.schema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+        .parse(getMetaString(DataFileConstants.SCHEMA));
     this.codec = resolveCodec();
     reader.setSchema(header.schema);
   }
@@ -226,7 +227,7 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
 
   /**
    * Read the next datum in the file.
-   * 
+   *
    * @throws NoSuchElementException if no more remain in the file.
    */
   @Override
@@ -240,7 +241,7 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
 
   /**
    * Read the next datum from the file.
-   * 
+   *
    * @param reuse an instance to reuse.
    * @throws NoSuchElementException if no more remain in the file.
    */
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index d4c184c..a4e1043 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -17,15 +17,22 @@
  */
 package org.apache.avro;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
 import java.lang.management.OperatingSystemMXBean;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
 import org.junit.Test;
 import com.sun.management.UnixOperatingSystemMXBean;
 
@@ -62,4 +69,26 @@ public class TestDataFileReader {
     return 0;
   }
 
+  @Test
+  public void testIgnoreSchemaValidationOnRead() throws IOException {
+    // This schema has an accent in the name and the default for the field doesn't
+    // match the first type in the union. A Java SDK in the past could create a file
+    // containing this schema.
+    Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+        .parse("{\"type\": \"record\", \"name\": \"InvalidAccëntWithInvalidNull\", \"fields\": "
+            + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}");
+
+    // Create a file with the legacy schema.
+    File f = Files.createTempFile("testIgnoreSchemaValidationOnRead", ".avro").toFile();
+    try (DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
+      w.create(legacySchema, f);
+      w.flush();
+    }
+
+    // This should not throw an exception.
+    try (DataFileStream<Void> r = new DataFileStream<>(new FileInputStream(f), new GenericDatumReader<>())) {
+      assertEquals(legacySchema, r.getSchema());
+    }
+  }
+
 }