You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2018/12/11 18:13:35 UTC

[avro] branch master updated: AVRO-2034 Nested schema types with unexpected fields causes json parse failure (#224)

This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new d55f5e1  AVRO-2034 Nested schema types with unexpected fields causes json parse failure (#224)
d55f5e1 is described below

commit d55f5e152c288a2037d65d15a7169d76aa9be2be
Author: Todd Nine <to...@gmail.com>
AuthorDate: Tue Dec 11 11:13:31 2018 -0700

    AVRO-2034 Nested schema types with unexpected fields causes json parse failure (#224)
    
    * AVRO-2034 Created test to prove issue
    
    * AVRO-2034. Updates test to show a working record vs a failing record in the simplest possible schema.
    
    * AVRO-2034 Fixes advance logic to skip unrecognized fields at record end
---
 .../main/java/org/apache/avro/io/JsonDecoder.java  |  11 ++-
 .../java/org/apache/avro/TestNestedRecords.java    | 110 +++++++++++++++++++++
 2 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
index ce9beb7..0a6ef09 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
@@ -495,14 +495,23 @@ public class JsonDecoder extends ParsingDecoder
         throw error("record-start");
       }
     } else if (top == Symbol.RECORD_END || top == Symbol.UNION_END) {
-      if (in.getCurrentToken() == JsonToken.END_OBJECT) {
+      //AVRO-2034 advance to the end of our object
+      while(in.getCurrentToken() != JsonToken.END_OBJECT){
         in.nextToken();
+      }
+
+      if (in.getCurrentToken() == JsonToken.END_OBJECT) {
+
         if (top == Symbol.RECORD_END) {
           if (currentReorderBuffer != null && !currentReorderBuffer.savedFields.isEmpty()) {
             throw error("Unknown fields: " + currentReorderBuffer.savedFields.keySet());
           }
           currentReorderBuffer = reorderBuffers.pop();
         }
+
+        //AVRO-2034 advance beyond the end object for the next record.
+        in.nextToken();
+
       } else {
         throw error(top == Symbol.RECORD_END ? "record-end" : "union-end");
       }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java b/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java
new file mode 100644
index 0000000..8900b1e
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java
@@ -0,0 +1,110 @@
+package org.apache.avro;
+
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.JsonDecoder;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Checks that {@link JsonDecoder} reads a record nested inside another record,
+ * both when the JSON matches the schema exactly and when the nested object
+ * carries a field the schema does not declare (AVRO-2034).
+ */
+public class TestNestedRecords {
+
+  /** A nested object that matches the Child schema exactly must decode. */
+  @Test
+  public void testSingleSubRecord() throws IOException {
+
+    final String inputAsExpected = "{\n" +
+            " \"parentField1\": \"parentValue1\",\n" +
+            " \"child1\":{\n" +
+            "    \"childField\":\"childValue1\"\n" +
+            " },\n" +
+            " \"parentField2\":\"parentValue2\"\n" +
+            "}";
+
+    assertExpectedValues(decode(inputAsExpected));
+  }
+
+  /** An undeclared field inside the nested object must be skipped, not fail. */
+  @Test
+  public void testSingleSubRecordExtraField() throws IOException {
+
+    final String inputWithExtraField = "{\n" +
+            " \"parentField1\": \"parentValue1\",\n" +
+            " \"child1\":{\n" +
+            "    \"childField\":\"childValue1\",\n" +
+
+            //this field should be safely ignored
+            "    \"extraField\":\"extraValue\"\n" +
+            " },\n" +
+            " \"parentField2\":\"parentValue2\"\n" +
+            "}";
+
+    assertExpectedValues(decode(inputWithExtraField));
+  }
+
+  /**
+   * Builds the schema shared by both tests: a Parent record whose child1
+   * field is a Child record placed between two required string fields.
+   *
+   * @return the Parent schema, with the Child schema embedded
+   */
+  private static Schema parentSchema() {
+    final Schema child = SchemaBuilder.record("Child")
+            .namespace("org.apache.avro.nested")
+            .fields()
+            .requiredString("childField").endRecord();
+
+    return SchemaBuilder.record("Parent")
+            .namespace("org.apache.avro.nested")
+            .fields()
+            .requiredString("parentField1")
+            .name("child1").type(child).noDefault()
+            .requiredString("parentField2").endRecord();
+  }
+
+  /**
+   * Decodes a JSON document into a generic record using the Parent schema.
+   *
+   * @param json the JSON text to decode
+   * @return the decoded Parent record
+   * @throws IOException if the decoder cannot read the input
+   */
+  private static GenericData.Record decode(final String json) throws IOException {
+    final Schema parent = parentSchema();
+
+    // Encode explicitly as UTF-8 so the bytes handed to the decoder do not
+    // depend on the platform default charset used by String.getBytes().
+    final ByteArrayInputStream inputStream = new ByteArrayInputStream(
+            json.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+
+    final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(parent, inputStream);
+    final DatumReader<Object> reader = new GenericDatumReader<Object>(parent);
+
+    return (GenericData.Record) reader.read(null, decoder);
+  }
+
+  /**
+   * Asserts that every field declared by the schema came through with the
+   * value both test inputs supply for it.
+   *
+   * @param decoded the record produced by {@link #decode(String)}
+   */
+  private static void assertExpectedValues(final GenericData.Record decoded) {
+    assertThat(decoded.get("parentField1").toString(), equalTo("parentValue1"));
+    assertThat(decoded.get("parentField2").toString(), equalTo("parentValue2"));
+
+    final GenericData.Record child = (GenericData.Record) decoded.get("child1");
+    assertThat(child.get("childField").toString(), equalTo("childValue1"));
+  }
+}