You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ni...@apache.org on 2016/11/01 20:39:37 UTC

avro git commit: AVRO-1944: Fixed invalid toString in records with duplicate values

Repository: avro
Updated Branches:
  refs/heads/master e2d1073bf -> 67bec06db


AVRO-1944: Fixed invalid toString in records with duplicate values


Project: http://git-wip-us.apache.org/repos/asf/avro/repo
Commit: http://git-wip-us.apache.org/repos/asf/avro/commit/67bec06d
Tree: http://git-wip-us.apache.org/repos/asf/avro/tree/67bec06d
Diff: http://git-wip-us.apache.org/repos/asf/avro/diff/67bec06d

Branch: refs/heads/master
Commit: 67bec06dbdc910856fdc33d149aece97c26a59b1
Parents: e2d1073
Author: Niels Basjes <nb...@bol.com>
Authored: Wed Oct 26 15:36:02 2016 +0200
Committer: Niels Basjes <ni...@apache.org>
Committed: Tue Nov 1 21:32:30 2016 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |   2 +-
 .../org/apache/avro/generic/GenericData.java    |  35 +++++--
 .../apache/avro/generic/TestGenericData.java    | 101 ++++++++++++++++++-
 3 files changed, 130 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/avro/blob/67bec06d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 45b0f61..15c0523 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -76,7 +76,7 @@ Trunk (not yet released)
     AVRO-1853: C++: Compiler::toBin crashes on empty string.
     (Zoltan Ivanfi via tomwhite)
 
-    AVRO-1923: Stop infinite recursion in GenericData.toString. (Niels Basjes)
+    AVRO-1923, AVRO-1944: Stop infinite recursion in GenericData.toString. (Niels Basjes)
 
     AVRO-1860: Java: Field#DefaultVal() returns Ints for Long fields.
     (Gabor Szadovszky via cutting)

http://git-wip-us.apache.org/repos/asf/avro/blob/67bec06d/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
----------------------------------------------------------------------
diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
index 0484c03..ff0893c 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
@@ -504,14 +504,18 @@ public class GenericData {
     toString(datum, buffer, new IdentityHashMap<Object, Object>(128) );
     return buffer.toString();
   }
+
+  private static final String TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT =
+    " \">>> CIRCULAR REFERENCE CANNOT BE PUT IN JSON STRING, ABORTING RECURSION <<<\" ";
+
   /** Renders a Java datum as <a href="http://www.json.org/">JSON</a>. */
   protected void toString(Object datum, StringBuilder buffer, IdentityHashMap<Object, Object> seenObjects) {
-    if (seenObjects.containsKey(datum)) {
-      buffer.append(" \">>> CIRCULAR REFERENCE CANNOT BE PUT IN JSON STRING, ABORTING RECURSION<<<\" ");
-      return;
-    }
-    seenObjects.put(datum, datum);
     if (isRecord(datum)) {
+      if (seenObjects.containsKey(datum)) {
+        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
+        return;
+      }
+      seenObjects.put(datum, datum);
       buffer.append("{");
       int count = 0;
       Schema schema = getRecordSchema(datum);
@@ -523,7 +527,13 @@ public class GenericData {
           buffer.append(", ");
       }
       buffer.append("}");
+      seenObjects.remove(datum);
     } else if (isArray(datum)) {
+      if (seenObjects.containsKey(datum)) {
+        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
+        return;
+      }
+      seenObjects.put(datum, datum);
       Collection<?> array = getArrayAsCollection(datum);
       buffer.append("[");
       long last = array.size()-1;
@@ -534,7 +544,13 @@ public class GenericData {
           buffer.append(", ");
       }
       buffer.append("]");
+      seenObjects.remove(datum);
     } else if (isMap(datum)) {
+      if (seenObjects.containsKey(datum)) {
+        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
+        return;
+      }
+      seenObjects.put(datum, datum);
       buffer.append("{");
       int count = 0;
       @SuppressWarnings(value="unchecked")
@@ -547,6 +563,7 @@ public class GenericData {
           buffer.append(", ");
       }
       buffer.append("}");
+      seenObjects.remove(datum);
     } else if (isString(datum)|| isEnum(datum)) {
       buffer.append("\"");
       writeEscapedString(datum.toString(), buffer);
@@ -564,7 +581,13 @@ public class GenericData {
       buffer.append(datum);
       buffer.append("\"");
     } else if (datum instanceof GenericData) {
-        toString(datum, buffer, seenObjects);
+      if (seenObjects.containsKey(datum)) {
+        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
+        return;
+      }
+      seenObjects.put(datum, datum);
+      toString(datum, buffer, seenObjects);
+      seenObjects.remove(datum);
     } else {
       buffer.append(datum);
     }

http://git-wip-us.apache.org/repos/asf/avro/blob/67bec06d/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java
----------------------------------------------------------------------
diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java
index 09ed66a..fe01ad3 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java
@@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Collection;
 import java.util.ArrayDeque;
@@ -30,6 +31,7 @@ import static org.apache.avro.TestCircularReferences.Referenceable;
 import static org.junit.Assert.*;
 
 import java.util.Arrays;
+import java.util.Map;
 
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
@@ -496,6 +498,104 @@ public class TestGenericData {
     assertTrue(GenericData.get().validate(unionSchema, record));
   }
 
+  /*
+   * The toString has a detection for circular references to abort.
+   * This detection has the risk of detecting that same value as being a circular reference.
+   * For Record, Map and Array this is correct, for the rest is is not.
+   */
+  @Test
+  public void testToStringSameValues() throws IOException {
+    List<Field> fields = new ArrayList<Field>();
+    fields.add(new Field("nullstring1", Schema.create(Type.STRING), null, (Object)null));
+    fields.add(new Field("nullstring2", Schema.create(Type.STRING), null, (Object)null));
+
+    fields.add(new Field("string1", Schema.create(Type.STRING  ), null, (Object)null));
+    fields.add(new Field("string2", Schema.create(Type.STRING  ), null, (Object)null));
+
+    fields.add(new Field("bytes1",  Schema.create(Type.BYTES   ), null, (Object)null));
+    fields.add(new Field("bytes2",  Schema.create(Type.BYTES   ), null, (Object)null));
+
+    fields.add(new Field("int1",    Schema.create(Type.INT     ), null, (Object)null));
+    fields.add(new Field("int2",    Schema.create(Type.INT     ), null, (Object)null));
+
+    fields.add(new Field("long1",   Schema.create(Type.LONG    ), null, (Object)null));
+    fields.add(new Field("long2",   Schema.create(Type.LONG    ), null, (Object)null));
+
+    fields.add(new Field("float1",  Schema.create(Type.FLOAT   ), null, (Object)null));
+    fields.add(new Field("float2",  Schema.create(Type.FLOAT   ), null, (Object)null));
+
+    fields.add(new Field("double1", Schema.create(Type.DOUBLE  ), null, (Object)null));
+    fields.add(new Field("double2", Schema.create(Type.DOUBLE  ), null, (Object)null));
+
+    fields.add(new Field("boolean1",Schema.create(Type.BOOLEAN ), null, (Object)null));
+    fields.add(new Field("boolean2",Schema.create(Type.BOOLEAN ), null, (Object)null));
+
+    List<String> enumValues = new ArrayList<String>();
+    enumValues.add("One");
+    enumValues.add("Two");
+    Schema enumSchema = Schema.createEnum("myEnum", null, null, enumValues);
+    fields.add(new Field("enum1", enumSchema, null, (Object)null));
+    fields.add(new Field("enum2", enumSchema, null, (Object)null));
+
+    Schema recordSchema = SchemaBuilder.record("aRecord").fields().requiredString("myString").endRecord();
+    fields.add(new Field("record1", recordSchema, null, (Object)null));
+    fields.add(new Field("record2", recordSchema, null, (Object)null));
+
+    Schema arraySchema = Schema.createArray(Schema.create(Type.STRING));
+    fields.add(new Field("array1", arraySchema, null, (Object)null));
+    fields.add(new Field("array2", arraySchema, null, (Object)null));
+
+    Schema mapSchema = Schema.createMap(Schema.create(Type.STRING));
+    fields.add(new Field("map1", mapSchema, null, (Object)null));
+    fields.add(new Field("map2", mapSchema, null, (Object)null));
+
+    Schema schema = Schema.createRecord("Foo", "test", "mytest", false);
+    schema.setFields(fields);
+
+    Record testRecord = new Record(schema);
+
+    testRecord.put("nullstring1", null);
+    testRecord.put("nullstring2", null);
+
+    String fortyTwo = "42";
+    testRecord.put("string1",  fortyTwo);
+    testRecord.put("string2",  fortyTwo);
+    testRecord.put("bytes1",   0x42 );
+    testRecord.put("bytes2",   0x42 );
+    testRecord.put("int1",     42 );
+    testRecord.put("int2",     42 );
+    testRecord.put("long1",    42L);
+    testRecord.put("long2",    42L);
+    testRecord.put("float1",   42F);
+    testRecord.put("float2",   42F);
+    testRecord.put("double1",  42D);
+    testRecord.put("double2",  42D);
+    testRecord.put("boolean1", true);
+    testRecord.put("boolean2", true);
+
+    testRecord.put("enum1", "One");
+    testRecord.put("enum2", "One");
+
+    GenericRecord record = new GenericData.Record(recordSchema);
+    record.put("myString","42");
+    testRecord.put("record1",  record);
+    testRecord.put("record2",  record);
+
+    GenericArray<String> array = new GenericData.Array<String>(1, arraySchema);
+    array.clear();
+    array.add("42");
+    testRecord.put("array1",   array);
+    testRecord.put("array2",   array);
+
+    Map<String, String> map = new HashMap<String, String>();
+    map.put("42", "42");
+    testRecord.put("map1",     map);
+    testRecord.put("map2",     map);
+
+    String testString = testRecord.toString();
+    assertFalse("Record with duplicated values results in wrong 'toString()'", testString.contains("CIRCULAR REFERENCE"));
+  }
+
   // Test copied from Apache Parquet: org.apache.parquet.avro.TestCircularReferences
   @Test
   public void testToStringRecursive() throws IOException {
@@ -551,5 +651,4 @@ public class TestGenericData {
       fail("StackOverflowError occurred");
     }
   }
-
 }