You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2016/06/24 22:11:48 UTC

drill git commit: DRILL-4574: Avro Plugin: Flatten does not work correctly on record items

Repository: drill
Updated Branches:
  refs/heads/master 11602456b -> 1c9e92b0c


DRILL-4574: Avro Plugin: Flatten does not work correctly on record items

* When applied to arrays of records, the flatten function always returned only the last item in the array when querying Avro records
* The problem was that the RepeatedMapWriter's start() and end() methods were only called once per array instead of once for every array item
* This commit adds tests that show the expected behaviour for both non-primitive and primitive arrays

close apache/drill#459


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/1c9e92b0
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/1c9e92b0
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/1c9e92b0

Branch: refs/heads/master
Commit: 1c9e92b0cec18b4ee5a005fd6006ad329e3fa568
Parents: 1160245
Author: baunz <jo...@gmail.com>
Authored: Sun Apr 3 09:43:40 2016 +0200
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Jun 24 15:07:25 2016 -0700

----------------------------------------------------------------------
 .../drill/exec/store/avro/AvroRecordReader.java |  5 +-
 .../drill/exec/store/avro/AvroFormatTest.java   | 65 ++++++++++++++++++++
 .../drill/exec/store/avro/AvroTestUtil.java     | 40 ++++++++++++
 3 files changed, 108 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
index 89e220c..bbc9b04 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
@@ -195,6 +195,7 @@ public class AvroRecordReader extends AbstractRecordReader {
           }
 
           process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
+
         }
         break;
       case ARRAY:
@@ -207,11 +208,11 @@ public class AvroRecordReader extends AbstractRecordReader {
         } else {
           writer = (MapOrListWriterImpl) writer.list(fieldName);
         }
-        writer.start();
         for (final Object o : array) {
+          writer.start();
           process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
+          writer.end();
         }
-        writer.end();
         break;
       case UNION:
         // currently supporting only nullable union (optional fields) like ["null", "some-type"].

http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
index af4d0e6..f804e88 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
@@ -326,6 +326,71 @@ public class AvroFormatTest extends BaseTestQuery {
     test(sql);
   }
 
+  /**
+   *  See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a>
+   *
+   */
+  @Test
+  public void testFlattenPrimitiveArray() throws Exception {
+    final String file = AvroTestUtil.generateSimpleArraySchema_NoNullValues().getFilePath();
+
+    final String sql = "select a_string, flatten(c_string_array) as array_item "
+        + "from dfs_test.`" + file + "` t";
+
+    TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered()
+        .baselineColumns("a_string", "array_item");
+
+    for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) {
+
+      for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) {
+        testBuilder.baselineValues("a_" + i, "c_string_array_" + i + "_" + j);
+      }
+    }
+
+
+    testBuilder.go();
+
+  }
+
+  private TestBuilder nestedArrayQueryTestBuilder(String file) {
+
+    final String sql = "select rec_nr, array_item['nested_1_int'] as array_item_nested_int from "
+        + "(select a_int as rec_nr, flatten(t.b_array) as array_item " + "from dfs_test.`" + file + "` t) a";
+
+    TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered().baselineColumns("rec_nr",
+        "array_item_nested_int");
+
+    return testBuilder;
+
+  }
+
+
+  /**
+   * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a>
+   */
+  @Test
+  public void testFlattenComplexArray() throws Exception {
+    final String file = AvroTestUtil.generateNestedArraySchema().getFilePath();
+
+    TestBuilder testBuilder = nestedArrayQueryTestBuilder(file);
+    for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) {
+      for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) {
+        testBuilder.baselineValues(i, j);
+      }
+    }
+    testBuilder.go();
+
+  }
+  /**
+   * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a>
+   */
+  @Test
+  public void testFlattenEmptyComplexArrayMustYieldNoResults() throws Exception {
+    final String file = AvroTestUtil.generateNestedArraySchema(AvroTestUtil.RECORD_COUNT, 0).getFilePath();
+    TestBuilder testBuilder = nestedArrayQueryTestBuilder(file);
+    testBuilder.expectsEmptyResultSet();
+  }
+
   @Test
   public void testNestedUnionArraySchema_withNullValues() throws Exception {
 

http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
index 96508d8..86d29ae 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -449,6 +450,45 @@ public class AvroTestUtil {
     return record;
   }
 
+  public static AvroTestRecordWriter generateNestedArraySchema() throws IOException {
+    return generateNestedArraySchema(RECORD_COUNT, ARRAY_SIZE);
+  }
+
+  public static AvroTestRecordWriter generateNestedArraySchema(int numRecords, int numArrayItems) throws IOException {
+
+    final File file = File.createTempFile("avro-nested-test", ".avro");
+    file.deleteOnExit();
+
+    final Schema schema = SchemaBuilder.record("AvroRecordReaderTest").namespace("org.apache.drill.exec.store.avro")
+        .fields().name("a_int").type().intType().noDefault().name("b_array").type().array().items()
+        .record("my_record_1").namespace("foo.blah.org").fields().name("nested_1_int").type().optional().intType()
+        .endRecord().arrayDefault(Collections.emptyList()).endRecord();
+
+    final Schema arraySchema = schema.getField("b_array").schema();
+    final Schema itemSchema = arraySchema.getElementType();
+
+    final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
+    try {
+      for (int i = 0; i < numRecords; i++) {
+        record.startRecord();
+        record.put("a_int", i);
+        GenericArray<GenericRecord> array = new GenericData.Array<>(ARRAY_SIZE, arraySchema);
+
+        for (int j = 0; j < numArrayItems; j++) {
+          final GenericRecord nestedRecord = new GenericData.Record(itemSchema);
+          nestedRecord.put("nested_1_int", j);
+          array.add(nestedRecord);
+        }
+        record.put("b_array", array);
+        record.endRecord();
+      }
+    } finally {
+      record.close();
+    }
+
+    return record;
+  }
+
   public static AvroTestRecordWriter generateMapSchema_withNullValues() throws Exception {
 
     final File file = File.createTempFile("avro-nested-test", ".avro");