You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2016/06/24 22:11:48 UTC
drill git commit: DRILL-4574: Avro Plugin: Flatten does not work
correctly on record items
Repository: drill
Updated Branches:
refs/heads/master 11602456b -> 1c9e92b0c
DRILL-4574: Avro Plugin: Flatten does not work correctly on record items
* When applied to arrays of records, the flatten function always returned only the last item in the array when querying Avro records
* The problem was that the RepeatedMapWriter's start() and end() methods were called only once, rather than once for every array item
* This commit adds tests that show the expected behaviour for both non-primitive and primitive arrays
close apache/drill#459
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/1c9e92b0
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/1c9e92b0
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/1c9e92b0
Branch: refs/heads/master
Commit: 1c9e92b0cec18b4ee5a005fd6006ad329e3fa568
Parents: 1160245
Author: baunz <jo...@gmail.com>
Authored: Sun Apr 3 09:43:40 2016 +0200
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Jun 24 15:07:25 2016 -0700
----------------------------------------------------------------------
.../drill/exec/store/avro/AvroRecordReader.java | 5 +-
.../drill/exec/store/avro/AvroFormatTest.java | 65 ++++++++++++++++++++
.../drill/exec/store/avro/AvroTestUtil.java | 40 ++++++++++++
3 files changed, 108 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
index 89e220c..bbc9b04 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
@@ -195,6 +195,7 @@ public class AvroRecordReader extends AbstractRecordReader {
}
process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
+
}
break;
case ARRAY:
@@ -207,11 +208,11 @@ public class AvroRecordReader extends AbstractRecordReader {
} else {
writer = (MapOrListWriterImpl) writer.list(fieldName);
}
- writer.start();
for (final Object o : array) {
+ writer.start();
process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
+ writer.end();
}
- writer.end();
break;
case UNION:
// currently supporting only nullable union (optional fields) like ["null", "some-type"].
http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
index af4d0e6..f804e88 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
@@ -326,6 +326,71 @@ public class AvroFormatTest extends BaseTestQuery {
test(sql);
}
+ /**
+ * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a>
+ *
+ */
+ @Test
+ public void testFlattenPrimitiveArray() throws Exception {
+ final String file = AvroTestUtil.generateSimpleArraySchema_NoNullValues().getFilePath();
+
+ final String sql = "select a_string, flatten(c_string_array) as array_item "
+ + "from dfs_test.`" + file + "` t";
+
+ TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered()
+ .baselineColumns("a_string", "array_item");
+
+ for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) {
+
+ for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) {
+ testBuilder.baselineValues("a_" + i, "c_string_array_" + i + "_" + j);
+ }
+ }
+
+
+ testBuilder.go();
+
+ }
+
+ private TestBuilder nestedArrayQueryTestBuilder(String file) {
+
+ final String sql = "select rec_nr, array_item['nested_1_int'] as array_item_nested_int from "
+ + "(select a_int as rec_nr, flatten(t.b_array) as array_item " + "from dfs_test.`" + file + "` t) a";
+
+ TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered().baselineColumns("rec_nr",
+ "array_item_nested_int");
+
+ return testBuilder;
+
+ }
+
+
+ /**
+ * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a>
+ */
+ @Test
+ public void testFlattenComplexArray() throws Exception {
+ final String file = AvroTestUtil.generateNestedArraySchema().getFilePath();
+
+ TestBuilder testBuilder = nestedArrayQueryTestBuilder(file);
+ for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) {
+ for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) {
+ testBuilder.baselineValues(i, j);
+ }
+ }
+ testBuilder.go();
+
+ }
+ /**
+ * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a>
+ */
+ @Test
+ public void testFlattenEmptyComplexArrayMustYieldNoResults() throws Exception {
+ final String file = AvroTestUtil.generateNestedArraySchema(AvroTestUtil.RECORD_COUNT, 0).getFilePath();
+ TestBuilder testBuilder = nestedArrayQueryTestBuilder(file);
+ testBuilder.expectsEmptyResultSet();
+ }
+
@Test
public void testNestedUnionArraySchema_withNullValues() throws Exception {
http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
index 96508d8..86d29ae 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -449,6 +450,45 @@ public class AvroTestUtil {
return record;
}
+ public static AvroTestRecordWriter generateNestedArraySchema() throws IOException {
+ return generateNestedArraySchema(RECORD_COUNT, ARRAY_SIZE);
+ }
+
+ public static AvroTestRecordWriter generateNestedArraySchema(int numRecords, int numArrayItems) throws IOException {
+
+ final File file = File.createTempFile("avro-nested-test", ".avro");
+ file.deleteOnExit();
+
+ final Schema schema = SchemaBuilder.record("AvroRecordReaderTest").namespace("org.apache.drill.exec.store.avro")
+ .fields().name("a_int").type().intType().noDefault().name("b_array").type().array().items()
+ .record("my_record_1").namespace("foo.blah.org").fields().name("nested_1_int").type().optional().intType()
+ .endRecord().arrayDefault(Collections.emptyList()).endRecord();
+
+ final Schema arraySchema = schema.getField("b_array").schema();
+ final Schema itemSchema = arraySchema.getElementType();
+
+ final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
+ try {
+ for (int i = 0; i < numRecords; i++) {
+ record.startRecord();
+ record.put("a_int", i);
+ GenericArray<GenericRecord> array = new GenericData.Array<>(ARRAY_SIZE, arraySchema);
+
+ for (int j = 0; j < numArrayItems; j++) {
+ final GenericRecord nestedRecord = new GenericData.Record(itemSchema);
+ nestedRecord.put("nested_1_int", j);
+ array.add(nestedRecord);
+ }
+ record.put("b_array", array);
+ record.endRecord();
+ }
+ } finally {
+ record.close();
+ }
+
+ return record;
+ }
+
public static AvroTestRecordWriter generateMapSchema_withNullValues() throws Exception {
final File file = File.createTempFile("avro-nested-test", ".avro");