You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ja...@apache.org on 2015/09/14 07:28:54 UTC

[07/15] drill git commit: DRILL-3773: Fix Mongo FieldSelection

DRILL-3773: Fix Mongo FieldSelection

Mongo plugin was previously rewriting a complex (multi-level) column reference as a simple selection of the top level field.

This changeset does not change this behavior in terms of the filter sent to mongo, but it add the original selected column to the list that will be read in by the JSON reader once that data is returned from mongo.

What this means is that we will be requesting more data from mongo that necessary (as we were previously), but this will be leveraging the existing functionality in the JSON reader to grab only the sub-selection actually requested in the query. This allows for difficult schema changes to be avoided by projecting only columns without schema changes.

This also fixes and adds unit tests for FieldSelection that cause wrong results when selecting a nested column and its parent.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/97615e56
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/97615e56
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/97615e56

Branch: refs/heads/master
Commit: 97615e5675c1b25a4a9b5f96e6e1be7ed4f96c9c
Parents: 197d972
Author: Jason Altekruse <al...@gmail.com>
Authored: Wed Sep 9 11:26:09 2015 -0700
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Sun Sep 13 18:36:35 2015 -0700

----------------------------------------------------------------------
 .../exec/store/mongo/MongoRecordReader.java     |  2 +-
 .../exec/store/mongo/MongoTestConstants.java    |  2 +
 .../drill/exec/store/mongo/MongoTestSuit.java   |  2 +
 .../store/mongo/TestMongoProjectPushDown.java   | 43 ++++++++++++++++++++
 .../resources/schema_change_int_to_string.json  | 31 ++++++++++++++
 .../exec/vector/complex/fn/FieldSelection.java  |  2 +-
 .../java/org/apache/drill/DrillTestWrapper.java | 20 ++++++++-
 .../vector/complex/writer/TestJsonReader.java   | 31 ++++++++++++++
 8 files changed, 129 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
index 0ac519f..c8b0699 100644
--- a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
+++ b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
@@ -98,7 +98,7 @@ public class MongoRecordReader extends AbstractRecordReader {
     if (!isStarQuery()) {
       for (SchemaPath column : projectedColumns ) {
         String fieldName = column.getRootSegment().getPath();
-        transformed.add(SchemaPath.getSimplePath(fieldName));
+        transformed.add(column);
         this.fields.put(fieldName, Integer.valueOf(1));
       }
     } else {

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestConstants.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestConstants.java b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestConstants.java
index d6d83d1..d050961 100644
--- a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestConstants.java
+++ b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestConstants.java
@@ -39,9 +39,11 @@ public interface MongoTestConstants {
 
   public static final String DONUTS_COLLECTION = "donuts";
   public static final String EMPINFO_COLLECTION = "empinfo";
+  public static final String SCHEMA_CHANGE_COLLECTION = "schema_change";
 
   public static final String DONUTS_DATA = "donuts.json";
   public static final String EMP_DATA = "emp.json";
+  public static final String SCHEMA_CHANGE_DATA = "schema_change_int_to_string.json";
 
   public static final String REPLICA_SET_1_NAME = "shard_1_replicas";
   public static final String REPLICA_SET_2_NAME = "shard_2_replicas";

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestSuit.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestSuit.java b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestSuit.java
index c52df63..284e7d8 100644
--- a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestSuit.java
+++ b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/MongoTestSuit.java
@@ -186,6 +186,7 @@ public class MongoTestSuit implements MongoTestConstants {
       mongod = mongodExecutable.start();
       mongoClient = new MongoClient(new ServerAddress(LOCALHOST, MONGOS_PORT));
       createDbAndCollections(EMPLOYEE_DB, EMPINFO_COLLECTION, "employee_id");
+      createDbAndCollections(EMPLOYEE_DB, SCHEMA_CHANGE_COLLECTION, "field_2");
     }
 
     private static void cleanup() {
@@ -209,6 +210,7 @@ public class MongoTestSuit implements MongoTestConstants {
         SingleMode.setup();
       }
       TestTableGenerator.importData(EMPLOYEE_DB, EMPINFO_COLLECTION, EMP_DATA);
+      TestTableGenerator.importData(EMPLOYEE_DB, SCHEMA_CHANGE_COLLECTION, SCHEMA_CHANGE_DATA);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
index 54ace3f..b17cf2f 100644
--- a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
+++ b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
@@ -17,10 +17,53 @@
  */
 package org.apache.drill.exec.store.mongo;
 
+import org.apache.drill.exec.ExecConstants;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
+import java.io.IOException;
+
+import static org.apache.drill.TestBuilder.listOf;
+import static org.apache.drill.TestBuilder.mapOf;
+
 public class TestMongoProjectPushDown extends MongoTestBase {
 
+  /**
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testComplexProjectPushdown() throws Exception {
+
+    try {
+      testBuilder()
+          .sqlQuery("select t.field_4.inner_3 as col_1, t.field_4 as col_2 from mongo.employee.schema_change t")
+          .unOrdered()
+          .optionSettingQueriesForTestQuery(String.format("alter session set `%s` = true", ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE))
+              .baselineColumns("col_1", "col_2")
+              .baselineValues(
+                  mapOf(),
+                  mapOf(
+                      "inner_1", listOf(),
+                      "inner_3", mapOf()))
+              .baselineValues(
+                  mapOf("inner_object_field_1", 2.0),
+                  mapOf(
+                      "inner_1", listOf(1.0, 2.0, 3.0),
+                      "inner_2", 3.0,
+                      "inner_3", mapOf("inner_object_field_1", 2.0)))
+              .baselineValues(
+                  mapOf(),
+                  mapOf(
+                      "inner_1", listOf(4.0, 5.0, 6.0),
+                      "inner_2", 3.0,
+                      "inner_3", mapOf()))
+              .go();
+    } finally {
+      test(String.format("alter session set `%s` = false", ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE));
+    }
+  }
+
   @Test
   public void testSingleColumnProject() throws Exception {
     String query = String.format(TEST_QUERY_PROJECT_PUSH_DOWN_TEMPLATE_1,

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/contrib/storage-mongo/src/test/resources/schema_change_int_to_string.json
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/test/resources/schema_change_int_to_string.json b/contrib/storage-mongo/src/test/resources/schema_change_int_to_string.json
new file mode 100644
index 0000000..d6ab4c9
--- /dev/null
+++ b/contrib/storage-mongo/src/test/resources/schema_change_int_to_string.json
@@ -0,0 +1,31 @@
+[{
+  "field_1": [1]
+},
+{
+  "field_1": [5],
+  "field_2": 2,
+  "field_3": {
+  "inner_1" : 2
+},
+  "field_4" : {
+  "inner_1" : [1,2,3],
+  "inner_2" : 3,
+  "inner_3" :  { "inner_object_field_1" : 2}
+},
+  "field_5" : [ { "inner_list" : [1, null, 6] }, { "inner_list":[3,8]}, { "inner_list":[12, null, 4, "null", 5]} ]
+},
+{
+  "field_1": [5,10,15],
+  "field_2": "A wild string appears!",
+  "field_3": {
+    "inner_1" : 5,
+    "inner_2" : 3,
+    "inner_3" : [ { "inner_object_field_1" : null}, {"inner_object_field_1" : 10} ]
+  },
+  "field_4" : {
+    "inner_1" : [4,5,6],
+    "inner_2" : 3
+  },
+  "field_5" : [ { "inner_list" : [5, null, 6.0, "1234"] }, { "inner_list":[7,8.0, "12341324"], "inner_list_2" : [1,2,2323.443e10, "hello there"]}, { "inner_list":[3,4,5], "inner_list_2" : [10, 11, 12]} ]
+}
+]

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/FieldSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/FieldSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/FieldSelection.java
index aecff05..1857479 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/FieldSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/FieldSelection.java
@@ -70,7 +70,7 @@ class FieldSelection {
       for(Entry<String, FieldSelection> e : children.entrySet()){
         newMap.put(e.getKey(), e.getValue().fixNodes());
       }
-      return new FieldSelection(newMap, ValidityMode.CHECK_CHILDREN);
+      return new FieldSelection(newMap, mode);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
index ff75274..59cbb15 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
@@ -397,7 +397,7 @@ public class DrillTestWrapper {
     if (expectedNumBatches != EXPECTED_BATCH_COUNT_NOT_SET) {
       final int actualNumBatches = results.size();
       assertEquals(String.format("Expected %d batches but query returned %d non empty batch(es)%n", expectedNumBatches,
-        actualNumBatches), expectedNumBatches, actualNumBatches);
+          actualNumBatches), expectedNumBatches, actualNumBatches);
     }
   }
 
@@ -560,7 +560,23 @@ public class DrillTestWrapper {
         break;
       }
       if (!found) {
-        throw new Exception(String.format("After matching %d records, did not find expected record in result set: %s", counter, printRecord(expectedRecord)));
+        StringBuilder sb = new StringBuilder();
+        for (int expectedRecordDisplayCount = 0;
+             expectedRecordDisplayCount < 10 && expectedRecordDisplayCount < expectedRecords.size();
+             expectedRecordDisplayCount++) {
+          sb.append(printRecord(expectedRecords.get(expectedRecordDisplayCount)));
+        }
+        String expectedRecordExamples = sb.toString();
+        sb.setLength(0);
+        for (int actualRecordDisplayCount = 0;
+             actualRecordDisplayCount < 10 && actualRecordDisplayCount < actualRecords.size();
+             actualRecordDisplayCount++) {
+          sb.append(printRecord(actualRecords.get(actualRecordDisplayCount)));
+        }
+        String actualRecordExamples = sb.toString();
+        throw new Exception(String.format("After matching %d records, did not find expected record in result set: %s\n\n" +
+            "Some examples of expected records:%s\n\n Some examples of records returned by the test query:%s",
+            counter, printRecord(expectedRecord), expectedRecordExamples, actualRecordExamples));
       } else {
         actualRecords.remove(i);
         counter++;

http://git-wip-us.apache.org/repos/asf/drill/blob/97615e56/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
index 7d6c71c..bd9cea1 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
@@ -63,6 +63,37 @@ public class TestJsonReader extends BaseTestQuery {
   }
 
   @Test
+  public void testFieldSelectionBug() throws Exception {
+    try {
+      testBuilder()
+          .sqlQuery("select t.field_4.inner_3 as col_1, t.field_4 as col_2 from cp.`store/json/schema_change_int_to_string.json` t")
+          .unOrdered()
+          .optionSettingQueriesForTestQuery("alter session set `store.json.all_text_mode` = true")
+          .baselineColumns("col_1", "col_2")
+          .baselineValues(
+              mapOf(),
+              mapOf(
+                  "inner_1", listOf(),
+                  "inner_3", mapOf()))
+          .baselineValues(
+              mapOf("inner_object_field_1", "2"),
+              mapOf(
+                  "inner_1", listOf("1", "2", "3"),
+                  "inner_2", "3",
+                  "inner_3", mapOf("inner_object_field_1", "2")))
+          .baselineValues(
+              mapOf(),
+              mapOf(
+                  "inner_1", listOf("4", "5", "6"),
+                  "inner_2", "3",
+                  "inner_3", mapOf()))
+          .go();
+    } finally {
+      test("alter session set `store.json.all_text_mode` = false");
+    }
+  }
+
+  @Test
   public void testSplitAndTransferFailure() throws Exception {
     final String testVal = "a string";
     testBuilder()