You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pa...@apache.org on 2016/06/07 17:08:43 UTC

drill git commit: DRILL-4694: CTAS in JSON format produces extraneous NULL fields Changed behavior of JSON CTAS to skip fields if the value is null. Added an option "store.json.writer.skip_null_fields" to enable old behavior.

Repository: drill
Updated Branches:
  refs/heads/master d1ac6fbae -> 6286c0a4b


DRILL-4694: CTAS in JSON format produces extraneous NULL fields Changed behavior of JSON CTAS to skip fields if the value is null. Added an option "store.json.writer.skip_null_fields" to enable old behavior.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/6286c0a4
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/6286c0a4
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/6286c0a4

Branch: refs/heads/master
Commit: 6286c0a4b8e39524fe00d623152d1d38db15774f
Parents: d1ac6fb
Author: Parth Chandra <pa...@apache.org>
Authored: Wed Jun 1 17:19:03 2016 -0700
Committer: Parth Chandra <pa...@apache.org>
Committed: Tue Jun 7 10:06:53 2016 -0700

----------------------------------------------------------------------
 .../templates/JsonOutputRecordWriter.java       |  13 ++
 .../org/apache/drill/exec/ExecConstants.java    |   1 +
 .../server/options/SystemOptionManager.java     |   1 +
 .../exec/store/easy/json/JSONFormatPlugin.java  |   1 +
 .../exec/store/easy/json/JsonRecordWriter.java  |   1 +
 .../java/org/apache/drill/TestCTASJson.java     | 150 +++++++++++++++++++
 .../test/resources/json/ctas_alltypes_map.json  |  28 ++++
 .../resources/json/ctas_alltypes_map_out.json   |  41 +++++
 .../json/ctas_alltypes_repeated_map.json        |  28 ++++
 .../json/ctas_alltypes_repeated_map_out.json    |  41 +++++
 10 files changed, 305 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java b/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
index b6b0e9c..651f5a8 100644
--- a/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
@@ -43,6 +43,7 @@ import java.util.List;
 public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implements RecordWriter {
 
   protected JsonOutput gen;
+  protected boolean skipNullFields = true;
 
 <#list vv.types as type>
   <#list type.minor as minor>
@@ -61,7 +62,13 @@ public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implem
 
     @Override
     public void startField() throws IOException {
+      <#if mode.prefix == "Nullable" >
+      if (!skipNullFields || this.reader.isSet()) {
+        gen.writeFieldName(fieldName);
+      }
+      <#else>
       gen.writeFieldName(fieldName);
+      </#if>
     }
 
     @Override
@@ -120,7 +127,13 @@ public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implem
   <#elseif mode.prefix == "Repeated" >
     gen.write${typeName}(i, reader);
   <#else>
+    <#if mode.prefix == "Nullable" >
+    if (!skipNullFields || this.reader.isSet()) {
+      gen.write${typeName}(reader);
+    }
+    <#else>
     gen.write${typeName}(reader);
+    </#if>
   </#if>
 
   <#if mode.prefix == "Repeated">

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 17fbb7b..6eb8a3a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -134,6 +134,7 @@ public interface ExecConstants {
   BooleanValidator JSON_READER_ALL_TEXT_MODE_VALIDATOR = new BooleanValidator(JSON_ALL_TEXT_MODE, false);
   BooleanValidator JSON_EXTENDED_TYPES = new BooleanValidator("store.json.extended_types", false);
   BooleanValidator JSON_WRITER_UGLIFY = new BooleanValidator("store.json.writer.uglify", false);
+  BooleanValidator JSON_WRITER_SKIPNULLFIELDS = new BooleanValidator("store.json.writer.skip_null_fields", true);
 
   DoubleValidator TEXT_ESTIMATED_ROW_SIZE = new RangeDoubleValidator(
       "store.text.estimated_row_size_bytes", 1, Long.MAX_VALUE, 100.0);

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index c35ed0e..579276e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -99,6 +99,7 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea
       ExecConstants.TEXT_ESTIMATED_ROW_SIZE,
       ExecConstants.JSON_EXTENDED_TYPES,
       ExecConstants.JSON_WRITER_UGLIFY,
+      ExecConstants.JSON_WRITER_SKIPNULLFIELDS,
       ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR,
       ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR,
       ExecConstants.MONGO_READER_ALL_TEXT_MODE_VALIDATOR,

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
index 3f093be..30c248e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
@@ -81,6 +81,7 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> {
     options.put("extension", "json");
     options.put("extended", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_EXTENDED_TYPES)));
     options.put("uglify", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_UGLIFY)));
+    options.put("skipnulls", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_SKIPNULLFIELDS)));
 
     RecordWriter recordWriter = new JsonRecordWriter();
     recordWriter.init(options);

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
index 848da77..f27e04c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
@@ -72,6 +72,7 @@ public class JsonRecordWriter extends JSONOutputRecordWriter implements RecordWr
     this.fieldDelimiter = writerOptions.get("separator");
     this.extension = writerOptions.get("extension");
     this.useExtendedOutput = Boolean.parseBoolean(writerOptions.get("extended"));
+    this.skipNullFields = Boolean.parseBoolean(writerOptions.get("skipnulls"));
     final boolean uglify = Boolean.parseBoolean(writerOptions.get("uglify"));
 
     Configuration conf = new Configuration();

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java b/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java
new file mode 100644
index 0000000..c76892d
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+
+import org.apache.drill.common.util.TestTools;
+import org.apache.drill.exec.ExecConstants;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestCTASJson extends PlanTestBase {
+  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCTASJson.class);
+
+  static final String WORKING_PATH = TestTools.getWorkingPath();
+  static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources";
+
+  @Test
+  /**
+   * Test a source json file that contains records that are maps with fields of all types.
+   * Some records have missing fields. CTAS should skip the missing fields
+   */
+  public void testctas_alltypes_map() throws Exception {
+    String testName = "ctas_alltypes_map";
+    test("use dfs_test.tmp");
+    test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+    final String query = "select * from `" + testName + "_json` t1 ";
+
+    try {
+      testBuilder()
+          .sqlQuery(query)
+          .ordered()
+          .jsonBaselineFile("json/" + testName + ".json")
+          .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+          .optionSettingQueriesForTestQuery("alter session set store.json.writer.skip_null_fields = true") // DEFAULT
+          .build()
+          .run();
+    } finally {
+      test("drop table " + testName + "_json");
+      test("alter session reset store.format ");
+      test("alter session reset store.json.writer.skip_null_fields ");
+    }
+  }
+
+  @Test
+  /**
+   * Test a source json file that contains records that are maps with fields of all types.
+   * Some records have missing fields. CTAS should NOT skip the missing fields
+   */
+  public void testctas_alltypes_map_noskip() throws Exception {
+    String testName = "ctas_alltypes_map";
+    test("use dfs_test.tmp");
+    test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+    final String query = "select * from `" + testName + "_json` t1 ";
+
+    try {
+      testBuilder()
+          .sqlQuery(query)
+          .ordered()
+          .jsonBaselineFile("json/" + testName + "_out.json")
+          .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+          .optionSettingQueriesForTestQuery("alter session set store.json.writer.skip_null_fields = false") // change from DEFAULT
+          .build()
+          .run();
+    } finally{
+      test("drop table " + testName + "_json" );
+      test("alter session reset store.format ");
+      test("alter session reset store.json.writer.skip_null_fields ");
+    }
+
+  }
+
+  @Test
+  /**
+   * Test a source json file that contains records that are maps with fields of all types.
+   * Some records have missing fields. CTAS should skip the missing fields
+   */
+  public void testctas_alltypes_repeatedmap() throws Exception {
+    String testName = "ctas_alltypes_repeated_map";
+    test("use dfs_test.tmp");
+    test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+    final String query = "select * from `" + testName + "_json` t1 ";
+
+    try {
+      testBuilder()
+          .sqlQuery(query)
+          .ordered()
+          .jsonBaselineFile("json/" + testName + ".json")
+          .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+          .optionSettingQueriesForTestQuery(
+              "alter session set store.json.writer.skip_null_fields = true") // DEFAULT
+          .build()
+          .run();
+    }finally{
+      test("drop table " + testName + "_json" );
+      test("alter session reset store.format ");
+      test("alter session reset store.json.writer.skip_null_fields ");
+    }
+
+  }
+
+  @Test
+  /**
+   * Test a source json file that contains records that are maps with fields of all types.
+   * Some records have missing fields. CTAS should NOT skip the missing fields
+   */
+  public void testctas_alltypes_repeated_map_noskip() throws Exception {
+    String testName = "ctas_alltypes_repeated_map";
+    test("use dfs_test.tmp");
+    test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+    final String query = "select * from `" + testName + "_json` t1 ";
+
+    try {
+      testBuilder()
+          .sqlQuery(query)
+          .ordered()
+          .jsonBaselineFile("json/" + testName + "_out.json")
+          .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+          .optionSettingQueriesForTestQuery(
+              "alter session set store.json.writer.skip_null_fields = false") // change from DEFAULT
+          .build()
+          .run();
+    } finally {
+      test("drop table " + testName + "_json" );
+      test("alter session reset store.format ");
+      test("alter session reset store.json.writer.skip_null_fields ");
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json
new file mode 100644
index 0000000..ca7bda0
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json
@@ -0,0 +1,28 @@
+{
+  "X" :  {
+    "key1" : "value1",
+    "key2" : 1,
+    "key3" : 10.1,
+    "key4" : true,
+    "key5" : 1000000000,
+    "key6" : 1000000000.1
+  }
+}
+{
+  "X" :  {
+    "key1" : "value2",
+    "key2" : 2
+  }
+}
+{
+  "X" :  {
+    "key3" : 10.3,
+    "key4" : false
+  }
+}
+{
+  "X" :  {
+    "key5" : 1000000005,
+    "key6" : 1000000000.6
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json
new file mode 100644
index 0000000..51eb90d
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json
@@ -0,0 +1,41 @@
+{
+  "X" :  {
+    "key1" : "value1",
+    "key2" : 1,
+    "key3" : 10.1,
+    "key4" : true,
+    "key5" : 1000000000,
+    "key6" : 1000000000.1
+  }
+}
+{
+  "X" :  {
+    "key1" : "value2",
+    "key2" : 2,
+    "key3" : null,
+    "key4" : null,
+    "key5" : null,
+    "key6" : null
+  }
+}
+{
+  "X" :  {
+    "key1" : null,
+    "key2" : null,
+    "key3" : 10.3,
+    "key4" : false,
+    "key5" : null,
+    "key6" : null
+  }
+}
+{
+  "X" :  {
+    "key1" : null,
+    "key2" : null,
+    "key3" : null,
+    "key4" : null,
+    "key5" : 1000000005,
+    "key6" : 1000000000.6
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json
new file mode 100644
index 0000000..d19e8c5
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json
@@ -0,0 +1,28 @@
+{
+  "X" : [ {
+    "key1" : "value1",
+    "key2" : 1,
+    "key3" : 10.1,
+    "key4" : true,
+    "key5" : 1000000000,
+    "key6" : 1000000000.1
+  } ]
+}
+{
+  "X" : [ {
+    "key1" : "value2",
+    "key2" : 2
+  } ]
+}
+{
+  "X" : [ {
+    "key3" : 10.3,
+    "key4" : false
+  } ]
+}
+{
+  "X" : [ {
+    "key5" : 1000000005,
+    "key6" : 1000000000.6
+  } ]
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/6286c0a4/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json
new file mode 100644
index 0000000..dc3aabf
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json
@@ -0,0 +1,41 @@
+{
+  "X" : [ {
+    "key1" : "value1",
+    "key2" : 1,
+    "key3" : 10.1,
+    "key4" : true,
+    "key5" : 1000000000,
+    "key6" : 1000000000.1
+  } ]
+}
+{
+  "X" : [ {
+    "key1" : "value2",
+    "key2" : 2,
+    "key3" : null,
+    "key4" : null,
+    "key5" : null,
+    "key6" : null
+  } ]
+}
+{
+  "X" : [ {
+    "key1" : null,
+    "key2" : null,
+    "key3" : 10.3,
+    "key4" : false,
+    "key5" : null,
+    "key6" : null
+  } ]
+}
+{
+  "X" : [ {
+    "key1" : null,
+    "key2" : null,
+    "key3" : null,
+    "key4" : null,
+    "key5" : 1000000005,
+    "key6" : 1000000000.6
+  } ]
+}
+