You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2016/12/24 17:36:41 UTC

hive git commit: HIVE-15499: Nested column pruning: don't prune paths when a SerDe is used only for serializing (Chao Sun, reviewed by Ferdinand Xu)

Repository: hive
Updated Branches:
  refs/heads/master ab0f9cab3 -> ac68aed6e


HIVE-15499: Nested column pruning: don't prune paths when a SerDe is used only for serializing (Chao Sun, reviewed by Ferdinand Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ac68aed6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ac68aed6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ac68aed6

Branch: refs/heads/master
Commit: ac68aed6e1e7d253e589132ba8ac493b396c3408
Parents: ab0f9ca
Author: Chao Sun <su...@apache.org>
Authored: Thu Dec 22 11:29:40 2016 -0800
Committer: Chao Sun <su...@apache.org>
Committed: Sat Dec 24 09:35:47 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FileSinkOperator.java   | 16 +++++++-
 .../clientpositive/nested_column_pruning.q      | 12 ++++++
 .../clientpositive/nested_column_pruning.q.out  | 41 ++++++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 28d4789..3bbe92d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.Serializer;
@@ -355,7 +356,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
       parent = Utilities.toTempPath(conf.getDirName());
       statsFromRecordWriter = new boolean[numFiles];
       serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
-      serializer.initialize(hconf, conf.getTableInfo().getProperties());
+      serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
       outputClass = serializer.getSerializedClass();
 
       if (isLogInfoEnabled) {
@@ -1288,4 +1289,17 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
     }
     return new String[] {fspKey, null};
   }
+
+  /**
+   * If 'conf' has nested column paths set, returns a copy of 'conf' with that property unset
+   * (the given 'conf' is not modified); otherwise returns 'conf' itself.
+   */
+  private Configuration unsetNestedColumnPaths(Configuration conf) {
+    if (conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR) != null) {
+      Configuration confCopy = new Configuration(conf);
+      confCopy.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
+      return confCopy;
+    }
+    return conf;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/test/queries/clientpositive/nested_column_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q b/ql/src/test/queries/clientpositive/nested_column_pruning.q
index 28b974e..b08b356 100644
--- a/ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -1,4 +1,5 @@
 set hive.fetch.task.conversion = none;
+set hive.exec.dynamic.partition.mode = nonstrict;
 
 -- First, create source tables
 DROP TABLE IF EXISTS dummy;
@@ -110,3 +111,14 @@ SELECT t1.s1.f3.f5, t2.s2.f8
 FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2
 ON t1.s1.f3.f4 = t2.s1.f6
 WHERE t2.s2.f8.f9 == TRUE;
+
+-- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3;
+CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET;
+
+INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1;
+
+SELECT * FROM nested_tbl_3;

http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/test/results/clientpositive/nested_column_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nested_column_pruning.q.out b/ql/src/test/results/clientpositive/nested_column_pruning.q.out
index c501c6a..8d32df5 100644
--- a/ql/src/test/results/clientpositive/nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/nested_column_pruning.q.out
@@ -1091,3 +1091,44 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nested_tbl_1
 #### A masked pattern was here ####
 5.0	{"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}}
+PREHOOK: query: -- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_tbl_3
+POSTHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_tbl_3
+PREHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+PREHOOK: Output: default@nested_tbl_3
+POSTHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+POSTHOOK: Output: default@nested_tbl_3@f3=4
+POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f1 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ]
+POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f2 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ]
+PREHOOK: query: SELECT * FROM nested_tbl_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_3
+PREHOOK: Input: default@nested_tbl_3@f3=4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM nested_tbl_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_3
+POSTHOOK: Input: default@nested_tbl_3@f3=4
+#### A masked pattern was here ####
+false	foo	4