You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2016/12/24 17:36:41 UTC
hive git commit: HIVE-15499: Nested column pruning: don't prune paths
when a SerDe is used only for serializing (Chao Sun, reviewed by Ferdinand Xu)
Repository: hive
Updated Branches:
refs/heads/master ab0f9cab3 -> ac68aed6e
HIVE-15499: Nested column pruning: don't prune paths when a SerDe is used only for serializing (Chao Sun, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ac68aed6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ac68aed6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ac68aed6
Branch: refs/heads/master
Commit: ac68aed6e1e7d253e589132ba8ac493b396c3408
Parents: ab0f9ca
Author: Chao Sun <su...@apache.org>
Authored: Thu Dec 22 11:29:40 2016 -0800
Committer: Chao Sun <su...@apache.org>
Committed: Sat Dec 24 09:35:47 2016 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FileSinkOperator.java | 16 +++++++-
.../clientpositive/nested_column_pruning.q | 12 ++++++
.../clientpositive/nested_column_pruning.q.out | 41 ++++++++++++++++++++
3 files changed, 68 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 28d4789..3bbe92d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.Serializer;
@@ -355,7 +356,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
parent = Utilities.toTempPath(conf.getDirName());
statsFromRecordWriter = new boolean[numFiles];
serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
- serializer.initialize(hconf, conf.getTableInfo().getProperties());
+ serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
outputClass = serializer.getSerializedClass();
if (isLogInfoEnabled) {
@@ -1288,4 +1289,17 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
}
return new String[] {fspKey, null};
}
+
+ /**
+ * Check whether the nested column paths property is set in 'conf'.
+ * If so, return a copy of 'conf' with this property unset; otherwise return 'conf' unchanged.
+ */
+ private Configuration unsetNestedColumnPaths(Configuration conf) {
+ if (conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR) != null) {
+ Configuration confCopy = new Configuration(conf);
+ confCopy.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
+ return confCopy;
+ }
+ return conf;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/test/queries/clientpositive/nested_column_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q b/ql/src/test/queries/clientpositive/nested_column_pruning.q
index 28b974e..b08b356 100644
--- a/ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -1,4 +1,5 @@
set hive.fetch.task.conversion = none;
+set hive.exec.dynamic.partition.mode = nonstrict;
-- First, create source tables
DROP TABLE IF EXISTS dummy;
@@ -110,3 +111,14 @@ SELECT t1.s1.f3.f5, t2.s2.f8
FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6
WHERE t2.s2.f8.f9 == TRUE;
+
+-- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3;
+CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET;
+
+INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1;
+
+SELECT * FROM nested_tbl_3;
http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/test/results/clientpositive/nested_column_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nested_column_pruning.q.out b/ql/src/test/results/clientpositive/nested_column_pruning.q.out
index c501c6a..8d32df5 100644
--- a/ql/src/test/results/clientpositive/nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/nested_column_pruning.q.out
@@ -1091,3 +1091,44 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@nested_tbl_1
#### A masked pattern was here ####
5.0 {"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}}
+PREHOOK: query: -- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Testing insert with aliases
+
+DROP TABLE IF EXISTS nested_tbl_3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_tbl_3
+POSTHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_tbl_3
+PREHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+PREHOOK: Output: default@nested_tbl_3
+POSTHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
+SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+POSTHOOK: Output: default@nested_tbl_3@f3=4
+POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f1 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ]
+POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f2 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ]
+PREHOOK: query: SELECT * FROM nested_tbl_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_3
+PREHOOK: Input: default@nested_tbl_3@f3=4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM nested_tbl_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_3
+POSTHOOK: Input: default@nested_tbl_3@f3=4
+#### A masked pattern was here ####
+false foo 4