You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/24 19:40:23 UTC
[03/11] hive git commit: HIVE-20164 : Murmur Hash : Make sure CTAS and IAS use correct bucketing version (Deepak Jaiswal, reviewed by Jason Dere)
HIVE-20164 : Murmur Hash : Make sure CTAS and IAS use correct bucketing version (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87b9f647
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87b9f647
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87b9f647
Branch: refs/heads/master-txnstats
Commit: 87b9f647b0478b587019e8c94454d33dae970e19
Parents: ed4fa73
Author: Deepak Jaiswal <dj...@apache.org>
Authored: Mon Jul 23 20:38:40 2018 -0700
Committer: Deepak Jaiswal <dj...@apache.org>
Committed: Mon Jul 23 20:38:40 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/TezCompiler.java | 42 ++
.../apache/hadoop/hive/ql/plan/TableDesc.java | 3 +
.../clientpositive/murmur_hash_migration.q | 61 ++
.../llap/murmur_hash_migration.q.out | 618 +++++++++++++++++++
5 files changed, 725 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/87b9f647/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 654185d..b5ae390 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -588,6 +588,7 @@ minillaplocal.query.files=\
mrr.q,\
multiMapJoin1.q,\
multiMapJoin2.q,\
+ murmur_hash_migration.q,\
non_native_window_udf.q,\
optimize_join_ptp.q,\
orc_analyze.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/87b9f647/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 1661aec..c3eb886 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -164,6 +164,9 @@ public class TezCompiler extends TaskCompiler {
runStatsAnnotation(procCtx);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Setup stats in the operator plan");
+ // Update bucketing version of ReduceSinkOp if needed
+ updateBucketingVersionForUpgrade(procCtx);
+
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// run the optimizations that use stats for optimization
runStatsDependentOptimizations(procCtx, inputs, outputs);
@@ -1690,4 +1693,43 @@ public class TezCompiler extends TaskCompiler {
}
}
}
+
+ private void updateBucketingVersionForUpgrade(OptimizeTezProcContext procCtx) {
+ // Collect the FileSinkOperators that OperatorUtils.findOperators returns for every top-level TableScanOperator.
+ Set<FileSinkOperator> fsOpsAll = new HashSet<>();
+ for (TableScanOperator ts : procCtx.parseContext.getTopOps().values()) {
+ Set<FileSinkOperator> fsOps = OperatorUtils.findOperators(
+ ts, FileSinkOperator.class);
+ fsOpsAll.addAll(fsOps);
+ }
+ // For each sink: unless its first parent is a GroupBy with an aggregator string equal to "compute_stats",
+ // copy the sink table's bucketing version (when set) to one ReduceSinkOperator found upstream.
+ for (FileSinkOperator fsOp : fsOpsAll) {
+ Operator<?> parentOfFS = fsOp.getParentOperators().get(0);
+ if (parentOfFS instanceof GroupByOperator) {
+ GroupByOperator gbyOp = (GroupByOperator) parentOfFS;
+ List<String> aggs = gbyOp.getConf().getAggregatorStrings();
+ boolean compute_stats = false;
+ for (String agg : aggs) {
+ if (agg.equalsIgnoreCase("compute_stats")) {
+ compute_stats = true;
+ break;
+ }
+ }
+ if (compute_stats) {
+ continue;
+ }
+ }
+ // NOTE(review): the check above is a whole-string, case-insensitive match - confirm aggregator strings are bare function names.
+ // Not a compute_stats sink: look for ReduceSinkOperators upstream of the sink's first parent.
+ Set<ReduceSinkOperator> rsOps = OperatorUtils.findOperatorsUpstream(parentOfFS, ReduceSinkOperator.class);
+ if (rsOps.isEmpty()) {
+ continue;
+ }
+ // Skip if the table has no bucketing version. NOTE(review): only the iterator's first RS is updated - confirm intended.
+ if (fsOp.getConf().getTableInfo().isSetBucketingVersion()) {
+ rsOps.iterator().next().setBucketingVersion(fsOp.getConf().getTableInfo().getBucketingVersion());
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/87b9f647/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
index bbce940..b73faa5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
@@ -183,6 +183,9 @@ public class TableDesc implements Serializable, Cloneable {
return (properties.getProperty(hive_metastoreConstants.META_TABLE_STORAGE) != null);
}
+ public boolean isSetBucketingVersion() { // Reports whether the TABLE_BUCKETING_VERSION table property is present (non-null).
+ return properties.getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION) != null;
+ }
public int getBucketingVersion() {
return Utilities.getBucketingVersion(
properties.getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
http://git-wip-us.apache.org/repos/asf/hive/blob/87b9f647/ql/src/test/queries/clientpositive/murmur_hash_migration.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/murmur_hash_migration.q b/ql/src/test/queries/clientpositive/murmur_hash_migration.q
new file mode 100644
index 0000000..2b8da9f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/murmur_hash_migration.q
@@ -0,0 +1,61 @@
+--! qt:dataset:src
+set hive.stats.column.autogather=false;
+set hive.strict.checks.bucketing=false;
+
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=30000;
+
+CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1');
+CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1');
+
+CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1');
+CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1');
+
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08');
+
+set hive.optimize.bucketingsorting=false;
+
+
+insert overwrite table srcbucket_mapjoin_n18 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_n18_stage limit 150;
+
+insert overwrite table srcbucket_mapjoin_part_n20 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20_stage limit 150;
+
+analyze table srcbucket_mapjoin_n18 compute statistics for columns;
+analyze table srcbucket_mapjoin_part_n20 compute statistics for columns;
+
+
+CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+explain
+insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20;
+insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20;
+
+CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+explain
+insert overwrite table tab_n10 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_n18;
+insert overwrite table tab_n10 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_n18;
+
+analyze table tab_part_n11 compute statistics for columns;
+analyze table tab_n10 compute statistics for columns;
+
+explain
+select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value;
+select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value;
+
+explain
+select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value;
+select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value;
http://git-wip-us.apache.org/repos/asf/hive/blob/87b9f647/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out b/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out
new file mode 100644
index 0000000..be5b5f7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out
@@ -0,0 +1,618 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_n18_stage
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_n18
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_n18
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_n18_stage
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage
+POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+PREHOOK: query: insert overwrite table srcbucket_mapjoin_n18 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_n18_stage limit 150
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_n18_stage
+PREHOOK: Input: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcbucket_mapjoin_n18 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_n18_stage limit 150
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_n18_stage
+POSTHOOK: Input: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08
+POSTHOOK: Lineage: srcbucket_mapjoin_n18 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_n18_stage)srcbucket_mapjoin_n18_stage.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_n18 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_n18_stage)srcbucket_mapjoin_n18_stage.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table srcbucket_mapjoin_part_n20 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20_stage limit 150
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20_stage
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcbucket_mapjoin_part_n20 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20_stage limit 150
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20_stage
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+POSTHOOK: Lineage: srcbucket_mapjoin_part_n20 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part_n20_stage)srcbucket_mapjoin_part_n20_stage.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_n20 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part_n20_stage)srcbucket_mapjoin_part_n20_stage.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: analyze table srcbucket_mapjoin_n18 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcbucket_mapjoin_n18
+PREHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin_n18
+PREHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcbucket_mapjoin_n18 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcbucket_mapjoin_n18
+POSTHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin_n18
+POSTHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcbucket_mapjoin_part_n20 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20
+PREHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcbucket_mapjoin_part_n20 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20
+POSTHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part_n11
+POSTHOOK: query: CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part_n11
+PREHOOK: query: explain
+insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: srcbucket_mapjoin_part_n20
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tab_part_n11
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tab_part_n11
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+PREHOOK: Output: default@tab_part_n11@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_part_n20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+POSTHOOK: Output: default@tab_part_n11@ds=2008-04-08
+POSTHOOK: Lineage: tab_part_n11 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part_n20)srcbucket_mapjoin_part_n20.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_part_n11 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part_n20)srcbucket_mapjoin_part_n20.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_n10
+POSTHOOK: query: CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_n10
+PREHOOK: query: explain
+insert overwrite table tab_n10 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_n18
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table tab_n10 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_n18
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: srcbucket_mapjoin_n18
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tab_n10
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tab_n10
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table tab_n10 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_n18
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_n18
+PREHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08
+PREHOOK: Output: default@tab_n10@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_n10 partition (ds='2008-04-08')
+ select key,value from srcbucket_mapjoin_n18
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_n18
+POSTHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08
+POSTHOOK: Output: default@tab_n10@ds=2008-04-08
+POSTHOOK: Lineage: tab_n10 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_n18)srcbucket_mapjoin_n18.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_n10 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_n18)srcbucket_mapjoin_n18.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: analyze table tab_part_n11 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@tab_part_n11
+PREHOOK: Input: default@tab_part_n11@ds=2008-04-08
+PREHOOK: Output: default@tab_part_n11
+PREHOOK: Output: default@tab_part_n11@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part_n11 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@tab_part_n11
+POSTHOOK: Input: default@tab_part_n11@ds=2008-04-08
+POSTHOOK: Output: default@tab_part_n11
+POSTHOOK: Output: default@tab_part_n11@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table tab_n10 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@tab_n10
+PREHOOK: Input: default@tab_n10@ds=2008-04-08
+PREHOOK: Output: default@tab_n10
+PREHOOK: Output: default@tab_n10@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_n10 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@tab_n10
+POSTHOOK: Input: default@tab_n10@ds=2008-04-08
+POSTHOOK: Output: default@tab_n10
+POSTHOOK: Output: default@tab_n10@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ sort order: ++++
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_n18
+PREHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20
+PREHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_n18
+POSTHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20
+POSTHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08
+#### A masked pattern was here ####
+82 val_82 82 val_82
+86 val_86 86 val_86
+145 val_145 145 val_145
+152 val_152 152 val_152
+152 val_152 152 val_152
+219 val_219 219 val_219
+219 val_219 219 val_219
+255 val_255 255 val_255
+255 val_255 255 val_255
+273 val_273 273 val_273
+273 val_273 273 val_273
+273 val_273 273 val_273
+277 val_277 277 val_277
+277 val_277 277 val_277
+277 val_277 277 val_277
+277 val_277 277 val_277
+369 val_369 369 val_369
+369 val_369 369 val_369
+369 val_369 369 val_369
+406 val_406 406 val_406
+406 val_406 406 val_406
+406 val_406 406 val_406
+406 val_406 406 val_406
+417 val_417 417 val_417
+417 val_417 417 val_417
+417 val_417 417 val_417
+446 val_446 446 val_446
+PREHOOK: query: explain
+select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ sort order: ++++
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_n10
+PREHOOK: Input: default@tab_n10@ds=2008-04-08
+PREHOOK: Input: default@tab_part_n11
+PREHOOK: Input: default@tab_part_n11@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_n10
+POSTHOOK: Input: default@tab_n10@ds=2008-04-08
+POSTHOOK: Input: default@tab_part_n11
+POSTHOOK: Input: default@tab_part_n11@ds=2008-04-08
+#### A masked pattern was here ####
+82 val_82 82 val_82
+86 val_86 86 val_86
+145 val_145 145 val_145
+152 val_152 152 val_152
+152 val_152 152 val_152
+219 val_219 219 val_219
+219 val_219 219 val_219
+255 val_255 255 val_255
+255 val_255 255 val_255
+273 val_273 273 val_273
+273 val_273 273 val_273
+273 val_273 273 val_273
+277 val_277 277 val_277
+277 val_277 277 val_277
+277 val_277 277 val_277
+277 val_277 277 val_277
+369 val_369 369 val_369
+369 val_369 369 val_369
+369 val_369 369 val_369
+406 val_406 406 val_406
+406 val_406 406 val_406
+406 val_406 406 val_406
+406 val_406 406 val_406
+417 val_417 417 val_417
+417 val_417 417 val_417
+417 val_417 417 val_417
+446 val_446 446 val_446