You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/02 02:08:06 UTC
[18/20] hive git commit: HIVE-19995 : Aggregate row traffic for acid
tables (Zoltan Haindrich via Ashutosh Chauhan)
HIVE-19995 : Aggregate row traffic for acid tables (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/08eba3e1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/08eba3e1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/08eba3e1
Branch: refs/heads/master-txnstats
Commit: 08eba3e1e81e6afb3d866da5ecef69b1a7f91c6c
Parents: 1c33fea
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Sun Jul 1 09:58:59 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sun Jul 1 09:58:59 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/stats/BasicStatsTask.java | 10 +-
.../queries/clientpositive/sqlmerge_stats.q | 38 ++
.../results/clientpositive/acid_nullscan.q.out | 8 +-
.../clientpositive/acid_table_stats.q.out | 16 +-
.../clientpositive/autoColumnStats_4.q.out | 4 +
.../clientpositive/druid/druidmini_mv.q.out | 50 +-
.../llap/acid_bucket_pruning.q.out | 6 +-
.../llap/acid_vectorization_original.q.out | 14 +-
.../llap/dynpart_sort_optimization_acid.q.out | 144 +++---
.../llap/enforce_constraint_notnull.q.out | 24 +-
.../llap/insert_into_default_keyword.q.out | 100 ++--
.../insert_values_orig_table_use_metadata.q.out | 24 +-
.../materialized_view_create_rewrite_3.q.out | 40 +-
.../materialized_view_create_rewrite_4.q.out | 32 +-
.../materialized_view_create_rewrite_5.q.out | 50 +-
...ized_view_create_rewrite_rebuild_dummy.q.out | 40 +-
.../llap/results_cache_invalidation.q.out | 74 +--
.../llap/results_cache_transactional.q.out | 38 +-
.../clientpositive/llap/sqlmerge_stats.q.out | 511 +++++++++++++++++++
.../test/results/clientpositive/row__id.q.out | 18 +-
.../tez/acid_vectorization_original_tez.q.out | 14 +-
.../clientpositive/tez/explainanalyze_5.q.out | 8 +-
23 files changed, 918 insertions(+), 346 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3dd6580..35fad2c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -683,6 +683,7 @@ minillaplocal.query.files=\
smb_cache.q,\
special_character_in_tabnames_1.q,\
sqlmerge.q,\
+ sqlmerge_stats.q,\
stats_based_fetch_decision.q,\
stats_only_external.q,\
strict_managed_tables_sysdb.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
index 8c23887..f31c170 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
@@ -169,7 +169,7 @@ public class BasicStatsTask implements Serializable, IStatsProcessor {
// though we are marking stats as not being accurate.
if (StatsSetupConst.areBasicStatsUptoDate(parameters) || p.isTransactionalTable()) {
String prefix = getAggregationPrefix(p.getTable(), p.getPartition());
- updateStats(statsAggregator, parameters, prefix, p.isAcid());
+ updateStats(statsAggregator, parameters, prefix);
}
}
@@ -206,14 +206,8 @@ public class BasicStatsTask implements Serializable, IStatsProcessor {
}
private void updateStats(StatsAggregator statsAggregator, Map<String, String> parameters,
- String aggKey, boolean isFullAcid) throws HiveException {
+ String aggKey) throws HiveException {
for (String statType : StatsSetupConst.statsRequireCompute) {
- if (isFullAcid && !work.isTargetRewritten()) {
- // Don't bother with aggregation in this case, it will probably be invalid.
- parameters.remove(statType);
- continue;
- }
-
String value = statsAggregator.aggregateStats(aggKey, statType);
if (value != null && !value.isEmpty()) {
long longValue = Long.parseLong(value);
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/queries/clientpositive/sqlmerge_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/sqlmerge_stats.q b/ql/src/test/queries/clientpositive/sqlmerge_stats.q
new file mode 100644
index 0000000..c480eb6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sqlmerge_stats.q
@@ -0,0 +1,38 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.explain.user=false;
+set hive.merge.cardinality.check=true;
+
+create table t(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+
+desc formatted t;
+
+insert into t values (1,1);
+insert into upd_t values (1,1),(2,2);
+
+desc formatted t;
+
+explain merge into t as t using upd_t as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b);
+
+merge into t as t using upd_t as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b);
+
+-- merge could keep track of inserts
+select assert_true(count(1) = 2) from t group by a>-1;
+-- rownum is 2
+desc formatted t;
+
+merge into t as t using upd_t as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b);
+
+
+select assert_true(count(1) = 0) from t group by a>-1;
+-- rownum is 0; because the orc writer can keep track of delta
+desc formatted t;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/acid_nullscan.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out
index c9684dd..19fcc8c 100644
--- a/ql/src/test/results/clientpositive/acid_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out
@@ -42,12 +42,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: acid_vectorized_n1
- Statistics: Num rows: 90 Data size: 25960 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 25960 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2360 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(a)
mode: hash
@@ -79,6 +79,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.acid_vectorized_n1
numFiles 3
+ numRows 11
+ rawDataSize 0
serialization.ddl struct acid_vectorized_n1 { i32 a, string b}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
@@ -101,6 +103,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.acid_vectorized_n1
numFiles 3
+ numRows 11
+ rawDataSize 0
serialization.ddl struct acid_vectorized_n1 { i32 a, string b}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/acid_table_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out
index 084d232..841a5a4 100644
--- a/ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -94,6 +94,8 @@ Table: acid
#### A masked pattern was here ####
Partition Parameters:
numFiles 2
+ numRows 2000
+ rawDataSize 0
totalSize 4063
#### A masked pattern was here ####
@@ -133,17 +135,17 @@ STAGE PLANS:
TableScan
alias: acid
filterExpr: (ds = '2008-04-08') (type: boolean)
- Statistics: Num rows: 83 Data size: 40630 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 83 Data size: 40630 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Execution mode: vectorized
Reduce Operator Tree:
@@ -151,10 +153,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -387,6 +389,8 @@ Table: acid
#### A masked pattern was here ####
Partition Parameters:
numFiles 4
+ numRows 3000
+ rawDataSize 208000
totalSize 8118
#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
index a16ec07..42c7b43 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
@@ -201,6 +201,8 @@ Table Type: MANAGED_TABLE
Table Parameters:
bucketing_version 2
numFiles 2
+ numRows 10
+ rawDataSize 0
totalSize 1899
transactional true
transactional_properties default
@@ -244,6 +246,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE {}
bucketing_version 2
numFiles 4
+ numRows 8
+ rawDataSize 0
totalSize 3275
transactional true
transactional_properties default
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
index e5e1ea9..2e44e14 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
@@ -341,34 +341,34 @@ STAGE PLANS:
TableScan
alias: cmv_basetable_n2
filterExpr: (a = 3) (type: boolean)
- Statistics: Num rows: 31 Data size: 372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (a = 3) (type: boolean)
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: double)
Map 3
Map Operator Tree:
TableScan
alias: cmv_basetable_n2
filterExpr: ((d = 3) and (a = 3)) (type: boolean)
- Statistics: Num rows: 31 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((a = 3) and (d = 3)) (type: boolean)
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: double)
Reducer 2
Reduce Operator Tree:
@@ -379,14 +379,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 49 Data size: 1421 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 49 Data size: 1421 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 49 Data size: 1421 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -460,34 +460,34 @@ STAGE PLANS:
TableScan
alias: cmv_basetable_n2
filterExpr: (a = 3) (type: boolean)
- Statistics: Num rows: 31 Data size: 22692 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (a = 3) (type: boolean)
- Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double), userid (type: varchar(256))
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), _col4 (type: varchar(256)), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, __time_granularity
- Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: __time_granularity (type: timestamp)
sort order: +
Map-reduce partition columns: __time_granularity (type: timestamp)
- Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), _col4 (type: varchar(256))
Reducer 2
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), VALUE._col4 (type: varchar(256)), KEY.__time_granularity (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, __time_granularity
- Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
@@ -552,17 +552,17 @@ STAGE PLANS:
TableScan
alias: cmv_basetable_n2
filterExpr: ((a = 3) and (d = 3)) (type: boolean)
- Statistics: Num rows: 31 Data size: 496 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((a = 3) and (d = 3)) (type: boolean)
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c (type: double)
outputColumnNames: _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: double)
Map 3
Map Operator Tree:
@@ -587,14 +587,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col6 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
index b856b99..29a05ae 100644
--- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
alias: acidtbldefault
filterExpr: (a = 1) (type: boolean)
buckets included: [13,] of 16
- Statistics: Num rows: 1850 Data size: 7036 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9174 Data size: 34868 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
@@ -99,6 +99,8 @@ STAGE PLANS:
location hdfs://### HDFS PATH ###
name default.acidtbldefault
numFiles 17
+ numRows 9174
+ rawDataSize 0
serialization.ddl struct acidtbldefault { i32 a}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -122,6 +124,8 @@ STAGE PLANS:
location hdfs://### HDFS PATH ###
name default.acidtbldefault
numFiles 17
+ numRows 9174
+ rawDataSize 0
serialization.ddl struct acidtbldefault { i32 a}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index 028c3ca..57ff575 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -665,22 +665,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_orc_bucketed
- Statistics: Num rows: 1234 Data size: 706090 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: ROW__ID
- Statistics: Num rows: 1234 Data size: 706090 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 617 Data size: 51828 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- Statistics: Num rows: 617 Data size: 51828 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -692,13 +692,13 @@ STAGE PLANS:
keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 617 Data size: 51828 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col1 > 1L) (type: boolean)
- Statistics: Num rows: 205 Data size: 17220 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 17220 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
index 33bceed..7905441 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
@@ -95,19 +95,19 @@ STAGE PLANS:
TableScan
alias: acid_part
filterExpr: ((key = 'foo') and (ds = '2008-04-08')) (type: boolean)
- Statistics: Num rows: 160 Data size: 61001 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -116,10 +116,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -191,19 +191,19 @@ STAGE PLANS:
TableScan
alias: acid_part
filterExpr: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean)
- Statistics: Num rows: 159 Data size: 104317 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 1601 Data size: 444998 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
outputColumnNames: _col0, _col3
- Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col3 (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -213,10 +213,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -383,19 +383,19 @@ STAGE PLANS:
TableScan
alias: acid_part_sdpo
filterExpr: ((key = 'foo') and (ds = '2008-04-08')) (type: boolean)
- Statistics: Num rows: 176 Data size: 67063 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -404,10 +404,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -479,19 +479,19 @@ STAGE PLANS:
TableScan
alias: acid_part_sdpo
filterExpr: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean)
- Statistics: Num rows: 171 Data size: 112152 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 1601 Data size: 444998 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
outputColumnNames: _col0, _col3
- Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +++
Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -500,11 +500,11 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
- Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -680,19 +680,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part
filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean)
- Statistics: Num rows: 157 Data size: 60527 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -701,10 +701,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -777,19 +777,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part
filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean)
- Statistics: Num rows: 1804 Data size: 235871 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 3201 Data size: 313458 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
outputColumnNames: _col0, _col4
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col4 (type: int)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -799,10 +799,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -904,19 +904,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part
filterExpr: (value = 'bar') (type: boolean)
- Statistics: Num rows: 2015 Data size: 726272 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 4200 Data size: 1253037 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (value = 'bar') (type: boolean)
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), _col2 (type: int)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -926,10 +926,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: string), VALUE._col1 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1103,19 +1103,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part_sdpo
filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean)
- Statistics: Num rows: 157 Data size: 60527 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -1124,10 +1124,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1200,19 +1200,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part_sdpo
filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean)
- Statistics: Num rows: 1804 Data size: 235871 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 3201 Data size: 313458 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
outputColumnNames: _col0, _col4
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -1221,11 +1221,11 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1327,19 +1327,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part_sdpo
filterExpr: (value = 'bar') (type: boolean)
- Statistics: Num rows: 2015 Data size: 726272 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 4952 Data size: 1456618 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (value = 'bar') (type: boolean)
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -1348,11 +1348,11 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: string), KEY._col2 (type: int), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, '_bucket_number'
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1810 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1810 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1526,19 +1526,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part_sdpo_no_cp
filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean)
- Statistics: Num rows: 97 Data size: 82922 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 1601 Data size: 599036 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), key (type: string), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), 'bar' (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -1548,11 +1548,11 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
- Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1625,19 +1625,19 @@ STAGE PLANS:
TableScan
alias: acid_2l_part_sdpo_no_cp
filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean)
- Statistics: Num rows: 1725 Data size: 705510 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 3201 Data size: 1197516 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: (key = 'foo') (type: boolean)
- Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), key (type: string), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), 'bar' (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -1647,11 +1647,11 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
- Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
index 8a5a326..5a3f519 100644
--- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
+++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
@@ -3237,19 +3237,19 @@ STAGE PLANS:
TableScan
alias: acid_uami_n1
filterExpr: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean)
- Statistics: Num rows: 281 Data size: 87904 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1002 Data size: 312584 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean)
- Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128))
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col3 (type: varchar(128))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -3259,10 +3259,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -3331,19 +3331,19 @@ STAGE PLANS:
TableScan
alias: acid_uami_n1
filterExpr: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean)
- Statistics: Num rows: 320 Data size: 100040 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1002 Data size: 312584 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean)
- Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128))
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col3 (type: varchar(128))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -3353,10 +3353,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat