You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/07/25 22:42:04 UTC
[01/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 892841a46 -> f8b79fe6d
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
index 19546c3..893aea3 100644
--- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
+++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
@@ -162,18 +162,20 @@ PREHOOK: Input: default@ex_table
POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ex_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 0 9 0 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE=
+ from deserializer
PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ex_table
POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ex_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 6 5.0 5 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 6 5.0 5 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs=
+ from deserializer
PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO PARTITION (part='part2')
PREHOOK: type: ALTERTABLE_RENAMEPART
PREHOOK: Input: default@ex_table
@@ -310,15 +312,17 @@ PREHOOK: Input: default@ex_table
POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ex_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 0 9 0 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE=
+ from deserializer
PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ex_table
POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ex_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 6 5.0 5 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 6 5.0 5 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs=
+ from deserializer
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
index 16b3a38..ae6fa40 100644
--- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
@@ -57,30 +57,33 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testtable1
@@ -96,30 +99,33 @@ PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: use default
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:default
@@ -203,30 +209,33 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testtable1
@@ -242,30 +251,33 @@ PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: use default
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:default
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
index b73b5f5..eb2a636 100644
--- a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
+++ b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
@@ -36,10 +36,10 @@ PREHOOK: Input: default@dec
POSTHOOK: query: DESC FORMATTED `dec` value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dec
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value decimal(8,4) -12.25 234.79 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value decimal(8,4) -12.25 234.79 0 10 from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
PREHOOK: query: DROP TABLE IF EXISTS avro_dec
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE IF EXISTS avro_dec
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index d476172..26680f8 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -3700,7 +3700,7 @@ STAGE PLANS:
partition key expr: ds
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
target column name: ds
- target work: Map 1
+ target work: Map 4
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
@@ -3714,7 +3714,7 @@ STAGE PLANS:
partition key expr: ds
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
target column name: ds
- target work: Map 4
+ target work: Map 1
Reducer 13
Reduce Operator Tree:
Group By Operator
@@ -3743,7 +3743,7 @@ STAGE PLANS:
partition key expr: ds
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
target column name: ds
- target work: Map 1
+ target work: Map 4
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
@@ -3757,7 +3757,7 @@ STAGE PLANS:
partition key expr: ds
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
target column name: ds
- target work: Map 4
+ target work: Map 1
Stage: Stage-1
Spark
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
index 359eea3..94f955c 100644
--- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
@@ -389,9 +389,9 @@ PREHOOK: Input: default@stats_null_part
POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@stats_null_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a double 1.0 1.0 1 1 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a double 1.0 1.0 1 1 from deserializer
PREHOOK: query: drop table stats_null
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_null
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/stats_only_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out
index 88c2114..de1b017 100644
--- a/ql/src/test/results/clientpositive/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/stats_only_null.q.out
@@ -377,9 +377,10 @@ PREHOOK: Input: default@stats_null_part
POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@stats_null_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a double 1.0 1.0 1 1 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a double 1.0 1.0 1 1 SExM4AEBwaDRtwU=
+ from deserializer
PREHOOK: query: drop table stats_null
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_null
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index ad92058..8d94ac6 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -55,9 +55,9 @@ PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string from deserializer
PREHOOK: query: explain
analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
PREHOOK: type: QUERY
@@ -242,27 +242,36 @@ PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string 0 55 12.763636363636364 13 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M
+wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB
+wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz
+AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA==
+ from deserializer
PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-avgTimeOnSite int 1 9 0 9 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI=
+ from deserializer
PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b
+Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK
+wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA
+86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL
+ from deserializer
PREHOOK: query: CREATE TEMPORARY TABLE empty_tab(
a int,
b double,
@@ -289,10 +298,10 @@ PREHOOK: Input: default@empty_tab
POSTHOOK: query: desc formatted empty_tab a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@empty_tab
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a int from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
PREHOOK: query: explain
analyze table empty_tab compute statistics for columns a,b,c,d,e
PREHOOK: type: QUERY
@@ -358,20 +367,20 @@ PREHOOK: Input: default@empty_tab
POSTHOOK: query: desc formatted empty_tab a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@empty_tab
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a int 0 0 0 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a int 0 0 0 0 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
PREHOOK: query: desc formatted empty_tab b
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@empty_tab
POSTHOOK: query: desc formatted empty_tab b
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@empty_tab
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-b double 0.0 0.0 0 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b double 0.0 0.0 0 0 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
PREHOOK: query: CREATE DATABASE test
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:test
@@ -447,27 +456,31 @@ PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string from deserializer
PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string from deserializer
PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string 0 55 12.763636363636364 13 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M
+wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB
+wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz
+AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA==
+ from deserializer
PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword
PREHOOK: type: QUERY
PREHOOK: Input: test@uservisits_web_text_none
@@ -489,15 +502,23 @@ PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sKeyword string 0 54 7.872727272727273 19 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA
++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr
+aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9
+x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA=
+ from deserializer
PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sKeyword string 0 54 7.872727272727273 19 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA
++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr
+aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9
+x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA=
+ from deserializer
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index 626e1fd..1764164 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -296,14 +296,14 @@ Stage-3
Reducer 2
File Output Operator [FS_8]
table:{"name:":"default.acid_uami"}
- Select Operator [SEL_4] (rows=8/2 width=302)
+ Select Operator [SEL_4] (rows=4/2 width=302)
Output:["_col0","_col1","_col2","_col3"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_3]
PartitionCols:UDFToInteger(_col0)
- Select Operator [SEL_2] (rows=8/2 width=302)
+ Select Operator [SEL_2] (rows=4/2 width=302)
Output:["_col0","_col1","_col3"]
- Filter Operator [FIL_9] (rows=8/2 width=226)
+ Filter Operator [FIL_9] (rows=4/2 width=226)
predicate:((de = 109.23) or (de = 119.23))
TableScan [TS_0] (rows=8/4 width=226)
default@acid_uami,acid_uami, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["i","de","vc"]
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index 13c19ca..f70d711 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -41,13 +41,13 @@ Stage-0
Stage-1
Reducer 2 vectorized
File Output Operator [FS_8]
- Select Operator [SEL_7] (rows=10 width=100)
+ Select Operator [SEL_7] (rows=10 width=101)
Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_6]
- Select Operator [SEL_5] (rows=10 width=100)
+ Select Operator [SEL_5] (rows=10 width=101)
Output:["_col0","_col1"]
- TableScan [TS_0] (rows=10 width=100)
+ TableScan [TS_0] (rows=10 width=101)
default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: explain select key, value
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/tunable_ndv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tunable_ndv.q.out b/ql/src/test/results/clientpositive/tunable_ndv.q.out
index 437beaf..e08f452 100644
--- a/ql/src/test/results/clientpositive/tunable_ndv.q.out
+++ b/ql/src/test/results/clientpositive/tunable_ndv.q.out
@@ -73,48 +73,53 @@ PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d partition(year=2000) locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 2 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 2 0 2 SExM4AICxfO+SPyNofED
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d partition(year=2001) locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d partition(year=2001) locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AQExfO+SLy7rGKA4vdMwPD8wQI=
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AICxfO+SPyNofED
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
PREHOOK: query: describe formatted loc_orc_1d locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AICxfO+SPyNofED
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
PREHOOK: query: describe formatted loc_orc_1d locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AICxfO+SPyNofED
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
PREHOOK: query: create table if not exists loc_orc_2d (
state string,
locid int
@@ -194,27 +199,30 @@ PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 3 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AEBwYHguQQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
PREHOOK: query: describe formatted loc_orc_2d locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AEBwYHguQQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
PREHOOK: query: describe formatted loc_orc_2d locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 4 0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 4 0 4 SExM4AEBwYHguQQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}
[02/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
index 5e64743..20e59a3 100644
--- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
+++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
@@ -80,36 +80,40 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-key int from deserializer
+# col_name data_type comment
+
+key int from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-value string from deserializer
+# col_name data_type comment
+
+value string from deserializer
PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@partcolstats
@@ -134,36 +138,40 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-key int from deserializer
+# col_name data_type comment
+
+key int from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-value string from deserializer
+# col_name data_type comment
+
+value string from deserializer
PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@partcolstats
@@ -192,54 +200,60 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 27 495 0 30 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 27 495 0 30 SExM4B4ewv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD8C5hRaHm6ND+b3hCYComCaA+tFngba1
+G7/T4wfAkocbguS2HL+06gTBtfI+/8iBAf/G+AWClaYVvr3WP8H6iQGB35Yz/v9gwYukJIPcgA3+
+6+9ZvuyzPYCwqTo=
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 30 6.833333333333333 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 30 6.833333333333333 7 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD
+CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1
+Ab++nA+CmogTvaOkBw==
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-key int from deserializer
+# col_name data_type comment
+
+key int from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-value string from deserializer
+# col_name data_type comment
+
+value string from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-key int from deserializer
+# col_name data_type comment
+
+key int from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
-
-value string from deserializer
+# col_name data_type comment
+
+value string from deserializer
PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@partcolstats
@@ -276,36 +290,52 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 15 495 0 40 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 15 495 0 40 SExM4Cgowv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD4D4jA/AwfgGh5ujQ/m94QmAqJgmgPrR
+Z4G2tRu/0+MHwJKHG4Lkthy/tOoEwfiHI77r2A7C0ZEN/8iBAf/G+AWClaYVvr3WP8H6iQGB35Yz
+/v9gwYukJL+9zgrEnrIC/OqkAYDasSKCp5k2vuyzPYDrkw6AxZUsgK/7DYK2uAr/ivcC
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 40 6.825 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 40 6.825 7 SExM4Cgog+SgJr7iywPAp44YwK72BIC/6BaB3skFwLTeHYHwuwH/7/YQ/4KtDMCv7BbC0uQ3vqKP
+DsDaLoHowwiErvwK+7OXDMDlIsC54ByB1egd/52dEcGEy1q//tAigKTuBsC/mmXB2LUDhN7rGvuS
+1w+Bx7AXv5uoWMHXuTmB2L1lwNTNB77f1iKCuLUBv76cD4KaiBO9o6QHgdygE4DUFw==
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 15 495 0 58 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 15 495 0 58 SExM4Do6wv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD4GbYP/crA7AwfgGwMyEQMfOngP5veEJ
+gJH1GIDAkAyA15IBg8+TPL25xAzCp9gR/smhDYG2tRu/0+MHwJKHG4Lkthy/tOoExJKoGf3l3wm+
+69gOwtGRDf/IgQH/xvgFwsSVEcDQkAS/qJM3/5TDCMH6iQHAwrIawZzkGP7/YMGLpCS/vc4KxJ6y
+AvzqpAGA2rEigqeZNr7ssz2AgZ4BgOr1DIaz3wL6kbYpgK/7DcCa2QnCm1//ivcC/5fiIsC10AmC
+5uYQvue2GQ==
+ from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 58 6.883333333333334 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 58 6.883333333333334 7 SExM4Do6geTIH4KA2Aa+4ssDwKeOGMCu9gSAv+gWgd7JBcC03h2B8LsB/+/2EP+CrQzAr+wWwIKn
+HoLQvRm+87oEgK/UCcDaLsH7kALA7LIGhK78CvuzlwzA5SLAueAcgdXoHf+dnRGA+ZUgwPH7M4C0
++AWB5kC//tAigKTuBoHJvAaAm+4bv91SgP6cQsHYtQO/4s4XxfucA/uS1w+Bx7AXv5uoWMD2mxmB
+4Z0gwMaBGv/Zzz7Ct+wM/raEAsKdyQW+39Yigri1Ab/3igGB6vwG/9yUB4KaiBO9o6QHwdeeA8CE
+ghCA1Bc=
+ from deserializer
PREHOOK: query: drop table partcolstats
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@partcolstats
@@ -356,9 +386,12 @@ PREHOOK: Input: default@partcolstatsnum
POSTHOOK: query: describe formatted partcolstatsnum partition (tint=100, sint=1000, bint=1000000) value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstatsnum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 30 6.833333333333333 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 30 6.833333333333333 7 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD
+CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1
+Ab++nA+CmogTvaOkBw==
+ from deserializer
PREHOOK: query: drop table partcolstatsnum
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@partcolstatsnum
@@ -409,9 +442,12 @@ PREHOOK: Input: default@partcolstatsdec
POSTHOOK: query: describe formatted partcolstatsdec partition (decpart='1000.0001') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstatsdec
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 30 6.833333333333333 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 30 6.833333333333333 7 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD
+CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1
+Ab++nA+CmogTvaOkBw==
+ from deserializer
PREHOOK: query: drop table partcolstatsdec
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@partcolstatsdec
@@ -462,9 +498,12 @@ PREHOOK: Input: default@partcolstatschar
POSTHOOK: query: describe formatted partcolstatschar partition (varpart='part1', charpart='aaa') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstatschar
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 30 6.833333333333333 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 30 6.833333333333333 7 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD
+CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1
+Ab++nA+CmogTvaOkBw==
+ from deserializer
PREHOOK: query: drop table partcolstatschar
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@partcolstatschar
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
index 5db87d9..18fdfd4 100644
--- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
+++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
@@ -72,10 +72,10 @@ PREHOOK: Input: default@testdeci2
POSTHOOK: query: describe formatted testdeci2 amount
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@testdeci2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-amount decimal(10,3) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+amount decimal(10,3) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: analyze table testdeci2 compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@testdeci2
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
index 6bc1970..d51a544 100644
--- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
+++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
@@ -113,72 +113,80 @@ PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 3 0.75 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 3 0.75 2 SExM4AMDgaTbFcD8mOYCwMOJoQQ=
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 6 3.0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 6 3.0 3 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid double 1.0 4.0 0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid double 1.0 4.0 0 4 SExM4AQEwvmagwOC4fQQ/cXBowKCnueKAg==
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid double 1.0 5.0 0 5 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid double 1.0 5.0 0 5 SExM4AUFgoqWCcDvhPoCguH0EP3FwaMCgp7nigI=
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') cnt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') cnt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cnt decimal(10,0) 10 2000 0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cnt decimal(10,0) 10 2000 0 4 SExM4AQEwtKH1wOJpIYp95+qNYHs8ZgB
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cnt decimal(10,0) 10 910 0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cnt decimal(10,0) 10 910 0 4 SExM4AQEwavm2wOC18PyAYDUhBSCqe9l
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') zip
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') zip
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-zip int 43201 94087 0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY=
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-zip int 43201 94087 0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY=
+ from deserializer
PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d
@@ -414,72 +422,80 @@ PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 2 0.5 1 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 2 0.5 1 SExM4AICgaTbFYDJnvoC
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') state
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 3 1.25 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 3 1.25 4 SExM4AMDgaTbFcD8mOYCwJDuDA==
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid double 1.0 2.0 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid double 1.0 2.0 0 2 SExM4AICwvmagwP/pra0Ag==
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid double 1.0 31.0 0 5 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid double 1.0 31.0 0 5 SExM4AUFgoqWCb/8tKEBg9TE6QH9xcGjAoKe54oC
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cnt decimal(10,0) 1000 1010 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cnt decimal(10,0) 1000 1010 0 2 SExM4AICwtKH1wOJpIYp
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cnt decimal(10,0) 1000 2000 0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cnt decimal(10,0) 1000 2000 0 3 SExM4AMDwtKH1wOJpIYp95+qNQ==
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') zip
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') zip
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-zip int 94086 94087 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+zip int 94086 94087 0 2 SExM4AICgaPxmgOAs+SCAw==
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') zip
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') zip
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-zip int 43201 94087 0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY=
+ from deserializer
PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d
@@ -786,54 +802,60 @@ PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 2 0.5 1 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 2 0.5 1 SExM4AICgaTbFYDAoocH
+ from deserializer
PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') state
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 3 3.0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 3 3.0 3 SExM4AMDwtmPPYHA90C/kJJj
+ from deserializer
PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 2 3 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 2 3 0 2 SExM4AICga/rqgHA0vSOAw==
+ from deserializer
PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') locid
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') locid
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-locid int 1 5 0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+locid int 1 5 0 3 SExM4AMDxfO+SLy7rGLA9IJO
+ from deserializer
PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') cnt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') cnt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cnt decimal(10,0) 1000 2000 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cnt decimal(10,0) 1000 2000 0 2 SExM4AICy/aNgAT3n6o1
+ from deserializer
PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') cnt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_2d
POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') cnt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_2d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cnt decimal(10,0) 10 100 0 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cnt decimal(10,0) 10 100 0 2 SExM4AICw4KqzgWC/fN5
+ from deserializer
PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/llap_smb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_smb.q.out b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
index 87b33db..a75b3da 100644
--- a/ql/src/test/results/clientpositive/llap/llap_smb.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
@@ -321,8 +321,8 @@ POSTHOOK: Input: default@orc_a@y=2001/q=8
POSTHOOK: Input: default@orc_a@y=2001/q=9
POSTHOOK: Input: default@orc_b
#### A masked pattern was here ####
-2000 5 52
-2001 5 139630
+2001 4 139630
+2001 7 52
PREHOOK: query: DROP TABLE orc_a
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@orc_a
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
index 57aaf55..ab91ea7 100644
--- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
@@ -413,9 +413,10 @@ PREHOOK: Input: default@stats_null_part
POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@stats_null_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a double 1.0 1.0 1 1 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a double 1.0 1.0 1 1 SExM4AEBwaDRtwU=
+ from deserializer
PREHOOK: query: drop table stats_null
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_null
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index a62c494..05d4fc8 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -5932,7 +5932,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE)
@@ -6144,7 +6144,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE)
@@ -6314,7 +6314,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE)
@@ -6478,7 +6478,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -6597,7 +6597,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
index 2e9d88e..023d51c 100644
--- a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
+++ b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
@@ -406,7 +406,7 @@ from varchar_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"}
PREHOOK: query: select
min(c2),
min(c4)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
index 9a164fe..b8d19c5 100644
--- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
@@ -2713,7 +2713,7 @@ from varchar_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"}
PREHOOK: query: explain vectorization detail
select
min(c2),
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/partial_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out
index 87d47da..452d4b6 100644
--- a/ql/src/test/results/clientpositive/partial_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out
@@ -69,7 +69,7 @@ PREHOOK: Input: default@t1
POSTHOOK: query: desc formatted t1 value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@t1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 0 0.0 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 0 0.0 0 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
index d459b36..d824a98 100644
--- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
+++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
@@ -302,48 +302,52 @@ PREHOOK: Input: default@partcoltypenum
POSTHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcoltypenum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcoltypenum
POSTHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcoltypenum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 20 6.766666666666667 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 20 6.766666666666667 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted partcoltypenum tint
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcoltypenum
POSTHOOK: query: describe formatted partcoltypenum tint
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcoltypenum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-tint tinyint 110 110 0 1
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+tint tinyint 110 110 0 1
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}}
PREHOOK: query: describe formatted partcoltypenum sint
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcoltypenum
POSTHOOK: query: describe formatted partcoltypenum sint
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcoltypenum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sint smallint 22000 22000 0 1
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"sint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sint smallint 22000 22000 0 1
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"sint\":\"true\"}}
PREHOOK: query: describe formatted partcoltypenum bint
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcoltypenum
POSTHOOK: query: describe formatted partcoltypenum bint
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcoltypenum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bint bigint 330000000000 330000000000 0 1
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bint bigint 330000000000 330000000000 0 1
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bint\":\"true\"}}
PREHOOK: query: alter table partcoltypenum change key key decimal(10,0)
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: default@partcoltypenum
@@ -458,10 +462,10 @@ PREHOOK: Input: default@partcoltypenum
POSTHOOK: query: describe formatted partcoltypenum tint
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcoltypenum
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-tint decimal(3,0) 110 110 0 1
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+tint decimal(3,0) 110 110 0 1
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}}
PREHOOK: query: show partitions partcoltypenum partition (tint=110BD, sint=22000S, bint=330000000000L)
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: default@partcoltypenum
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
index 4bddd3b..124a4b4 100644
--- a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
+++ b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
@@ -22,7 +22,7 @@ select
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{0}{0}{0}{1}{1}{1}{0}{0}{0}{0}{0}{1}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{0}{0}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}{4}{2}{0}"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{1}{3}{2}{3}{5}{2}{0}{1}{0}{1}{1}{1}{1}{0}{1}"}
+{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAABAAAAAQAAAAEAAAACAAAAAgAAAAIAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAEAAAABAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAQ\r\nAAAABAAAAAEAAAA=\r\n"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAACAAAACAAAAAQAAAAIAAAAIAAAAAQAAAABAAAAAgAAAAEAAAACAAAAAgAAAAIAAAAC\r\nAAAAAQAAAAIAAAA=\r\n"}
PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
from
(
@@ -111,7 +111,7 @@ select
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"}
+{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"}
PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
from
(
@@ -217,4 +217,4 @@ select
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"}
+{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"}
[06/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
new file mode 100644
index 0000000..1dcc1fc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
@@ -0,0 +1,708 @@
+PREHOOK: query: create table src_stat as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_stat
+POSTHOOK: query: create table src_stat as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_stat
+POSTHOOK: Lineage: src_stat.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_stat.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table src_stat_int (
+ key double,
+ value string
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_stat_int
+POSTHOOK: query: create table src_stat_int (
+ key double,
+ value string
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_stat_int
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@src_stat_int
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@src_stat_int
+PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_stat
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_stat
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted src_stat key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted src_stat key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 16 1.72 3 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: describe formatted src_stat key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted src_stat key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 1111 1.111 3 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: describe formatted src_stat value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted src_stat value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 122 121 1.23 124 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_stat_int
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_stat_int
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted src_stat_int key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat_int
+POSTHOOK: query: describe formatted src_stat_int key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat_int
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key double 66.0 406.0 10 15 from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: describe formatted src_stat_int key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat_int
+POSTHOOK: query: describe formatted src_stat_int key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat_int
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key double 333.22 22.22 10 2222 from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: create database if not exists dummydb
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:dummydb
+POSTHOOK: query: create database if not exists dummydb
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:dummydb
+PREHOOK: query: use dummydb
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:dummydb
+POSTHOOK: query: use dummydb
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:dummydb
+PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: describe formatted default.src_stat key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted default.src_stat key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 3333 2.222 3 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: describe formatted default.src_stat value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted default.src_stat value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 233 232 2.34 235 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+PREHOOK: query: drop database dummydb
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:dummydb
+PREHOOK: Output: database:dummydb
+POSTHOOK: query: drop database dummydb
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:dummydb
+POSTHOOK: Output: database:dummydb
+PREHOOK: query: create table datatype_stats(
+ t TINYINT,
+ s SMALLINT,
+ i INT,
+ b BIGINT,
+ f FLOAT,
+ d DOUBLE,
+ dem DECIMAL, --default decimal (10,0)
+ ts TIMESTAMP,
+ dt DATE,
+ str STRING,
+ v VARCHAR(12),
+ c CHAR(5),
+ bl BOOLEAN,
+ bin BINARY)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@datatype_stats
+POSTHOOK: query: create table datatype_stats(
+ t TINYINT,
+ s SMALLINT,
+ i INT,
+ b BIGINT,
+ f FLOAT,
+ d DOUBLE,
+ dem DECIMAL, --default decimal (10,0)
+ ts TIMESTAMP,
+ dt DATE,
+ str STRING,
+ v VARCHAR(12),
+ c CHAR(5),
+ bl BOOLEAN,
+ bin BINARY)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@datatype_stats
+PREHOOK: query: INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@datatype_stats
+POSTHOOK: query: INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@datatype_stats
+POSTHOOK: Lineage: datatype_stats.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.bin EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col14, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.bl EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col13, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col12, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.d EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.dem EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col7, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col9, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.f EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.s EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.str SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col8, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.v EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col11, type:string, comment:), ]
+PREHOOK: query: INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@datatype_stats
+POSTHOOK: query: INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@datatype_stats
+POSTHOOK: Lineage: datatype_stats.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.bin EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col14, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.bl EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col13, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col12, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.dem EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.dt EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.f EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.s EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.str SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col10, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.t EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.ts EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ]
+POSTHOOK: Lineage: datatype_stats.v EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col11, type:string, comment:), ]
+PREHOOK: query: DESC FORMATTED datatype_stats s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+s smallint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats i
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats i
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+i int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b bigint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats f
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats f
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+f float from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats d
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats d
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d double from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats dem
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats dem
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dem decimal(10,0) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats ts
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats ts
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ts timestamp from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats dt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats dt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dt date from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats str
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats str
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+str string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v varchar(12) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats c
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats c
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c char(5) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats bl
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats bl
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bl boolean from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats bin
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats bin
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bin binary from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: DESC FORMATTED datatype_stats t
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats t
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+t tinyint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats t
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats t
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+t tinyint 35 234 233 232 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+s smallint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+s smallint 25 489 56 56 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats i
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats i
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+i int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats i
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats i
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+i int 5 889 1 59 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b bigint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b bigint 8 89 14 9 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats f
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats f
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+f float from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats f
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats f
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+f float 8.0 2345.656 45 563 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats d
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats d
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d double from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats d
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats d
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d double 0.00455 560.3367 12 5677 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats dem
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats dem
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dem decimal(10,0) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats dem
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats dem
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dem decimal(10,0) 0 560 912 57 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats ts
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats ts
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ts timestamp from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats ts
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats ts
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ts timestamp 1357030924 1357030923 12 7 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats dt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats dt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dt date from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats dt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats dt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dt date 2001-02-04 2012-01-01 912 57 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats str
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats str
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+str string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats str
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats str
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+str string 233 232 2.34 235 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v varchar(12) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v varchar(12) 33 22 4.4 25 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats c
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats c
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c char(5) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats c
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats c
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c char(5) 3 2 9.0 58 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats bl
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats bl
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bl boolean from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats bl
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats bl
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bl boolean 1 9 8 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+PREHOOK: query: DESC FORMATTED datatype_stats bin
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats bin
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bin binary from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: DESC FORMATTED datatype_stats bin
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@datatype_stats
+POSTHOOK: query: DESC FORMATTED datatype_stats bin
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@datatype_stats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bin binary 8 2.0 8 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/analyze_tbl_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out
index 6a3fbc0..0de0a3a 100644
--- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out
+++ b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out
@@ -48,9 +48,11 @@ PREHOOK: Input: default@src_stat_part
POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 16 1.72 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value
PREHOOK: type: QUERY
PREHOOK: Input: default@src_stat_part
@@ -69,18 +71,22 @@ PREHOOK: Input: default@src_stat_part
POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 16 1.72 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@src_stat_part
POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 19 4.92 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 19 4.92 7 SExM4BMTgaTbFcCikRTAp44YwK72BIGdshzAtN4dgfC7Ab6ikDTAz6JGgejDCP+AlzSA84UvwYTL
+Wr+ivynA6+uCAsDjm8kBgri1Ab++nA+/vawa
+ from deserializer
PREHOOK: query: create table src_stat_string_part(key string, value string) partitioned by (partitionName string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
index e3abba5..2996397 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
@@ -192,10 +192,11 @@ PREHOOK: Input: default@partitioned1
POSTHOOK: query: desc formatted partitioned1 partition(part=1) a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partitioned1
-col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a int 1 4 0 4 from deserializer
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a int 1 4 0 4 SExM4AQExfO+SLy7rGKA4vdMwPD8wQI=
+ from deserializer
PREHOOK: query: alter table partitioned1 add columns(c int, d string)
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@partitioned1
@@ -434,10 +435,11 @@ PREHOOK: Input: default@partitioned1
POSTHOOK: query: desc formatted partitioned1 partition(part=2) c
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partitioned1
-col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c int 10 40 0 4 from deserializer
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c int 10 40 0 4 SExM4AQEguSTlQGB4f34Ab/okIMC/4XTfQ==
+ from deserializer
PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
PREHOOK: type: QUERY
POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
@@ -626,17 +628,19 @@ PREHOOK: Input: default@partitioned1
POSTHOOK: query: desc formatted partitioned1 partition(part=1) a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partitioned1
-col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a int 1 6 0 4 from deserializer
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a int 1 6 0 6 SExM4AYGwZn6L4TaxBi8u6xigOL3TMCSiwGA3vHAAg==
+ from deserializer
PREHOOK: query: desc formatted partitioned1 partition(part=1) c
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partitioned1
POSTHOOK: query: desc formatted partitioned1 partition(part=1) c
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partitioned1
-col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c int 100 200 0 2 from deserializer
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c int 100 200 0 2 SExM4AICweD/2gaAj/YU
+ from deserializer
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
index 06f23b1..e32c884 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
@@ -250,17 +250,59 @@ PREHOOK: Input: default@dest_j1
POSTHOOK: query: desc formatted dest_j1 key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dest_j1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 0 498 0 309 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L
+vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb
+YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO
+vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo
+Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7
+Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69
+yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi
+AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy
+8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/
+1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ
+2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe
+A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+
+we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu
+9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc
+6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB
+gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g
+4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD
+gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl
+Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA
+t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA
+4gPA7aoC/6mKCIDZpgLDoEQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: desc formatted dest_j1 value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@dest_j1
POSTHOOK: query: desc formatted dest_j1 value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dest_j1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 309 6.834630350194552 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 309 6.834630350194552 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/avro_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out
index e1045eb..07dca39 100644
--- a/ql/src/test/results/clientpositive/avro_decimal.q.out
+++ b/ql/src/test/results/clientpositive/avro_decimal.q.out
@@ -32,10 +32,11 @@ PREHOOK: Input: default@dec
POSTHOOK: query: DESC FORMATTED `dec` value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dec
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value decimal(8,4) -12.25 234.79 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
PREHOOK: query: DROP TABLE IF EXISTS avro_dec
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE IF EXISTS avro_dec
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/avro_decimal_native.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out
index b73b5f5..9f8d4c6 100644
--- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out
+++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out
@@ -36,10 +36,11 @@ PREHOOK: Input: default@dec
POSTHOOK: query: DESC FORMATTED `dec` value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dec
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value decimal(8,4) -12.25 234.79 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
PREHOOK: query: DROP TABLE IF EXISTS avro_dec
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE IF EXISTS avro_dec
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/bitvector.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bitvector.q.out b/ql/src/test/results/clientpositive/bitvector.q.out
new file mode 100644
index 0000000..21859d2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/bitvector.q.out
@@ -0,0 +1,31 @@
+PREHOOK: query: desc formatted src key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src
+POSTHOOK: query: desc formatted src key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/char_udf1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.out b/ql/src/test/results/clientpositive/char_udf1.q.out
index fefc740..e701d64 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.out
@@ -406,7 +406,7 @@ from char_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@char_udf_1
#### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"}
PREHOOK: query: select
min(c2),
min(c4)
[08/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
index 96dce1e..3676204 100644
--- a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
@@ -123,30 +123,33 @@ PREHOOK: Input: statsdb1@testtable0
POSTHOOK: query: describe formatted statsdb1.testtable0 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable0 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable0
POSTHOOK: query: describe formatted statsdb1.testtable0 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable0 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable0
POSTHOOK: query: describe formatted statsdb1.testtable0 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: alter table statsdb1.testtable0 rename to statsdb1.testtable1
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testtable0
@@ -199,30 +202,33 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: alter table testtable1 replace columns (col1 int, col2 string, col4 string)
PREHOOK: type: ALTERTABLE_REPLACECOLS
PREHOOK: Input: statsdb1@testtable1
@@ -274,30 +280,32 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col4 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col4 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
PREHOOK: query: alter table testtable1 change col1 col1 string
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: statsdb1@testtable1
@@ -349,30 +357,31 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col4 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col4 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testtable1
@@ -425,30 +434,31 @@ PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col4 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col4 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: analyze table testpart0 compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: statsdb1@testpart0
@@ -549,27 +559,30 @@ PREHOOK: Input: statsdb1@testpart0
POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart0
POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart0
POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2')
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart0
@@ -616,27 +629,32 @@ PREHOOK: Input: statsdb1@testpart0
POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart0
POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart0
POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
PREHOOK: query: alter table statsdb1.testpart0 rename to statsdb1.testpart1
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testpart0
@@ -735,27 +753,30 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2')
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
@@ -802,27 +823,32 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
PREHOOK: query: alter table statsdb1.testpart1 partition (part = 'part1') rename to partition (part = 'part11')
PREHOOK: type: ALTERTABLE_RENAMEPART
PREHOOK: Input: statsdb1@testpart1
@@ -922,27 +948,30 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2')
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
@@ -989,27 +1018,32 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
PREHOOK: query: alter table statsdb1.testpart1 replace columns (col1 int, col2 string, col4 string) cascade
PREHOOK: type: ALTERTABLE_REPLACECOLS
PREHOOK: Input: statsdb1@testpart1
@@ -1111,27 +1145,29 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type comment
-
-col4 string from deserializer
+# col_name data_type comment
+
+col4 string from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2')
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
@@ -1178,27 +1214,31 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 20 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type comment
-
-col4 string from deserializer
+# col_name data_type comment
+
+col4 string from deserializer
PREHOOK: query: alter table statsdb1.testpart1 change column col1 col1 string cascade
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: statsdb1@testpart1
@@ -1300,27 +1340,28 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type comment
-
-col1 string from deserializer
+# col_name data_type comment
+
+col1 string from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type comment
-
-col4 string from deserializer
+# col_name data_type comment
+
+col4 string from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2')
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
@@ -1367,27 +1408,29 @@ PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type comment
-
-col1 string from deserializer
+# col_name data_type comment
+
+col1 string from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testpart1
POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testpart1
-# col_name data_type comment
-
-col4 string from deserializer
+# col_name data_type comment
+
+col4 string from deserializer
PREHOOK: query: alter table statsdb1.testpart1 rename to statsdb2.testpart2
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testpart1
@@ -1446,54 +1489,57 @@ PREHOOK: Input: statsdb2@testpart2
POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testpart2
-# col_name data_type comment
-
-col1 string from deserializer
+# col_name data_type comment
+
+col1 string from deserializer
PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testpart2
POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testpart2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testpart2
POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testpart2
-# col_name data_type comment
-
-col4 string from deserializer
+# col_name data_type comment
+
+col4 string from deserializer
PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testpart2
POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testpart2
-# col_name data_type comment
-
-col1 string from deserializer
+# col_name data_type comment
+
+col1 string from deserializer
PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testpart2
POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testpart2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 20 6.8 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==
+ from deserializer
PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testpart2
POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testpart2
-# col_name data_type comment
-
-col4 string from deserializer
+# col_name data_type comment
+
+col4 string from deserializer
PREHOOK: query: use statsdb2
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:statsdb2
@@ -1663,30 +1709,33 @@ PREHOOK: Input: statsdb1@testtable0
POSTHOOK: query: describe formatted statsdb1.testtable0 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable0 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable0
POSTHOOK: query: describe formatted statsdb1.testtable0 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable0 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable0
POSTHOOK: query: describe formatted statsdb1.testtable0 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: alter table statsdb1.testtable0 rename to statsdb1.testtable1
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testtable0
@@ -1739,30 +1788,33 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col3
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col3
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col3 string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col3 string 0 1 4.0 4 SExM4AEBgeL8+wM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
PREHOOK: query: alter table testtable1 replace columns (col1 int, col2 string, col4 string)
PREHOOK: type: ALTERTABLE_REPLACECOLS
PREHOOK: Input: statsdb1@testtable1
@@ -1814,30 +1866,32 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 int 27 484 0 10 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col4 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col4 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
PREHOOK: query: alter table testtable1 change col1 col1 string
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: statsdb1@testtable1
@@ -1889,30 +1943,31 @@ PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col2 string 0 10 6.7 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb1.testtable1 col4
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb1@testtable1
POSTHOOK: query: describe formatted statsdb1.testtable1 col4
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb1@testtable1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col4 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col4 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: statsdb1@testtable1
@@ -1965,30 +2020,31 @@ PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
PREHOOK: query: describe formatted statsdb2.testtable2 col2
PREHOOK: type: DESCTABLE
PREHOOK: Input: statsdb2@testtable2
POSTHOOK: query: describe formatted statsdb2.testtable2 col2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: statsdb2@testtable2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues
<TRUNCATED>
[03/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
index b5f4fee..b6aedc4 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
@@ -89,18 +89,20 @@ PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 3 0.75 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 3 0.75 2 SExM4AMDgaTbFcD8mOYCwMOJoQQ=
+ from deserializer
PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 6 3.0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 6 3.0 3 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7
+ from deserializer
PREHOOK: query: explain extended select state from loc_orc_1d
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select state from loc_orc_1d
@@ -296,12 +298,12 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc_1d
- Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL
GatherStats: false
Select Operator
expressions: state (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: explain extended select state,locid from loc_orc_1d
@@ -499,12 +501,12 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc_1d
- Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL
GatherStats: false
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/fm-sketch.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/fm-sketch.q.out b/ql/src/test/results/clientpositive/fm-sketch.q.out
new file mode 100644
index 0000000..2bd218b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/fm-sketch.q.out
@@ -0,0 +1,333 @@
+PREHOOK: query: create table n(key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@n
+POSTHOOK: query: create table n(key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@n
+PREHOOK: query: insert overwrite table n select null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@n
+POSTHOOK: query: insert overwrite table n select null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@n
+POSTHOOK: Lineage: n.key EXPRESSION []
+PREHOOK: query: explain analyze table n compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table n compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: n
+ Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'fm', 16)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.n
+
+PREHOOK: query: analyze table n compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@n
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table n compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@n
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted n key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@n
+POSTHOOK: query: desc formatted n key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@n
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 0 500 1 Rk0QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: create table i(key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@i
+POSTHOOK: query: create table i(key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@i
+PREHOOK: query: insert overwrite table i select key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@i
+POSTHOOK: query: insert overwrite table i select key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: explain analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: i
+ Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'fm', 16)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.i
+
+PREHOOK: query: analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@i
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@i
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted i key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@i
+POSTHOOK: query: desc formatted i key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@i
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 498 0 196 Rk0QAP8YAAB/AAAA/woAAP8AAAC/AQAA/wEAAH8BAAD/AgAAfwAAAPsLAAB/AgAA/wgAAH9DAAA/
+AAAA/xQAAP8DAAA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: drop table i
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@i
+PREHOOK: Output: default@i
+POSTHOOK: query: drop table i
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@i
+POSTHOOK: Output: default@i
+PREHOOK: query: create table i(key double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@i
+POSTHOOK: query: create table i(key double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@i
+PREHOOK: query: insert overwrite table i select key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@i
+POSTHOOK: query: insert overwrite table i select key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@i
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@i
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted i key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@i
+POSTHOOK: query: desc formatted i key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@i
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key double 0.0 498.0 0 234 Rk0QAP8AAAD/AQAA/wAAAJ8NAAB/MAAA/xEAAP8CAAD/AgAAfwIAAP8AAAB/EQAA/wAAAP8AAAB/
+AAAA3wEAAP8CAAA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: drop table i
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@i
+PREHOOK: Output: default@i
+POSTHOOK: query: drop table i
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@i
+POSTHOOK: Output: default@i
+PREHOOK: query: create table i(key decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@i
+POSTHOOK: query: create table i(key decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@i
+PREHOOK: query: insert overwrite table i select key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@i
+POSTHOOK: query: insert overwrite table i select key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@i
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@i
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted i key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@i
+POSTHOOK: query: desc formatted i key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@i
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key decimal(10,0) 0 498 0 180 Rk0QAP8AAAD/AwAA/wUAAP8DAAD/AwAAvwIAAH8eAAC/AQAAPwAAAL8AAAAHAAAAvwAAAP0CAAD/
+AQAA/wMAAH8CAAA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+PREHOOK: query: drop table i
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@i
+PREHOOK: Output: default@i
+POSTHOOK: query: drop table i
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@i
+POSTHOOK: Output: default@i
+PREHOOK: query: create table i(key date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@i
+POSTHOOK: query: create table i(key date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@i
+PREHOOK: query: insert into i values ('2012-08-17')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@i
+POSTHOOK: query: insert into i values ('2012-08-17')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into i values ('2012-08-17')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@i
+POSTHOOK: query: insert into i values ('2012-08-17')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into i values ('2013-08-17')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@i
+POSTHOOK: query: insert into i values ('2013-08-17')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into i values ('2012-03-17')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@i
+POSTHOOK: query: insert into i values ('2012-03-17')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into i values ('2012-05-17')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@i
+POSTHOOK: query: insert into i values ('2012-05-17')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@i
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@i
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted i key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@i
+POSTHOOK: query: desc formatted i key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@i
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key date 2012-03-17 2013-08-17 0 3 Rk0QAAEAAAAGAAAAAwAAAA0AAAADAAAABwAAAAsAAAAJAAAAEwAAAAkAAAADAAAABwAAAAMAAAAB
+AAAABAAAAAUAAAA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/hll.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/hll.q.out b/ql/src/test/results/clientpositive/hll.q.out
index b9357c3..13da130 100644
--- a/ql/src/test/results/clientpositive/hll.q.out
+++ b/ql/src/test/results/clientpositive/hll.q.out
@@ -1,3 +1,88 @@
+PREHOOK: query: create table n(key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@n
+POSTHOOK: query: create table n(key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@n
+PREHOOK: query: insert overwrite table n select null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@n
+POSTHOOK: query: insert overwrite table n select null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@n
+POSTHOOK: Lineage: n.key EXPRESSION []
+PREHOOK: query: explain analyze table n compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table n compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: n
+ Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.n
+
+PREHOOK: query: analyze table n compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@n
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table n compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@n
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted n key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@n
+POSTHOOK: query: desc formatted n key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@n
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 0 500 1 SExM4AEA
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: create table i(key int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
@@ -78,10 +163,31 @@ PREHOOK: Input: default@i
POSTHOOK: query: desc formatted i key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@i
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 0 498 0 309 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L
+vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb
+YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO
+vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo
+Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7
+Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69
+yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi
+AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy
+8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/
+1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ
+2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe
+A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+
+we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu
+9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc
+6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB
+gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g
+4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD
+gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl
+Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA
+t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA
+4gPA7aoC/6mKCIDZpgLDoEQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: drop table i
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@i
@@ -121,10 +227,31 @@ PREHOOK: Input: default@i
POSTHOOK: query: desc formatted i key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@i
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key double 0.0 498.0 0 309 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key double 0.0 498.0 0 309 SExM4LUCtQLB60iBzkW/t98FgZmoAoClIL/zvgGA98wFwZxDgNEev/fQAoTVmQS8tMUCgIfzAsap
+7wL7vdICwMCHAv/a2gGAj+MJwMSpBsHPLcGehw++nboDgY6bAb+Z3Q2CycMBvtO0A8DE9QOCh+sE
+v4/3AoHde7++1QKB1ZUFvvHBAYCumwGAm4sBwOjEAoD4LsDB1APA0rEEwo6RAb665QaCysQBvua+
+BMGxF8C4qASAqtwLhZqTAbqmvAHChlWBuNsJ/fzNAoCjiwSA7f0BgMHaBIDtjQ3B9zLCwa0F/8ba
+AYPTogK7xaMGgNT7BIOAQr7ZogH//r4Bwb88xcts+7xwv97zAYPPhAG+mJkBgeabAb6jFcDZmgmA
+wWiB6NQG/6y2AcDn7QLB2OMBv7PADMK6K7+obMCrqwrBnPUE/qNQwaacDsCm9gG/wdUCwauuAf/I
+yQGCgtkB/r+qAcAgwNvzD8C0GoTuwAS87xqAx7wJwc+QAYDolQL/164BwJ+VA8K4Hf/DjwG//ecB
+wNTFEMCugwLA3CyBopEBv4fdAoOhSb6DjQPDjm79gI0B//ixBoCFA4HvpwGAx4sC/7KuD4DKGoPk
+qwH9g8YBwI95wMCYAsD0lAHAhViGgt0F+7hQv4bMAsCKEICDmQHE06oD/J1FgJBfgOapA8PJlQf9
+itwEwM2GC4P8pgi997EDgP2aAsCzhgTAj5ADw5HBA77hdb/IL8LNxQT+zzOA4aQBgf+gBf/SWMDl
+iAWF/r4BgPwD/KhS/5M+wIP+AcBkgPCBA4CxjAGAwgHEgFz/9vYLvZuZA8CHqgPDjJwBvddOwJua
+BMCpUYKEvgS+pVaA9PoEg6osvZ1Gga/IAv/9wQLA94EDwOwigO1tgfadAcO4f7yA/ATAq94BwP2X
+A4CahgLA64ECwOzUCoPq5wP9s5cBwNkJgISgAcCN7gLB0bMFgOyLCMDTlA7Bzm2/rYoBgbv8Av+G
+esGE3gKAuSu+8YwFxK+9BICqLv70iAq/z1vB2oQDv790gZOXA8DxhQi+3r0Ewe2+AsGpfL7JtgGB
+sdgHgt+IAb3riwKA/xqAx4YBwM6BBMD24QeE/sgEvM3RAsD/4QHA9KUBg9/PBr7xxgaB0aUD//aC
+A8D0gxSB19wEvtOyCcDBmQGC9q4BvqHgCYDEbMGnaoHK2QT/j5kDv+w7gutQgP3zC/6+kgSAsh3B
+xkC/ybsCgYq4Ab+iS8LN2wK/3dUEgMGICMHQ9wK+ucQCgJvyAofd9Ai5wzbC3LcFwrjwAf78jgq+
+xiPBgzO/0myEya8E/OKkAsHYPcHfqQP/ndwCwNH/BcOngAG8/d8Egd5S/+khgr+zEICIJ4bv0AH4
+isQCgN6lAsTolwO88EDA56UEwsSgDf7U4gHDpUa9570DweyNAb/LyQfA/PwGga7MA8Db7QGBpYEB
+vqNhwNSNBMCL3AHBqzu/gGXAweUCgIqDAoCBdYLHyAbAaL/rgAWA9e4RgMwTv76yAoDZDcHd1wGA
+tucFgd6SE8DhBr+JUQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: drop table i
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@i
@@ -164,10 +291,31 @@ PREHOOK: Input: default@i
POSTHOOK: query: desc formatted i key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@i
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key decimal(10,0) 0 498 0 309 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key decimal(10,0) 0 498 0 309 SExM4LUCtQLB60iBzkW/t98FgZmoAoClIL/zvgGA98wFwZxDgNEev/fQAoTVmQS8tMUCgIfzAsap
+7wL7vdICwMCHAv/a2gGAj+MJwMSpBsHPLcGehw++nboDgY6bAb+Z3Q2CycMBvtO0A8DE9QOCh+sE
+v4/3AoHde7++1QKB1ZUFvvHBAYCumwGAm4sBwOjEAoD4LsDB1APA0rEEwo6RAb665QaCysQBvua+
+BMGxF8C4qASAqtwLhZqTAbqmvAHChlWBuNsJ/fzNAoCjiwSA7f0BgMHaBIDtjQ3B9zLCwa0F/8ba
+AYPTogK7xaMGgNT7BIOAQr7ZogH//r4Bwb88xcts+7xwv97zAYPPhAG+mJkBgeabAb6jFcDZmgmA
+wWiB6NQG/6y2AcDn7QLB2OMBv7PADMK6K7+obMCrqwrBnPUE/qNQwaacDsCm9gG/wdUCwauuAf/I
+yQGCgtkB/r+qAcAgwNvzD8C0GoTuwAS87xqAx7wJwc+QAYDolQL/164BwJ+VA8K4Hf/DjwG//ecB
+wNTFEMCugwLA3CyBopEBv4fdAoOhSb6DjQPDjm79gI0B//ixBoCFA4HvpwGAx4sC/7KuD4DKGoPk
+qwH9g8YBwI95wMCYAsD0lAHAhViGgt0F+7hQv4bMAsCKEICDmQHE06oD/J1FgJBfgOapA8PJlQf9
+itwEwM2GC4P8pgi997EDgP2aAsCzhgTAj5ADw5HBA77hdb/IL8LNxQT+zzOA4aQBgf+gBf/SWMDl
+iAWF/r4BgPwD/KhS/5M+wIP+AcBkgPCBA4CxjAGAwgHEgFz/9vYLvZuZA8CHqgPDjJwBvddOwJua
+BMCpUYKEvgS+pVaA9PoEg6osvZ1Gga/IAv/9wQLA94EDwOwigO1tgfadAcO4f7yA/ATAq94BwP2X
+A4CahgLA64ECwOzUCoPq5wP9s5cBwNkJgISgAcCN7gLB0bMFgOyLCMDTlA7Bzm2/rYoBgbv8Av+G
+esGE3gKAuSu+8YwFxK+9BICqLv70iAq/z1vB2oQDv790gZOXA8DxhQi+3r0Ewe2+AsGpfL7JtgGB
+sdgHgt+IAb3riwKA/xqAx4YBwM6BBMD24QeE/sgEvM3RAsD/4QHA9KUBg9/PBr7xxgaB0aUD//aC
+A8D0gxSB19wEvtOyCcDBmQGC9q4BvqHgCYDEbMGnaoHK2QT/j5kDv+w7gutQgP3zC/6+kgSAsh3B
+xkC/ybsCgYq4Ab+iS8LN2wK/3dUEgMGICMHQ9wK+ucQCgJvyAofd9Ai5wzbC3LcFwrjwAf78jgq+
+xiPBgzO/0myEya8E/OKkAsHYPcHfqQP/ndwCwNH/BcOngAG8/d8Egd5S/+khgr+zEICIJ4bv0AH4
+isQCgN6lAsTolwO88EDA56UEwsSgDf7U4gHDpUa9570DweyNAb/LyQfA/PwGga7MA8Db7QGBpYEB
+vqNhwNSNBMCL3AHBqzu/gGXAweUCgIqDAoCBdYLHyAbAaL/rgAWA9e4RgMwTv76yAoDZDcHd1wGA
+tucFgd6SE8DhBr+JUQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: drop table i
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@i
@@ -233,7 +381,8 @@ PREHOOK: Input: default@i
POSTHOOK: query: desc formatted i key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@i
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key date 2012-03-17 2013-08-17 0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key date 2012-03-17 2013-08-17 0 4 SExM4AQEgZ3gM4Gdw13A3/qtA4L855QD
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
index f29f7b5..edaf241 100644
--- a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
@@ -123,20 +123,62 @@ PREHOOK: Input: default@a
POSTHOOK: query: describe formatted a key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@a
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: from src
insert overwrite table a select *
insert into table b select *
@@ -231,20 +273,62 @@ PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 309 6.812 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: insert into table b select NULL, NULL from src limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -261,20 +345,62 @@ PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 10 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 10 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 10 309 6.812 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 10 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: insert into table b(value) select key+100000 from src limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -291,20 +417,63 @@ PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 20 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 20 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@b
POSTHOOK: query: describe formatted b value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 10 309 8.0 8 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 10 319 8.0 8 SExM4L8CvwLM7SyB+xL1r/4Gw751wOABvMEcgclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoLguAL+
+63GCtK0Dv6qTA/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCd
+mAGBuSGAhBmFr5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KX
+Av61xQGBgpcBgIo6geaBAf6ArwyBkDD/tkWBqNID/6ilBoDahwbA1fICwcKtBL+I0QHC28MFvvLE
+BYC8f4CF8wHCw8AD/tPRAcD+BsK12AKA7scDwcuXAoC1Bf3auAXApKAGgMn3BICZ0QWA/IgCgOL4
+BsC+EoCLhALB56oD/6G7AoDumAKCmFW+1UrAkdgCwIyYAcH0lAKA9zzD/+ECgMMTvOC6AsCg0gHA
+2i7B+5ACv7v2AoC39AGB+scBgNXPAYOysQH91nH/vaYCgOF/hbHjA72W/AG+zLUGgNHlA8DlIoCi
+DcGVLr/hyQTAlg6CwjX/5ooB/++nA4CU7wOA7qUHwpq3BP/F1AK/jkSBwMANv8TlBMKclQb/s60F
+/7SYAcOZ0QO/t50G/oKmAsCP+wLAhXWB/bMGv/DkA8DiZsLx0AL+vckDgtRM/sG5AoCJ4AOB8twG
+v+i4AYCzY4CIpQbAl/YKgZDRBr/TmgaA2iOAlNkGgJaTBMC3gQrHms8DuZmpAoHmQIH19wKAtPAH
+/o7yBMDG9hLA5pEBgMWAA8D42wKBybwGgLSMBIHd6wL+gOAFgZy+BcO14gLAqgr8xkyChcIBwKXi
+A7+ber/dUoGzzgGBj2SAx7IEv/NHwKWcAsKw4gL+za0B/8mxAsCZYID5qQKCurYD//CUC7/bqwTC
+3USAqf8D/96OAb+ylgzA4qcKgeqxAcDugwK/ragGgOCEBoDygQKA9KQHgO/6AcH7IcCpDr/FFYGb
+0QGE9oUBvL2OA7/VyAyAr2nAy6IDwLn8BoD9/wPCyI4DgKINgN6bBL/NcMPFfr7HhQrCnJYG/ufw
+Af6VrgfC8FK/ruoCv524AcDcrwSC8osD/v7KA8D+eYC5A4W+3g27v7gDw9cb/ZlugaEXwrr4Bv7o
+iwmB3oAB/5qLAf+pXISEHv6j/gX+s7QBgIx9wLWpA4C65gGBxsoBwPLkAf/C7QGBrpQCv97XA8Ge
+xwK/1L4FgMqPA8H8kAGAl5cGgKMBgdPyAv61XoHpE7/0YYHVpQfFwLEB+queAoCUhwOEtckCve9X
+/8bkBMGKpAOBwaQBwOKNBL6CwQmBz4UDgM8pgomLAf2GrAHCkkL+wc0BgdK0CYHgigm+x6cGgdhH
+v/SpB4Gl3gzB2m/+mdUIgY2kAv+5P8DmngeAyIAFgNkGgrBG/raEAsKdyQWAqogEvq7iBIavsAH6
++kHByq8O/8TAAcTfzQf87TuCuLUBv/eKAYCX+AG/hdgBws2sA8GtgQK+r5MF/7uUBcGH1gLA1YEC
+goGcCf2WF8Dc7QKC9/wC/riiAcHXngPDmI8BgaCJCLzEpgP/hReD5z2+mu4CgNQXwOCTBsDm9QG/
+lh4=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: drop table src_multi2
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table src_multi2
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out
index fb833bc..74085bf 100644
--- a/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out
+++ b/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out
@@ -48,10 +48,10 @@ PREHOOK: Input: default@space
POSTHOOK: query: desc formatted space ` left`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@space
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
- left string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ left string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
PREHOOK: query: insert into space values ("1", "2", "3")
PREHOOK: type: QUERY
PREHOOK: Output: default@space
@@ -67,10 +67,11 @@ PREHOOK: Input: default@space
POSTHOOK: query: desc formatted space ` left`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@space
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
- left string 0 1 1.0 1 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ left string 0 1 1.0 1 SExM4AEBxbi8+AQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
PREHOOK: query: select * from space
PREHOOK: type: QUERY
PREHOOK: Input: default@space
[07/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_update_status.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out
index 9cd9a8d..f23ba57 100644
--- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out
@@ -46,10 +46,12 @@ PREHOOK: Input: default@src_stat
POSTHOOK: query: describe formatted src_stat key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 16 1.72 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111')
@@ -60,10 +62,12 @@ PREHOOK: Input: default@src_stat
POSTHOOK: query: describe formatted src_stat key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 1111 1.111 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 1111 1.111 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124')
@@ -74,10 +78,10 @@ PREHOOK: Input: default@src_stat
POSTHOOK: query: describe formatted src_stat value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 122 121 1.23 124 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 122 121 1.23 124 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key
PREHOOK: type: QUERY
PREHOOK: Input: default@src_stat_int
@@ -92,10 +96,12 @@ PREHOOK: Input: default@src_stat_int
POSTHOOK: query: describe formatted src_stat_int key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_int
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key double 66.0 406.0 10 15 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key double 66.0 406.0 10 15 SExM4A8PgZLrJoLyx3uBrPspvqnUPoHIoA/+prAWgPaQT4Du5BLDosR5vZLrGIDtbYDVh+QBwKHW
+UIOz9UG+ouNE
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22')
@@ -106,10 +112,12 @@ PREHOOK: Input: default@src_stat_int
POSTHOOK: query: describe formatted src_stat_int key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_int
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key double 333.22 22.22 10 2222 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key double 333.22 22.22 10 2222 SExM4A8PgZLrJoLyx3uBrPspvqnUPoHIoA/+prAWgPaQT4Du5BLDosR5vZLrGIDtbYDVh+QBwKHW
+UIOz9UG+ouNE
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: create database if not exists dummydb
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:dummydb
@@ -132,10 +140,12 @@ PREHOOK: Input: default@src_stat
POSTHOOK: query: describe formatted default.src_stat key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 3333 2.222 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 3333 2.222 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
@@ -146,10 +156,10 @@ PREHOOK: Input: default@src_stat
POSTHOOK: query: describe formatted default.src_stat value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 233 232 2.34 235 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 233 232 2.34 235 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: use default
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:default
@@ -246,140 +256,140 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats s
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-s smallint from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+s smallint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats i
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats i
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-i int from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+i int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats b
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats b
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-b bigint from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b bigint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats f
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats f
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-f float from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+f float from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats d
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats d
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-d double from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d double from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats dem
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats dem
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-dem decimal(10,0) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dem decimal(10,0) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats ts
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats ts
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-ts timestamp from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ts timestamp from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats dt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats dt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-dt date from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dt date from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats str
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats str
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-str string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+str string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats v
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats v
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-v varchar(12) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v varchar(12) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats c
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats c
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c char(5) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c char(5) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats bl
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats bl
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bl boolean from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bl boolean from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats bin
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats bin
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bin binary from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bin binary from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: DESC FORMATTED datatype_stats t
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats t
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-t tinyint from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+t tinyint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35')
@@ -390,20 +400,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats t
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-t tinyint 35 234 233 232 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+t tinyint 35 234 233 232 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats s
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats s
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-s smallint from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+s smallint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25')
@@ -414,20 +424,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats s
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-s smallint 25 489 56 56 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+s smallint 25 489 56 56 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats i
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats i
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-i int from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+i int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5')
@@ -438,20 +448,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats i
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-i int 5 889 1 59 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+i int 5 889 1 59 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats b
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats b
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-b bigint from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b bigint from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8')
@@ -462,20 +472,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats b
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-b bigint 8 89 14 9 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b bigint 8 89 14 9 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats f
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats f
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-f float from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+f float from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00')
@@ -486,20 +496,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats f
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-f float 8.0 2345.656 45 563 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+f float 8.0 2345.656 45 563 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats d
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats d
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-d double from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d double from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455')
@@ -510,20 +520,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats d
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-d double 0.00455 560.3367 12 5677 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d double 0.00455 560.3367 12 5677 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats dem
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats dem
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-dem decimal(10,0) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dem decimal(10,0) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0')
@@ -534,20 +544,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats dem
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-dem decimal(10,0) 0 560 912 57 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dem decimal(10,0) 0 560 912 57 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats ts
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats ts
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-ts timestamp from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ts timestamp from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924')
@@ -558,20 +568,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats ts
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-ts timestamp 1357030924 1357030923 12 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ts timestamp 1357030924 1357030923 12 7 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats dt
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats dt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-dt date from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dt date from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04')
@@ -582,20 +592,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats dt
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-dt date 2001-02-04 2012-01-01 912 57 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+dt date 2001-02-04 2012-01-01 912 57 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats str
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats str
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-str string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+str string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235')
@@ -606,20 +616,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats str
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-str string 233 232 2.34 235 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+str string 233 232 2.34 235 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats v
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats v
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-v varchar(12) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v varchar(12) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25')
@@ -630,20 +640,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats v
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-v varchar(12) 33 22 4.4 25 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v varchar(12) 33 22 4.4 25 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats c
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats c
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c char(5) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c char(5) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58')
@@ -654,20 +664,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats c
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c char(5) 3 2 9.0 58 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c char(5) 3 2 9.0 58 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats bl
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats bl
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bl boolean from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bl boolean from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8')
@@ -678,20 +688,20 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats bl
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bl boolean 1 9 8 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bl boolean 1 9 8 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
PREHOOK: query: DESC FORMATTED datatype_stats bin
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats bin
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bin binary from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bin binary from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8')
@@ -702,7 +712,7 @@ PREHOOK: Input: default@datatype_stats
POSTHOOK: query: DESC FORMATTED datatype_stats bin
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@datatype_stats
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-bin binary 8 2.0 8 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+bin binary 8 2.0 8 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}}
[09/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
new file mode 100644
index 0000000..54828f2
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.FileMetadataExprType;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
+import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestOldSchema {
+ private ObjectStore store = null;
+
+ private static final Logger LOG = LoggerFactory.getLogger(TestOldSchema.class.getName());
+
+ public static class MockPartitionExpressionProxy implements PartitionExpressionProxy {
+ @Override
+ public String convertExprToFilter(byte[] expr) throws MetaException {
+ return null;
+ }
+
+ @Override
+ public boolean filterPartitionsByExpr(List<String> partColumnNames,
+ List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName,
+ List<String> partitionNames) throws MetaException {
+ return false;
+ }
+
+ @Override
+ public FileMetadataExprType getMetadataType(String inputFormat) {
+ return null;
+ }
+
+ @Override
+ public SearchArgument createSarg(byte[] expr) {
+ return null;
+ }
+
+ @Override
+ public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) {
+ return null;
+ }
+ }
+
+ String bitVectors[] = new String[2];
+
+ @Before
+ public void setUp() throws Exception {
+ HiveConf conf = new HiveConf();
+ conf.setVar(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS,
+ MockPartitionExpressionProxy.class.getName());
+ conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR, false);
+
+ store = new ObjectStore();
+ store.setConf(conf);
+ dropAllStoreObjects(store);
+
+ HyperLogLog hll = HyperLogLog.builder().build();
+ hll.addLong(1);
+ bitVectors[1] = hll.serialize();
+ hll = HyperLogLog.builder().build();
+ hll.addLong(2);
+ hll.addLong(3);
+ hll.addLong(3);
+ hll.addLong(4);
+ bitVectors[0] = hll.serialize();
+ }
+
+ @After
+ public void tearDown() {
+ }
+
+ /**
+ * Tests partition operations
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testPartitionOps() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ Database db1 = new Database(dbName, "description", "locationurl", null);
+ store.createDatabase(db1);
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col1", "long", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ Deadline.startTimer("getPartition");
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = new ArrayList<>();
+ partVal.add(String.valueOf(i));
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + String.valueOf(i));
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col1");
+ obj.setColType("bigint");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData dcsd = new LongColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[0]);
+ data.setLongStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(10, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col1", cso.getColName());
+ Assert.assertEquals("bigint", cso.getColType());
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(45, lcsd.getNumNulls());
+ Assert.assertEquals(91, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col1"));
+ statChecker.checkStats(aggrStats);
+
+ }
+
+ private static interface Checker {
+ void checkStats(AggrStats aggrStats) throws Exception;
+ }
+
+ public static void dropAllStoreObjects(RawStore store) throws MetaException,
+ InvalidObjectException, InvalidInputException {
+ try {
+ Deadline.registerIfNot(100000);
+ Deadline.startTimer("getPartition");
+ List<String> dbs = store.getAllDatabases();
+ for (int i = 0; i < dbs.size(); i++) {
+ String db = dbs.get(i);
+ List<String> tbls = store.getAllTables(db);
+ for (String tbl : tbls) {
+ List<Partition> parts = store.getPartitions(db, tbl, 100);
+ for (Partition part : parts) {
+ store.dropPartition(db, tbl, part.getValues());
+ }
+ store.dropTable(db, tbl);
+ }
+ store.dropDatabase(db);
+ }
+ } catch (NoSuchObjectException e) {
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
index 1fa9447..e31dad3 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
@@ -23,6 +23,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.ObjectStore;
import org.apache.hadoop.hive.metastore.TableType;
@@ -740,4 +742,158 @@ public class TestCachedStore {
aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
}
+
+ @Test
+ public void testPartitionAggrStats() throws Exception {
+ String dbName = "testTableColStatsOps1";
+ String tblName = "tbl1";
+ String colName = "f1";
+
+ Database db = new Database(dbName, null, "some_location", null);
+ cachedStore.createDatabase(db);
+
+ List<FieldSchema> cols = new ArrayList<FieldSchema>();
+ cols.add(new FieldSchema(colName, "int", null));
+ List<FieldSchema> partCols = new ArrayList<FieldSchema>();
+ partCols.add(new FieldSchema("col", "int", null));
+ StorageDescriptor sd =
+ new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap<String, String>()),
+ null, null, null);
+
+ Table tbl =
+ new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<String, String>(),
+ null, null, TableType.MANAGED_TABLE.toString());
+ cachedStore.createTable(tbl);
+
+ List<String> partVals1 = new ArrayList<String>();
+ partVals1.add("1");
+ List<String> partVals2 = new ArrayList<String>();
+ partVals2.add("2");
+
+ Partition ptn1 =
+ new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+ cachedStore.addPartition(ptn1);
+ Partition ptn2 =
+ new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+ cachedStore.addPartition(ptn2);
+
+ ColumnStatistics stats = new ColumnStatistics();
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
+ statsDesc.setPartName("col");
+ List<ColumnStatisticsObj> colStatObjs = new ArrayList<ColumnStatisticsObj>();
+
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
+ LongColumnStatsData longStats = new LongColumnStatsData();
+ longStats.setLowValue(0);
+ longStats.setHighValue(100);
+ longStats.setNumNulls(50);
+ longStats.setNumDVs(30);
+ data.setLongStats(longStats);
+ colStatObjs.add(colStats);
+
+ stats.setStatsDesc(statsDesc);
+ stats.setStatsObj(colStatObjs);
+
+ cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1);
+
+ longStats.setNumDVs(40);
+ cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2);
+
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(colName);
+ List<String> aggrPartVals = new ArrayList<String>();
+ aggrPartVals.add("1");
+ aggrPartVals.add("2");
+ AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40);
+ aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40);
+ }
+
+ @Test
+ public void testPartitionAggrStatsBitVector() throws Exception {
+ String dbName = "testTableColStatsOps2";
+ String tblName = "tbl2";
+ String colName = "f1";
+
+ Database db = new Database(dbName, null, "some_location", null);
+ cachedStore.createDatabase(db);
+
+ List<FieldSchema> cols = new ArrayList<FieldSchema>();
+ cols.add(new FieldSchema(colName, "int", null));
+ List<FieldSchema> partCols = new ArrayList<FieldSchema>();
+ partCols.add(new FieldSchema("col", "int", null));
+ StorageDescriptor sd =
+ new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap<String, String>()),
+ null, null, null);
+
+ Table tbl =
+ new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<String, String>(),
+ null, null, TableType.MANAGED_TABLE.toString());
+ cachedStore.createTable(tbl);
+
+ List<String> partVals1 = new ArrayList<String>();
+ partVals1.add("1");
+ List<String> partVals2 = new ArrayList<String>();
+ partVals2.add("2");
+
+ Partition ptn1 =
+ new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+ cachedStore.addPartition(ptn1);
+ Partition ptn2 =
+ new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+ cachedStore.addPartition(ptn2);
+
+ ColumnStatistics stats = new ColumnStatistics();
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
+ statsDesc.setPartName("col");
+ List<ColumnStatisticsObj> colStatObjs = new ArrayList<ColumnStatisticsObj>();
+
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
+ LongColumnStatsData longStats = new LongColumnStatsData();
+ longStats.setLowValue(0);
+ longStats.setHighValue(100);
+ longStats.setNumNulls(50);
+ longStats.setNumDVs(30);
+
+ HyperLogLog hll = HyperLogLog.builder().build();
+ hll.addLong(1);
+ hll.addLong(2);
+ hll.addLong(3);
+ longStats.setBitVectors(hll.serialize());
+
+ data.setLongStats(longStats);
+ colStatObjs.add(colStats);
+
+ stats.setStatsDesc(statsDesc);
+ stats.setStatsObj(colStatObjs);
+
+ cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1);
+
+ longStats.setNumDVs(40);
+ hll = HyperLogLog.builder().build();
+ hll.addLong(2);
+ hll.addLong(3);
+ hll.addLong(4);
+ hll.addLong(5);
+ longStats.setBitVectors(hll.serialize());
+
+ cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2);
+
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(colName);
+ List<String> aggrPartVals = new ArrayList<String>();
+ aggrPartVals.add("1");
+ aggrPartVals.add("2");
+ AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5);
+ aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+ Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
index ecc99c3..9cf1fb8 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -115,7 +116,11 @@ public class TestHBaseAggregateStatsCacheWithBitVector {
dcsd.setLowValue(-20.1234213423);
dcsd.setNumNulls(30);
dcsd.setNumDVs(12342);
- dcsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
+ HyperLogLog hll = HyperLogLog.builder().build();
+ hll.addDouble(1);
+ hll.addDouble(2);
+ hll.addDouble(3);
+ dcsd.setBitVectors(hll.serialize());
data.setDoubleStats(dcsd);
obj.setStatsData(data);
cs.addToStatsObj(obj);
@@ -135,7 +140,11 @@ public class TestHBaseAggregateStatsCacheWithBitVector {
dcsd.setLowValue(-20.1234213423);
dcsd.setNumNulls(30);
dcsd.setNumDVs(12342);
- dcsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
+ hll = HyperLogLog.builder().build();
+ hll.addDouble(3);
+ hll.addDouble(4);
+ hll.addDouble(5);
+ dcsd.setBitVectors(hll.serialize());
data.setDoubleStats(dcsd);
obj.setStatsData(data);
cs.addToStatsObj(obj);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
index 99ce96c..4d868b0 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.StatObjectConverter;
import org.apache.hadoop.hive.metastore.api.AggrStats;
@@ -62,8 +63,7 @@ public class TestHBaseAggregateStatsExtrapolation {
SortedMap<String, Cell> rows = new TreeMap<>();
// NDV will be 3 for the bitVectors
- String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
-
+ String bitVectors = null;
@Before
public void before() throws IOException {
MockitoAnnotations.initMocks(this);
@@ -71,6 +71,11 @@ public class TestHBaseAggregateStatsExtrapolation {
conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
store = MockUtils.init(conf, htable, rows);
store.backdoor().getStatsCache().resetCounters();
+ HyperLogLog hll = HyperLogLog.builder().build();
+ hll.addLong(1);
+ hll.addLong(2);
+ hll.addLong(3);
+ bitVectors = hll.serialize();
}
private static interface Checker {
@@ -395,7 +400,7 @@ public class TestHBaseAggregateStatsExtrapolation {
dcsd.setHighValue(1000 + i);
dcsd.setLowValue(-1000 - i);
dcsd.setNumNulls(i);
- dcsd.setNumDVs(10 * i);
+ dcsd.setNumDVs(i == 0 ? 1 : 10 * i);
data.setLongStats(dcsd);
obj.setStatsData(data);
cs.addToStatsObj(obj);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
index 74e1669..0ad2780 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.StatObjectConverter;
import org.apache.hadoop.hive.metastore.api.AggrStats;
@@ -61,9 +62,8 @@ public class TestHBaseAggregateStatsNDVUniformDist {
SortedMap<String, Cell> rows = new TreeMap<>();
// NDV will be 3 for bitVectors[0] and 1 for bitVectors[1]
- String bitVectors[] = {
- "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}",
- "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" };
+ String bitVectors[] = new String[2];
+
@Before
public void before() throws IOException {
@@ -73,6 +73,15 @@ public class TestHBaseAggregateStatsNDVUniformDist {
conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true);
store = MockUtils.init(conf, htable, rows);
store.backdoor().getStatsCache().resetCounters();
+ HyperLogLog hll = HyperLogLog.builder().build();
+ hll.addLong(1);
+ bitVectors[1] = hll.serialize();
+ hll = HyperLogLog.builder().build();
+ hll.addLong(2);
+ hll.addLong(3);
+ hll.addLong(3);
+ hll.addLong(4);
+ bitVectors[0] = hll.serialize();
}
private static interface Checker {
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 97bf839..16c440f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -3396,7 +3396,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
ColStatistics.Range r = cs.getRange();
StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue,
r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(),
- cs.getNumNulls(), cs.getCountDistint(), cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
+ cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
colStats = Collections.singletonList(cso);
StatsSetupConst.setColumnStatsState(tblProps, colNames);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index aa77234..2380073 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -695,38 +695,40 @@ public final class MetaDataFormatUtils {
ColumnStatisticsData csd = cso.getStatsData();
if (csd.isSetBinaryStats()) {
BinaryColumnStatsData bcsd = csd.getBinaryStats();
- appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(),
+ appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", bcsd.getAvgColLen(),
bcsd.getMaxColLen(), "", "");
} else if (csd.isSetStringStats()) {
StringColumnStatsData scsd = csd.getStringStats();
appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
- scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
+ scsd.getBitVectors() == null ? "" : scsd.getBitVectors(), scsd.getAvgColLen(),
+ scsd.getMaxColLen(), "", "");
} else if (csd.isSetBooleanStats()) {
BooleanColumnStatsData bcsd = csd.getBooleanStats();
- appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "",
+ appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", "",
bcsd.getNumTrues(), bcsd.getNumFalses());
} else if (csd.isSetDecimalStats()) {
DecimalColumnStatsData dcsd = csd.getDecimalStats();
appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()),
convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
+ dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(),
"", "", "", "");
} else if (csd.isSetDoubleStats()) {
DoubleColumnStatsData dcsd = csd.getDoubleStats();
appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
- dcsd.getNumDVs(), "", "", "", "");
+ dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
} else if (csd.isSetLongStats()) {
LongColumnStatsData lcsd = csd.getLongStats();
appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
- lcsd.getNumDVs(), "", "", "", "");
+ lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", "");
} else if (csd.isSetDateStats()) {
DateColumnStatsData dcsd = csd.getDateStats();
appendColumnStats(tableInfo,
convertToString(dcsd.getLowValue()),
convertToString(dcsd.getHighValue()),
- dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
+ dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
}
} else {
- appendColumnStats(tableInfo, "", "", "", "", "", "", "", "");
+ appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", "");
}
}
@@ -779,7 +781,7 @@ public final class MetaDataFormatUtils {
}
private static void appendColumnStats(StringBuilder sb, Object min, Object max, Object numNulls,
- Object ndv, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
+ Object ndv, Object bitVector, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
sb.append(String.format("%-" + ALIGNMENT + "s", min)).append(FIELD_DELIM);
sb.append(String.format("%-" + ALIGNMENT + "s", max)).append(FIELD_DELIM);
sb.append(String.format("%-" + ALIGNMENT + "s", numNulls)).append(FIELD_DELIM);
@@ -788,6 +790,7 @@ public final class MetaDataFormatUtils {
sb.append(String.format("%-" + ALIGNMENT + "s", maxColLen)).append(FIELD_DELIM);
sb.append(String.format("%-" + ALIGNMENT + "s", numTrues)).append(FIELD_DELIM);
sb.append(String.format("%-" + ALIGNMENT + "s", numFalses)).append(FIELD_DELIM);
+ sb.append(String.format("%-" + ALIGNMENT + "s", bitVector)).append(FIELD_DELIM);
}
private static void appendColumnStatsNoFormatting(StringBuilder sb, Object min,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 41a1c7a..f2d2e2d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -18,9 +18,6 @@
package org.apache.hadoop.hive.ql.plan;
-import org.apache.hadoop.hive.ql.stats.StatsUtils;
-
-
public class ColStatistics {
private String colName;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
index d7a9888..845ffcf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
@@ -59,8 +59,8 @@ public class DescTableDesc extends DDLDesc implements Serializable {
*/
private static final String schema = "col_name,data_type,comment#string:string:string";
private static final String colStatsSchema = "col_name,data_type,min,max,num_nulls,"
- + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment"
- + "#string:string:string:string:string:string:string:string:string:string:string";
+ + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment,bitVector"
+ + "#string:string:string:string:string:string:string:string:string:string:string:string";
public DescTableDesc() {
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 2d56950..8ee41bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -23,9 +23,9 @@ import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
-import org.apache.hadoop.hive.common.ndv.FMSketch;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.Description;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q b/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q
new file mode 100644
index 0000000..d64263f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q
@@ -0,0 +1,139 @@
+set hive.stats.fetch.bitvector=false;
+
+create table src_stat as select * from src1;
+
+create table src_stat_int (
+ key double,
+ value string
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int;
+
+ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key;
+
+describe formatted src_stat key;
+
+ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111');
+
+describe formatted src_stat key;
+
+ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124');
+
+describe formatted src_stat value;
+
+ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key;
+
+describe formatted src_stat_int key;
+
+ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22');
+
+describe formatted src_stat_int key;
+
+
+
+create database if not exists dummydb;
+
+use dummydb;
+
+ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222');
+
+describe formatted default.src_stat key;
+
+ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235');
+
+describe formatted default.src_stat value;
+
+use default;
+
+drop database dummydb;
+
+create table datatype_stats(
+ t TINYINT,
+ s SMALLINT,
+ i INT,
+ b BIGINT,
+ f FLOAT,
+ d DOUBLE,
+ dem DECIMAL, --default decimal (10,0)
+ ts TIMESTAMP,
+ dt DATE,
+ str STRING,
+ v VARCHAR(12),
+ c CHAR(5),
+ bl BOOLEAN,
+ bin BINARY);
+
+INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin');
+INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+DESC FORMATTED datatype_stats s;
+DESC FORMATTED datatype_stats i;
+DESC FORMATTED datatype_stats b;
+DESC FORMATTED datatype_stats f;
+DESC FORMATTED datatype_stats d;
+DESC FORMATTED datatype_stats dem;
+DESC FORMATTED datatype_stats ts;
+DESC FORMATTED datatype_stats dt;
+DESC FORMATTED datatype_stats str;
+DESC FORMATTED datatype_stats v;
+DESC FORMATTED datatype_stats c;
+DESC FORMATTED datatype_stats bl;
+DESC FORMATTED datatype_stats bin;
+
+--tinyint
+DESC FORMATTED datatype_stats t;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35');
+DESC FORMATTED datatype_stats t;
+--smallint
+DESC FORMATTED datatype_stats s;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25');
+DESC FORMATTED datatype_stats s;
+--int
+DESC FORMATTED datatype_stats i;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5');
+DESC FORMATTED datatype_stats i;
+--bigint
+DESC FORMATTED datatype_stats b;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8');
+DESC FORMATTED datatype_stats b;
+
+--float
+DESC FORMATTED datatype_stats f;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00');
+DESC FORMATTED datatype_stats f;
+--double
+DESC FORMATTED datatype_stats d;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455');
+DESC FORMATTED datatype_stats d;
+--decimal
+DESC FORMATTED datatype_stats dem;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0');
+DESC FORMATTED datatype_stats dem;
+--timestamp
+DESC FORMATTED datatype_stats ts;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924');
+DESC FORMATTED datatype_stats ts;
+--date
+DESC FORMATTED datatype_stats dt;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04');
+DESC FORMATTED datatype_stats dt;
+--string
+DESC FORMATTED datatype_stats str;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235');
+DESC FORMATTED datatype_stats str;
+--varchar
+DESC FORMATTED datatype_stats v;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25');
+DESC FORMATTED datatype_stats v;
+--char
+DESC FORMATTED datatype_stats c;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58');
+DESC FORMATTED datatype_stats c;
+--boolean
+DESC FORMATTED datatype_stats bl;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8');
+DESC FORMATTED datatype_stats bl;
+--binary
+DESC FORMATTED datatype_stats bin;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8');
+DESC FORMATTED datatype_stats bin;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/bitvector.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bitvector.q b/ql/src/test/queries/clientpositive/bitvector.q
new file mode 100644
index 0000000..d8669f2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/bitvector.q
@@ -0,0 +1,3 @@
+set hive.mapred.mode=nonstrict;
+
+desc formatted src key;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/fm-sketch.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/fm-sketch.q b/ql/src/test/queries/clientpositive/fm-sketch.q
new file mode 100644
index 0000000..6a65442
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/fm-sketch.q
@@ -0,0 +1,58 @@
+set hive.mapred.mode=nonstrict;
+set hive.stats.ndv.algo=fm;
+
+create table n(key int);
+
+insert overwrite table n select null from src;
+
+explain analyze table n compute statistics for columns;
+
+analyze table n compute statistics for columns;
+
+desc formatted n key;
+
+
+create table i(key int);
+
+insert overwrite table i select key from src;
+
+explain analyze table i compute statistics for columns;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key double);
+
+insert overwrite table i select key from src;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key decimal);
+
+insert overwrite table i select key from src;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key date);
+
+insert into i values ('2012-08-17');
+insert into i values ('2012-08-17');
+insert into i values ('2013-08-17');
+insert into i values ('2012-03-17');
+insert into i values ('2012-05-17');
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/hll.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q
index edfdce8..91c4e78 100644
--- a/ql/src/test/queries/clientpositive/hll.q
+++ b/ql/src/test/queries/clientpositive/hll.q
@@ -1,5 +1,16 @@
set hive.mapred.mode=nonstrict;
+create table n(key int);
+
+insert overwrite table n select null from src;
+
+explain analyze table n compute statistics for columns;
+
+analyze table n compute statistics for columns;
+
+desc formatted n key;
+
+
create table i(key int);
insert overwrite table i select key from src;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alterColumnStats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alterColumnStats.q.out b/ql/src/test/results/clientpositive/alterColumnStats.q.out
index 519a62a..ea2416f 100644
--- a/ql/src/test/results/clientpositive/alterColumnStats.q.out
+++ b/ql/src/test/results/clientpositive/alterColumnStats.q.out
@@ -142,17 +142,17 @@ PREHOOK: Input: default@p
POSTHOOK: query: desc formatted p c1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@p
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c1 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: desc formatted p c2
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@p
POSTHOOK: query: desc formatted p c2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@p
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c2 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c2 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
index 672bd9f..a315a6b 100644
--- a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
+++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
@@ -65,9 +65,9 @@ PREHOOK: Input: default@p
POSTHOOK: query: desc formatted p partition (c=1) a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@p
-# col_name data_type comment
-
-a int from deserializer
+# col_name data_type comment
+
+a int from deserializer
PREHOOK: query: desc formatted p partition (c=1)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@p
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
index c0d4eee..700f07f 100644
--- a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
@@ -34,9 +34,11 @@ PREHOOK: Input: default@src_stat_part_one
POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part_one
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 16 1.72 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2')
PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2')
@@ -47,9 +49,11 @@ PREHOOK: Input: default@src_stat_part_one
POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part_one
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 11 2.2 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 11 2.2 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
@@ -86,9 +90,11 @@ PREHOOK: Input: default@src_stat_part_two
POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part_two
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 16 1.72 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40')
PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40')
@@ -99,9 +105,11 @@ PREHOOK: Input: default@src_stat_part_two
POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part_two
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 30 1.72 40 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 30 1.72 40 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: create database if not exists dummydb
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:dummydb
@@ -124,9 +132,11 @@ PREHOOK: Input: default@src_stat_part_two
POSTHOOK: query: describe formatted default.src_stat_part_two PARTITION(px=1, py='a') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_stat_part_two
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 40 1.72 50 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 40 1.72 50 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+ from deserializer
PREHOOK: query: use default
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:default
[11/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b79fe6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b79fe6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b79fe6
Branch: refs/heads/master
Commit: f8b79fe6d136f348820ce81dc7a6883f1e70dcfc
Parents: 892841a
Author: Pengcheng Xiong <px...@apache.org>
Authored: Tue Jul 25 15:41:14 2017 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Tue Jul 25 15:41:14 2017 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/common/ndv/FMSketch.java | 117 +--
.../ndv/NumDistinctValueEstimatorFactory.java | 30 +-
.../hive/common/ndv/fm/FMSketchUtils.java | 133 ++++
.../hadoop/hive/common/ndv/hll/HyperLogLog.java | 4 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +-
.../ndv/fm/TestFMSketchSerialization.java | 97 +++
data/conf/hive-site.xml | 4 +
data/conf/llap/hive-site.xml | 4 +
data/conf/perf-reg/hive-site.xml | 5 +
data/conf/tez/hive-site.xml | 5 +
.../upgrade/derby/044-HIVE-16997.derby.sql | 1 +
.../upgrade/derby/hive-schema-3.0.0.derby.sql | 2 +-
.../derby/upgrade-2.3.0-to-3.0.0.derby.sql | 1 +
.../upgrade/mssql/029-HIVE-16997.mssql.sql | 1 +
.../upgrade/mssql/hive-schema-3.0.0.mssql.sql | 1 +
.../mssql/upgrade-2.3.0-to-3.0.0.mssql.sql | 1 +
.../upgrade/mysql/044-HIVE-16997.mysql.sql | 1 +
.../upgrade/mysql/hive-schema-3.0.0.mysql.sql | 1 +
.../mysql/upgrade-2.3.0-to-3.0.0.mysql.sql | 1 +
.../upgrade/oracle/044-HIVE-16997.oracle.sql | 1 +
.../upgrade/oracle/hive-schema-3.0.0.oracle.sql | 1 +
.../oracle/upgrade-2.3.0-to-3.0.0.oracle.sql | 1 +
.../postgres/043-HIVE-16997.postgres.sql | 1 +
.../postgres/hive-schema-3.0.0.postgres.sql | 1 +
.../upgrade-2.3.0-to-3.0.0.postgres.sql | 1 +
.../hive/metastore/MetaStoreDirectSql.java | 98 ++-
.../hadoop/hive/metastore/MetaStoreUtils.java | 49 +-
.../hadoop/hive/metastore/ObjectStore.java | 20 +-
.../hive/metastore/StatObjectConverter.java | 40 +-
.../hive/metastore/cache/CachedStore.java | 50 +-
.../aggr/DateColumnStatsAggregator.java | 358 +++++++++
.../aggr/StringColumnStatsAggregator.java | 303 ++++++++
.../hadoop/hive/metastore/hbase/StatsCache.java | 11 +-
.../stats/BinaryColumnStatsAggregator.java | 2 +-
.../stats/BooleanColumnStatsAggregator.java | 2 +-
.../hbase/stats/ColumnStatsAggregator.java | 4 +-
.../stats/ColumnStatsAggregatorFactory.java | 14 +-
.../stats/DecimalColumnStatsAggregator.java | 35 +-
.../stats/DoubleColumnStatsAggregator.java | 33 +-
.../hbase/stats/IExtrapolatePartStatus.java | 2 +-
.../hbase/stats/LongColumnStatsAggregator.java | 34 +-
.../stats/StringColumnStatsAggregator.java | 122 ---
.../stats/merge/BinaryColumnStatsMerger.java | 2 +-
.../stats/merge/BooleanColumnStatsMerger.java | 2 +-
.../hbase/stats/merge/ColumnStatsMerger.java | 2 +-
.../stats/merge/ColumnStatsMergerFactory.java | 2 +-
.../stats/merge/DateColumnStatsMerger.java | 2 +-
.../stats/merge/DecimalColumnStatsMerger.java | 2 +-
.../stats/merge/DoubleColumnStatsMerger.java | 2 +-
.../stats/merge/LongColumnStatsMerger.java | 2 +-
.../stats/merge/StringColumnStatsMerger.java | 2 +-
.../model/MPartitionColumnStatistics.java | 24 +-
.../metastore/model/MTableColumnStatistics.java | 24 +-
metastore/src/model/package.jdo | 6 +
.../hadoop/hive/metastore/TestOldSchema.java | 229 ++++++
.../hive/metastore/cache/TestCachedStore.java | 156 ++++
...stHBaseAggregateStatsCacheWithBitVector.java | 13 +-
.../TestHBaseAggregateStatsExtrapolation.java | 11 +-
.../TestHBaseAggregateStatsNDVUniformDist.java | 15 +-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 2 +-
.../formatting/MetaDataFormatUtils.java | 19 +-
.../hadoop/hive/ql/plan/ColStatistics.java | 3 -
.../hadoop/hive/ql/plan/DescTableDesc.java | 4 +-
.../ql/udf/generic/GenericUDAFComputeStats.java | 2 +-
...lter_table_update_status_disable_bitvector.q | 139 ++++
ql/src/test/queries/clientpositive/bitvector.q | 3 +
ql/src/test/queries/clientpositive/fm-sketch.q | 58 ++
ql/src/test/queries/clientpositive/hll.q | 11 +
.../clientpositive/alterColumnStats.q.out | 16 +-
.../clientpositive/alterColumnStatsPart.q.out | 6 +-
.../alter_partition_update_status.q.out | 40 +-
.../alter_table_column_stats.q.out | 764 +++++++++++--------
.../alter_table_update_status.q.out | 394 +++++-----
..._table_update_status_disable_bitvector.q.out | 708 +++++++++++++++++
.../clientpositive/analyze_tbl_part.q.out | 24 +-
.../clientpositive/autoColumnStats_5.q.out | 36 +-
.../clientpositive/autoColumnStats_9.q.out | 58 +-
.../results/clientpositive/avro_decimal.q.out | 9 +-
.../clientpositive/avro_decimal_native.q.out | 9 +-
.../test/results/clientpositive/bitvector.q.out | 31 +
.../test/results/clientpositive/char_udf1.q.out | 2 +-
.../clientpositive/colstats_all_nulls.q.out | 18 +-
...names_with_leading_and_trailing_spaces.q.out | 17 +-
.../column_pruner_multiple_children.q.out | 18 +-
.../clientpositive/columnstats_partlvl.q.out | 46 +-
.../clientpositive/columnstats_partlvl_dp.q.out | 70 +-
.../clientpositive/columnstats_tbllvl.q.out | 78 +-
.../results/clientpositive/compustat_avro.q.out | 16 +-
.../clientpositive/compute_stats_date.q.out | 20 +-
.../clientpositive/compute_stats_decimal.q.out | 2 +-
.../clientpositive/compute_stats_double.q.out | 2 +-
.../clientpositive/compute_stats_long.q.out | 2 +-
.../clientpositive/compute_stats_string.q.out | 2 +-
.../confirm_initial_tbl_stats.q.out | 223 ++++--
.../results/clientpositive/decimal_stats.q.out | 9 +-
.../results/clientpositive/deleteAnalyze.q.out | 8 +-
.../clientpositive/describe_syntax.q.out | 24 +-
.../results/clientpositive/describe_table.q.out | 74 +-
.../display_colstats_tbllvl.q.out | 111 +--
.../encrypted/encryption_move_tbl.q.out | 116 ++-
.../extrapolate_part_stats_full.q.out | 7 +-
.../extrapolate_part_stats_partial.q.out | 22 +-
.../test/results/clientpositive/fm-sketch.q.out | 333 ++++++++
ql/src/test/results/clientpositive/hll.q.out | 181 ++++-
.../clientpositive/llap/autoColumnStats_2.q.out | 233 +++++-
...names_with_leading_and_trailing_spaces.q.out | 17 +-
.../llap/columnstats_part_coltype.q.out | 165 ++--
.../clientpositive/llap/deleteAnalyze.q.out | 8 +-
.../extrapolate_part_stats_partial_ndv.q.out | 154 ++--
.../results/clientpositive/llap/llap_smb.q.out | 4 +-
.../clientpositive/llap/stats_only_null.q.out | 7 +-
.../clientpositive/llap/subquery_scalar.q.out | 10 +-
.../clientpositive/llap/varchar_udf1.q.out | 2 +-
.../clientpositive/llap/vector_udf1.q.out | 2 +-
.../clientpositive/partial_column_stats.q.out | 8 +-
.../partition_coltype_literals.q.out | 48 +-
.../reduceSinkDeDuplication_pRS_key_empty.q.out | 6 +-
.../rename_external_partition_location.q.out | 28 +-
.../rename_table_update_column_stats.q.out | 108 +--
.../spark/avro_decimal_native.q.out | 8 +-
.../spark/spark_dynamic_partition_pruning.q.out | 8 +-
.../clientpositive/spark/stats_only_null.q.out | 6 +-
.../clientpositive/stats_only_null.q.out | 7 +-
.../temp_table_display_colstats_tbllvl.q.out | 99 ++-
.../clientpositive/tez/explainanalyze_5.q.out | 6 +-
.../clientpositive/tez/explainuser_3.q.out | 6 +-
.../results/clientpositive/tunable_ndv.q.out | 68 +-
127 files changed, 5217 insertions(+), 1620 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
index e20d299..160ce66 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
@@ -15,22 +15,28 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.hadoop.hive.common.ndv;
+package org.apache.hadoop.hive.common.ndv.fm;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.Random;
import javolution.util.FastBitSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
public class FMSketch implements NumDistinctValueEstimator{
static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName());
+ public static final byte[] MAGIC = new byte[] { 'F', 'M' };
/* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
* 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1.
@@ -38,7 +44,7 @@ public class FMSketch implements NumDistinctValueEstimator{
* independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1
* thus introducing errors in the estimates.
*/
- private static final int BIT_VECTOR_SIZE = 31;
+ public static final int BIT_VECTOR_SIZE = 31;
// Refer to Flajolet-Martin'86 for the value of phi
private static final double PHI = 0.77351;
@@ -111,27 +117,6 @@ public class FMSketch implements NumDistinctValueEstimator{
}
}
- public FMSketch(String s, int numBitVectors) {
- this.numBitVectors = numBitVectors;
- FastBitSet bitVectorDeser[] = genBitSet(s, numBitVectors);
- bitVector = new FastBitSet[numBitVectors];
- for(int i=0; i <numBitVectors; i++) {
- bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
- bitVector[i].clear();
- bitVector[i].or(bitVectorDeser[i]);
- }
-
- a = null;
- b = null;
-
- aValue = null;
- bValue = null;
- }
-
- public FMSketch(String s) {
- this(s, StringUtils.countMatches(s, "{"));
- }
-
/**
* Resets a distinctValueEstimator object to its original state.
*/
@@ -145,6 +130,10 @@ public class FMSketch implements NumDistinctValueEstimator{
return bitVector[index];
}
+ public FastBitSet setBitVector(FastBitSet fastBitSet, int index) {
+ return bitVector[index] = fastBitSet;
+ }
+
public int getnumBitVectors() {
return numBitVectors;
}
@@ -168,67 +157,30 @@ public class FMSketch implements NumDistinctValueEstimator{
LOG.debug(t);
}
- /* Serializes a distinctValueEstimator object to Text for transport.
- *
- */
+ @Override
public String serialize() {
- String s = new String();
- for(int i=0; i < numBitVectors; i++) {
- s = s + (bitVector[i].toString());
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ // write bytes to bos ...
+ try {
+ FMSketchUtils.serializeFM(bos, this);
+ String result = Base64.encodeBase64String(bos.toByteArray());
+ bos.close();
+ return result;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
- return s;
}
- /* Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator object and
- * returns it.
- */
-
- private FastBitSet[] genBitSet(String s, int numBitVectors) {
- FastBitSet[] b = new FastBitSet[numBitVectors];
- for (int j=0; j < numBitVectors; j++) {
- b[j] = new FastBitSet(BIT_VECTOR_SIZE);
- b[j].clear();
- }
-
- int vectorIndex =0;
-
- /* Parse input string to obtain the indexes that are set in the bitvector.
- * When a toString() is called on a FastBitSet object to serialize it, the serialization
- * adds { and } to the beginning and end of the return String.
- * Skip "{", "}", ",", " " in the input string.
- */
- for(int i=1; i < s.length()-1;) {
- char c = s.charAt(i);
- i = i + 1;
-
- // Move on to the next bit vector
- if (c == '}') {
- vectorIndex = vectorIndex + 1;
- }
-
- // Encountered a numeric value; Extract out the entire number
- if (c >= '0' && c <= '9') {
- String t = new String();
- t = t + c;
- c = s.charAt(i);
- i = i + 1;
-
- while (c != ',' && c!= '}') {
- t = t + c;
- c = s.charAt(i);
- i = i + 1;
- }
-
- int bitIndex = Integer.parseInt(t);
- assert(bitIndex >= 0);
- assert(vectorIndex < numBitVectors);
- b[vectorIndex].set(bitIndex);
- if (c == '}') {
- vectorIndex = vectorIndex + 1;
- }
- }
+ @Override
+ public NumDistinctValueEstimator deserialize(String s) {
+ InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s));
+ try {
+ NumDistinctValueEstimator n = FMSketchUtils.deserializeFM(is);
+ is.close();
+ return n;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
- return b;
}
private int generateHash(long v, int hashNum) {
@@ -387,11 +339,6 @@ public class FMSketch implements NumDistinctValueEstimator{
return lengthFor(model, getnumBitVectors());
}
- @Override
- public NumDistinctValueEstimator deserialize(String s) {
- return new FMSketch(s);
- }
-
// the caller needs to gurrantee that they are the same type based on numBitVectors
@Override
public void mergeEstimators(NumDistinctValueEstimator o) {
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
index e810ac5..6a29859 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
@@ -19,6 +19,14 @@
package org.apache.hadoop.hive.common.ndv;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
+import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
public class NumDistinctValueEstimatorFactory {
@@ -26,11 +34,25 @@ public class NumDistinctValueEstimatorFactory {
private NumDistinctValueEstimatorFactory() {
}
+ private static boolean isFMSketch(String s) throws IOException {
+ InputStream in = new ByteArrayInputStream(Base64.decodeBase64(s));
+ byte[] magic = new byte[2];
+ magic[0] = (byte) in.read();
+ magic[1] = (byte) in.read();
+ in.close();
+ return Arrays.equals(magic, FMSketchUtils.MAGIC);
+ }
+
public static NumDistinctValueEstimator getNumDistinctValueEstimator(String s) {
- if (s.startsWith("{")) {
- return new FMSketch(s);
- } else {
- return HyperLogLog.builder().build().deserialize(s);
+ // Right now we assume only FM and HLL are available.
+ try {
+ if (isFMSketch(s)) {
+ return FMSketchUtils.deserializeFM(s);
+ } else {
+ return HyperLogLog.builder().build().deserialize(s);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
new file mode 100644
index 0000000..b6f7fdd
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.ndv.fm;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+import javolution.util.FastBitSet;
+
+import org.apache.commons.codec.binary.Base64;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Binary serialization helpers for {@link FMSketch}.
+ *
+ * Stream layout, as written by {@link #serializeFM}:
+ * <ul>
+ * <li>2 bytes - the {@link #MAGIC} marker identifying an FM sketch stream</li>
+ * <li>2 bytes - numBitVectors, low byte first</li>
+ * <li>4 bytes per bit vector, low byte first, bit {@code pos} of the 32-bit
+ * word being set when position {@code pos} of the vector is set</li>
+ * </ul>
+ */
+public class FMSketchUtils {
+
+  // NOTE(review): the logger is keyed on FMSketch rather than on this class -
+  // confirm that sharing the logger name is intentional.
+  static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName());
+  /** Marker bytes ('F', 'M') written at the start of every serialized FM sketch. */
+  public static final byte[] MAGIC = new byte[] { 'F', 'M' };
+
+  /**
+   * Writes the binary form of an FM sketch to the given stream.
+   *
+   * @param out destination stream; it is not closed by this method
+   * @param fm sketch to serialize
+   * @throws IOException if writing to {@code out} fails
+   */
+  public static void serializeFM(OutputStream out, FMSketch fm) throws IOException {
+    out.write(MAGIC);
+
+    // max of numBitVectors = 1024, 2 bytes is enough.
+    int numBitVectors = fm.getnumBitVectors();
+    byte[] nbv = new byte[2];
+    nbv[0] = (byte) numBitVectors;
+    nbv[1] = (byte) (numBitVectors >>> 8);
+    out.write(nbv);
+
+    // original toString takes too much space
+    // we compress a fastbitset to 4 bytes
+    for (int i = 0; i < numBitVectors; i++) {
+      writeBitVector(out, fm.getBitVector(i));
+    }
+  }
+
+  // BIT_VECTOR_SIZE is 31, we can use 32 bits, i.e., 4 bytes to represent a
+  // FastBitSet, rather than using 31 integers.
+  private static void writeBitVector(OutputStream out, FastBitSet bit) throws IOException {
+    int packed = 0;
+    for (int pos = 0; pos < FMSketch.BIT_VECTOR_SIZE; pos++) {
+      if (bit.get(pos)) {
+        packed |= 1 << pos;
+      }
+    }
+    // Emit the packed word low byte first; readBitVector relies on this order.
+    byte[] encoded = new byte[4];
+    for (int j = 0; j < 4; j++) {
+      encoded[j] = (byte) ((packed >>> (8 * j)) & 0xff);
+    }
+    out.write(encoded);
+  }
+
+  /**
+   * Rebuilds an FM sketch from its Base64-encoded binary form.
+   *
+   * @param s Base64 text of a stream produced by {@link #serializeFM}
+   * @return the deserialized sketch
+   * @throws IllegalArgumentException if the decoded bytes do not start with {@link #MAGIC}
+   * @throws RuntimeException wrapping the IOException when the decoded bytes
+   *           end before the whole sketch has been read
+   */
+  public static FMSketch deserializeFM(String s) throws IOException {
+    // try-with-resources closes the stream on the failure path as well.
+    try (InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s))) {
+      return deserializeFM(is);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Rebuilds an FM sketch from a binary stream positioned at the magic bytes.
+   *
+   * @param in source stream; it is not closed by this method
+   * @return the deserialized sketch
+   * @throws IllegalArgumentException if the stream does not start with {@link #MAGIC}
+   * @throws IOException if reading fails or the stream ends before the header
+   *           and all bit vectors have been read
+   */
+  public static FMSketch deserializeFM(InputStream in) throws IOException {
+    checkMagicString(in);
+
+    // numBitVectors is stored low byte first.
+    int low = readUnsignedByte(in);
+    int high = readUnsignedByte(in);
+    int numBitVectors = low | (high << 8);
+
+    FMSketch sketch = new FMSketch(numBitVectors);
+    for (int n = 0; n < numBitVectors; n++) {
+      sketch.setBitVector(readBitVector(in), n);
+    }
+    return sketch;
+  }
+
+  // Reads the 4-byte packed form written by writeBitVector back into a FastBitSet.
+  private static FastBitSet readBitVector(InputStream in) throws IOException {
+    FastBitSet fastBitSet = new FastBitSet();
+    fastBitSet.clear();
+    for (int i = 0; i < 4; i++) {
+      int b = readUnsignedByte(in);
+      for (int j = 0; j < 8; j++) {
+        if ((b & (1 << j)) != 0) {
+          fastBitSet.set(j + 8 * i);
+        }
+      }
+    }
+    return fastBitSet;
+  }
+
+  // Reads one byte and fails on end of stream. Without this check the -1
+  // returned by read() would be cast to 0xFF and silently decoded as data.
+  private static int readUnsignedByte(InputStream in) throws IOException {
+    int b = in.read();
+    if (b < 0) {
+      throw new java.io.EOFException("Unexpected end of FMSketch stream.");
+    }
+    return b;
+  }
+
+  // Consumes the two magic bytes; a short stream yields -1 bytes, which never
+  // match MAGIC and are therefore reported as "not a FMSketch stream".
+  private static void checkMagicString(InputStream in) throws IOException {
+    byte[] magic = new byte[2];
+    magic[0] = (byte) in.read();
+    magic[1] = (byte) in.read();
+
+    if (!Arrays.equals(magic, MAGIC)) {
+      throw new IllegalArgumentException("The input stream is not a FMSketch stream.");
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index d195546..182560a 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -277,7 +277,9 @@ public class HyperLogLog implements NumDistinctValueEstimator{
}
public long estimateNumDistinctValues() {
- return count();
+    // FMSketch treats the ndv of all nulls as 1 but hll treats the ndv as 0.
+    // To avoid divide-by-zero problems downstream, we follow FMSketch and
+    // never report less than 1.
+    long ndv = count();
+    return ndv > 0 ? ndv : 1;
}
public long count() {
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index dd9ad71..05f6cc9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1737,7 +1737,9 @@ public class HiveConf extends Configuration {
"Whether column accesses are tracked in the QueryPlan.\n" +
"This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed."),
HIVE_STATS_NDV_ALGO("hive.stats.ndv.algo", "hll", new PatternSet("hll", "fm"),
- "hll and fm stand for HyperLogLog and FM-sketch, respectively for computing ndv."),
+ "hll and fm stand for HyperLogLog and FM-sketch, respectively for computing ndv."),
+ HIVE_STATS_FETCH_BITVECTOR("hive.stats.fetch.bitvector", false,
+ "Whether we fetch bitvector when we compute ndv. Users can turn it off if they want to use old schema"),
// standard error allowed for ndv estimates for FM-sketch. A lower value indicates higher accuracy and a
// higher compute cost.
HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
----------------------------------------------------------------------
diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
new file mode 100644
index 0000000..74fdf58
--- /dev/null
+++ b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.ndv.fm;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import javolution.util.FastBitSet;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.junit.Test;
+
+/**
+ * Round-trip test for the binary FM sketch serialization: a sketch built from
+ * known bit vectors must survive serialize/deserialize unchanged.
+ */
+public class TestFMSketchSerialization {
+
+  /**
+   * Parses the textual "{a, b}{c}..." form (one brace group per bit vector)
+   * into FastBitSets so the test can build a sketch with known contents.
+   *
+   * @param s concatenated brace groups listing the set bit positions
+   * @param numBitVectors number of brace groups expected in {@code s}
+   * @return one FastBitSet per brace group
+   */
+  private FastBitSet[] deserialize(String s, int numBitVectors) {
+    FastBitSet[] b = new FastBitSet[numBitVectors];
+    for (int j = 0; j < numBitVectors; j++) {
+      b[j] = new FastBitSet(FMSketch.BIT_VECTOR_SIZE);
+      b[j].clear();
+    }
+
+    int vectorIndex = 0;
+
+    /*
+     * Parse input string to obtain the indexes that are set in the bitvector.
+     * When a toString() is called on a FastBitSet object to serialize it, the
+     * serialization adds { and } to the beginning and end of the return String.
+     * Skip "{", "}", ",", " " in the input string.
+     */
+    for (int i = 1; i < s.length() - 1;) {
+      char c = s.charAt(i);
+      i = i + 1;
+
+      // Move on to the next bit vector
+      if (c == '}') {
+        vectorIndex = vectorIndex + 1;
+      }
+
+      // Encountered a numeric value; Extract out the entire number
+      if (c >= '0' && c <= '9') {
+        StringBuilder digits = new StringBuilder();
+        digits.append(c);
+        c = s.charAt(i);
+        i = i + 1;
+
+        while (c != ',' && c != '}') {
+          digits.append(c);
+          c = s.charAt(i);
+          i = i + 1;
+        }
+
+        int bitIndex = Integer.parseInt(digits.toString());
+        assert (bitIndex >= 0);
+        assert (vectorIndex < numBitVectors);
+        b[vectorIndex].set(bitIndex);
+        if (c == '}') {
+          vectorIndex = vectorIndex + 1;
+        }
+      }
+    }
+    return b;
+  }
+
+  @Test
+  public void testSerDe() throws IOException {
+    String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
+    FastBitSet[] fastBitSet = deserialize(bitVectors, 16);
+    FMSketch sketch = new FMSketch(16);
+    for (int i = 0; i < 16; i++) {
+      sketch.setBitVector(fastBitSet[i], i);
+    }
+    assertEquals(3, sketch.estimateNumDistinctValues());
+    String s = sketch.serialize();
+    FMSketch newSketch = (FMSketch) NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(s);
+    // Compare the sketches bit by bit instead of calling equals() and
+    // discarding its result, so a mismatch actually fails the test.
+    assertEquals(sketch.getnumBitVectors(), newSketch.getnumBitVectors());
+    for (int i = 0; i < 16; i++) {
+      for (int pos = 0; pos < FMSketch.BIT_VECTOR_SIZE; pos++) {
+        assertEquals("bit " + pos + " of vector " + i,
+            sketch.getBitVector(i).get(pos), newSketch.getBitVector(i).get(pos));
+      }
+    }
+    assertEquals(3, newSketch.estimateNumDistinctValues());
+    assertEquals(s, newSketch.serialize());
+  }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 62364fe..a205b8c 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -314,5 +314,9 @@
<value>true</value>
</property>
+<property>
+ <name>hive.stats.fetch.bitvector</name>
+ <value>true</value>
+</property>
</configuration>
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/llap/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml
index cac5a3b..870b584 100644
--- a/data/conf/llap/hive-site.xml
+++ b/data/conf/llap/hive-site.xml
@@ -333,5 +333,9 @@
<value>4</value>
<description> </description>
</property>
+<property>
+ <name>hive.stats.fetch.bitvector</name>
+ <value>true</value>
+</property>
</configuration>
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/perf-reg/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/perf-reg/hive-site.xml b/data/conf/perf-reg/hive-site.xml
index 012369f..331a500 100644
--- a/data/conf/perf-reg/hive-site.xml
+++ b/data/conf/perf-reg/hive-site.xml
@@ -282,4 +282,9 @@
<value>true</value>
</property>
+<property>
+ <name>hive.stats.fetch.bitvector</name>
+ <value>true</value>
+</property>
+
</configuration>
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index 28abc2d..35e8c99 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -278,4 +278,9 @@
<value>true</value>
</property>
+<property>
+ <name>hive.stats.fetch.bitvector</name>
+ <value>true</value>
+</property>
+
</configuration>
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
new file mode 100644
index 0000000..2c2177b
--- /dev/null
+++ b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
@@ -0,0 +1 @@
+ALTER TABLE "APP"."PART_COL_STATS" ADD COLUMN "BIT_VECTOR" BLOB;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
index a9a5329..f4cbba6 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
@@ -94,7 +94,7 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as
CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767));
-CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "BIT_VECTOR" BLOB, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255));
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
index 30513dc..01b6f90 100644
--- a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
@@ -2,5 +2,6 @@
RUN '041-HIVE-16556.derby.sql';
RUN '042-HIVE-16575.derby.sql';
RUN '043-HIVE-16922.derby.sql';
+RUN '044-HIVE-16997.derby.sql';
UPDATE "APP".VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
new file mode 100644
index 0000000..1882c59
--- /dev/null
+++ b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD BIT_VECTOR VARBINARY(MAX);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
index 1cfe2d1..fa8fc6e 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
@@ -87,6 +87,7 @@ CREATE TABLE PART_COL_STATS
LONG_LOW_VALUE bigint NULL,
MAX_COL_LEN bigint NULL,
NUM_DISTINCTS bigint NULL,
+ BIT_VECTOR varbinary(max) NULL,
NUM_FALSES bigint NULL,
NUM_NULLS bigint NOT NULL,
NUM_TRUES bigint NULL,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
index 5683254..21d62ae 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE;
:r 026-HIVE-16556.mssql.sql
:r 027-HIVE-16575.mssql.sql
:r 028-HIVE-16922.mssql.sql
+:r 029-HIVE-16997.mssql.sql
UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
new file mode 100644
index 0000000..4954b2e
--- /dev/null
+++ b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD COLUMN BIT_VECTOR BLOB;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
index 97d881f..31963d0 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
@@ -690,6 +690,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
`BIG_DECIMAL_HIGH_VALUE` varchar(4000) CHARACTER SET latin1 COLLATE latin1_bin,
`NUM_NULLS` bigint(20) NOT NULL,
`NUM_DISTINCTS` bigint(20),
+ `BIT_VECTOR` blob,
`AVG_COL_LEN` double(53,4),
`MAX_COL_LEN` bigint(20),
`NUM_TRUES` bigint(20),
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
index ba62939..9cd3a62 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' ';
SOURCE 041-HIVE-16556.mysql.sql;
SOURCE 042-HIVE-16575.mysql.sql;
SOURCE 043-HIVE-16922.mysql.sql;
+SOURCE 044-HIVE-16997.mysql.sql;
UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' ';
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
new file mode 100644
index 0000000..44e5fa3
--- /dev/null
+++ b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD BIT_VECTOR BLOB NULL;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
index 8fdb552..81e4208 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
@@ -515,6 +515,7 @@ CREATE TABLE PART_COL_STATS (
BIG_DECIMAL_HIGH_VALUE VARCHAR2(4000),
NUM_NULLS NUMBER NOT NULL,
NUM_DISTINCTS NUMBER,
+ BIT_VECTOR BLOB,
AVG_COL_LEN NUMBER,
MAX_COL_LEN NUMBER,
NUM_TRUES NUMBER,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
index 0a70d47..6a26649 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual;
@041-HIVE-16556.oracle.sql;
@042-HIVE-16575.oracle.sql;
@043-HIVE-16922.oracle.sql;
+@044-HIVE-16997.oracle.sql;
UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
new file mode 100644
index 0000000..bee8c44
--- /dev/null
+++ b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
@@ -0,0 +1 @@
+ALTER TABLE "PART_COL_STATS" ADD COLUMN "BIT_VECTOR" BYTEA;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
index 1cdeb6b..5cb5cb0 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
@@ -534,6 +534,7 @@ CREATE TABLE "PART_COL_STATS" (
"BIG_DECIMAL_HIGH_VALUE" character varying(4000) DEFAULT NULL::character varying,
"NUM_NULLS" bigint NOT NULL,
"NUM_DISTINCTS" bigint,
+ "BIT_VECTOR" bytea,
"AVG_COL_LEN" double precision,
"MAX_COL_LEN" bigint,
"NUM_TRUES" bigint,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
index c44dd06..ee5a673 100644
--- a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0';
\i 040-HIVE-16556.postgres.sql;
\i 041-HIVE-16575.postgres.sql;
\i 042-HIVE-16922.postgres.sql;
+\i 043-HIVE-16997.postgres.sql;
UPDATE "VERSION" SET "SCHEMA_VERSION"='3.0.0', "VERSION_COMMENT"='Hive release version 3.0.0' where "VER_ID"=1;
SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0';
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index a960b2d..73754ff 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore;
import static org.apache.commons.lang.StringUtils.join;
import static org.apache.commons.lang.StringUtils.repeat;
+import java.sql.Blob;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.Statement;
@@ -33,6 +34,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.TreeMap;
import javax.jdo.PersistenceManager;
@@ -64,6 +66,8 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory;
import org.apache.hadoop.hive.metastore.model.MConstraint;
import org.apache.hadoop.hive.metastore.model.MDatabase;
import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics;
@@ -941,6 +945,24 @@ class MetaStoreDirectSql {
}
}
+  /**
+   * Converts a value read from a BLOB column by a direct-SQL query to a String.
+   *
+   * @param value column value as returned by the query; may be null
+   * @return the blob content as a String, or null when {@code value} is null
+   *         or is not a {@link Blob}
+   * @throws MetaException if the blob content cannot be read; the underlying
+   *           SQLException is attached as the cause
+   */
+  static String extractSqlBlob(Object value) throws MetaException {
+    if (value == null) {
+      return null;
+    }
+    if (!(value instanceof Blob)) {
+      // this may happen when enablebitvector is false
+      LOG.debug("Expected blob type but got " + value.getClass().getName());
+      return null;
+    }
+    Blob blob = (Blob) value;
+    try {
+      // getBytes function says: pos the ordinal position of the first byte in
+      // the BLOB value to be extracted; the first byte is at position 1
+      // NOTE(review): the bytes are decoded with the platform default charset;
+      // confirm this matches how the writer encodes the bit vector.
+      return new String(blob.getBytes(1, (int) blob.length()));
+    } catch (SQLException e) {
+      // Keep the driver's message and stack trace instead of dropping them.
+      MetaException me =
+          new MetaException("Encounter error while processing blob: " + e.getMessage());
+      me.initCause(e);
+      throw me;
+    }
+  }
+
private static String trimCommaList(StringBuilder sb) {
if (sb.length() > 0) {
sb.setLength(sb.length() - 1);
@@ -1221,12 +1243,12 @@ class MetaStoreDirectSql {
* @throws MetaException
*/
public ColumnStatistics getTableStats(final String dbName, final String tableName,
- List<String> colNames) throws MetaException {
+ List<String> colNames, boolean enableBitVector) throws MetaException {
if (colNames == null || colNames.isEmpty()) {
return null;
}
final boolean doTrace = LOG.isDebugEnabled();
- final String queryText0 = "select " + STATS_COLLIST + " from " + TAB_COL_STATS + " "
+ final String queryText0 = "select " + getStatsList(enableBitVector) + " from " + TAB_COL_STATS + " "
+ " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (";
Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
public List<Object[]> run(List<String> input) throws MetaException {
@@ -1260,8 +1282,8 @@ class MetaStoreDirectSql {
}
public AggrStats aggrColStatsForPartitions(String dbName, String tableName,
- List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation, double ndvTuner)
- throws MetaException {
+ List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation,
+ double ndvTuner, boolean enableBitVector) throws MetaException {
if (colNames.isEmpty() || partNames.isEmpty()) {
LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval");
return new AggrStats(Collections.<ColumnStatisticsObj>emptyList(), 0); // Nothing to aggregate
@@ -1295,7 +1317,7 @@ class MetaStoreDirectSql {
// Read aggregated stats for one column
colStatsAggrFromDB =
columnStatisticsObjForPartitions(dbName, tableName, partNames, colNamesForDB,
- partsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+ partsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
if (!colStatsAggrFromDB.isEmpty()) {
ColumnStatisticsObj colStatsAggr = colStatsAggrFromDB.get(0);
colStatsList.add(colStatsAggr);
@@ -1308,7 +1330,7 @@ class MetaStoreDirectSql {
partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
colStatsList =
columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, partsFound,
- useDensityFunctionForNDVEstimation, ndvTuner);
+ useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
}
LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation
+ "\npartsFound = " + partsFound + "\nColumnStatisticsObj = "
@@ -1371,14 +1393,14 @@ class MetaStoreDirectSql {
private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(final String dbName,
final String tableName, final List<String> partNames, List<String> colNames, long partsFound,
- final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) throws MetaException {
+ final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
final boolean areAllPartsFound = (partsFound == partNames.size());
return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
return columnStatisticsObjForPartitionsBatch(dbName, tableName, inputPartNames,
- inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+ inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
}
});
}
@@ -1388,14 +1410,10 @@ class MetaStoreDirectSql {
// Get aggregated column stats for a table per partition for all columns in the partition
// This is primarily used to populate stats object when using CachedStore (Check CachedStore#prewarm)
public Map<String, List<ColumnStatisticsObj>> getColStatsForTablePartitions(String dbName,
- String tblName) throws MetaException {
- String queryText =
- "select \"PARTITION_NAME\", \"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", "
- + "\"LONG_HIGH_VALUE\", \"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", "
- + "\"BIG_DECIMAL_LOW_VALUE\", \"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", "
- + "\"NUM_DISTINCTS\", \"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\""
- + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
- + " order by \"PARTITION_NAME\"";
+ String tblName, boolean enableBitVector) throws MetaException {
+ String queryText = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from "
+ + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
+ + " order by \"PARTITION_NAME\"";
long start = 0;
long end = 0;
Query query = null;
@@ -1446,6 +1464,28 @@ class MetaStoreDirectSql {
/** Should be called with the list short enough to not trip up Oracle/etc. */
private List<ColumnStatisticsObj> columnStatisticsObjForPartitionsBatch(String dbName,
String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+      boolean useDensityFunctionForNDVEstimation, double ndvTuner, boolean enableBitVector) throws MetaException {
+    // Dispatch on enableBitVector: without bit vectors delegate to
+    // aggrStatsUseDB, with them delegate to aggrStatsUseJava.
+    if (!enableBitVector) {
+      return aggrStatsUseDB(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+    }
+    return aggrStatsUseJava(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+  }
+
+  /**
+   * Aggregates partition column statistics in Java: the per-partition stats
+   * are fetched with bit vectors enabled and then handed to
+   * MetaStoreUtils.aggrPartitionStats for aggregation.
+   */
+  private List<ColumnStatisticsObj> aggrStatsUseJava(String dbName, String tableName,
+      List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+      boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
+    // Step 1: stats of colNames for every partition in partNames, bit vectors included.
+    final boolean withBitVector = true;
+    List<ColumnStatistics> perPartitionStats =
+        getPartitionStats(dbName, tableName, partNames, colNames, withBitVector);
+    // Step 2: let the shared utility aggregate them.
+    List<ColumnStatisticsObj> aggregated = MetaStoreUtils.aggrPartitionStats(perPartitionStats,
+        dbName, tableName, partNames, colNames, areAllPartsFound,
+        useDensityFunctionForNDVEstimation, ndvTuner);
+    return aggregated;
+  }
+
+ private List<ColumnStatisticsObj> aggrStatsUseDB(String dbName,
+ String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound,
boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
// TODO: all the extrapolation logic should be moved out of this class,
// only mechanical data retrieval should remain here.
@@ -1717,10 +1757,10 @@ class MetaStoreDirectSql {
ColumnStatisticsData data = new ColumnStatisticsData();
ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
- declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+ declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], bitVector = row[i++],
avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
- llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+ llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, bitVector, avglen, maxlen, trues, falses);
return cso;
}
@@ -1753,14 +1793,14 @@ class MetaStoreDirectSql {
}
public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName,
- final List<String> partNames, List<String> colNames) throws MetaException {
+ final List<String> partNames, List<String> colNames, boolean enableBitVector) throws MetaException {
if (colNames.isEmpty() || partNames.isEmpty()) {
return Collections.emptyList();
}
final boolean doTrace = LOG.isDebugEnabled();
- final String queryText0 = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from "
- + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
- + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
+ final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from "
+ + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
+ + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
public List<Object[]> run(final List<String> inputColNames) throws MetaException {
Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {
@@ -1812,11 +1852,13 @@ class MetaStoreDirectSql {
}
/** The common query part for table and partition stats */
- private static final String STATS_COLLIST =
- "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", "
- + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", "
- + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", "
- + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" ";
+ private final String getStatsList(boolean enableBitVector) {
+ return "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", "
+ + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", "
+ + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", "
+ + (enableBitVector ? "\"BIT_VECTOR\", " : "\'\', ") + "\"AVG_COL_LEN\", "
+ + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" ";
+ }
private ColumnStatistics makeColumnStats(
List<Object[]> list, ColumnStatisticsDesc csd, int offset) throws MetaException {
@@ -1826,7 +1868,7 @@ class MetaStoreDirectSql {
for (Object[] row : list) {
// LastAnalyzed is stored per column but thrift has it per several;
// get the lowest for now as nobody actually uses this field.
- Object laObj = row[offset + 14];
+ Object laObj = row[offset + 15];
if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
csd.setLastAnalyzed(extractSqlLong(laObj));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index b52c94c..edfbf3a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -71,8 +71,10 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
@@ -1936,7 +1938,7 @@ public class MetaStoreUtils {
}
return metaException;
}
-
+
public static List<String> getColumnNames(List<FieldSchema> schema) {
List<String> cols = new ArrayList<>(schema.size());
for (FieldSchema fs : schema) {
@@ -1945,4 +1947,45 @@ public class MetaStoreUtils {
return cols;
}
+ // given a list of partStats, this function will give you an aggr stats
+ public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats,
+ String dbName, String tableName, List<String> partNames, List<String> colNames,
+ boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner)
+ throws MetaException {
+ // 1. group by the stats by colNames
+ // map the colName to List<ColumnStatistics>
+ Map<String, List<ColumnStatistics>> map = new HashMap<>();
+ for (ColumnStatistics css : partStats) {
+ List<ColumnStatisticsObj> objs = css.getStatsObj();
+ for (ColumnStatisticsObj obj : objs) {
+ List<ColumnStatisticsObj> singleObj = new ArrayList<>();
+ singleObj.add(obj);
+ ColumnStatistics singleCS = new ColumnStatistics(css.getStatsDesc(), singleObj);
+ if (!map.containsKey(obj.getColName())) {
+ map.put(obj.getColName(), new ArrayList<ColumnStatistics>());
+ }
+ map.get(obj.getColName()).add(singleCS);
+ }
+ }
+ return aggrPartitionStats(map,dbName,tableName,partNames,colNames,areAllPartsFound,useDensityFunctionForNDVEstimation, ndvTuner);
+ }
+
+ public static List<ColumnStatisticsObj> aggrPartitionStats(
+ Map<String, List<ColumnStatistics>> map, String dbName, String tableName,
+ List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+ boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
+ List<ColumnStatisticsObj> colStats = new ArrayList<>();
+ // 2. aggr stats for each colName
+ // TODO: thread pool can be used to speed up the process
+ for (Entry<String, List<ColumnStatistics>> entry : map.entrySet()) {
+ List<ColumnStatistics> css = entry.getValue();
+ ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css
+ .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(),
+ useDensityFunctionForNDVEstimation, ndvTuner);
+ ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css);
+ colStats.add(statsObj);
+ }
+ return colStats;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index db4ec91..eea12291 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -7193,11 +7193,13 @@ public class ObjectStore implements RawStore, Configurable {
protected ColumnStatistics getTableColumnStatisticsInternal(
String dbName, String tableName, final List<String> colNames, boolean allowSql,
boolean allowJdo) throws MetaException, NoSuchObjectException {
+ final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+ HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
return new GetStatHelper(HiveStringUtils.normalizeIdentifier(dbName),
HiveStringUtils.normalizeIdentifier(tableName), allowSql, allowJdo) {
@Override
protected ColumnStatistics getSqlResult(GetHelper<ColumnStatistics> ctx) throws MetaException {
- return directSql.getTableStats(dbName, tblName, colNames);
+ return directSql.getTableStats(dbName, tblName, colNames, enableBitVector);
}
@Override
protected ColumnStatistics getJdoResult(
@@ -7215,7 +7217,7 @@ public class ObjectStore implements RawStore, Configurable {
if (desc.getLastAnalyzed() > mStat.getLastAnalyzed()) {
desc.setLastAnalyzed(mStat.getLastAnalyzed());
}
- statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat));
+ statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat, enableBitVector));
Deadline.checkTimeout();
}
return new ColumnStatistics(desc, statObjs);
@@ -7236,11 +7238,13 @@ public class ObjectStore implements RawStore, Configurable {
protected List<ColumnStatistics> getPartitionColumnStatisticsInternal(
String dbName, String tableName, final List<String> partNames, final List<String> colNames,
boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException {
+ final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+ HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
return new GetListHelper<ColumnStatistics>(dbName, tableName, allowSql, allowJdo) {
@Override
protected List<ColumnStatistics> getSqlResult(
GetHelper<List<ColumnStatistics>> ctx) throws MetaException {
- return directSql.getPartitionStats(dbName, tblName, partNames, colNames);
+ return directSql.getPartitionStats(dbName, tblName, partNames, colNames, enableBitVector);
}
@Override
protected List<ColumnStatistics> getJdoResult(
@@ -7268,7 +7272,7 @@ public class ObjectStore implements RawStore, Configurable {
csd = StatObjectConverter.getPartitionColumnStatisticsDesc(mStatsObj);
curList = new ArrayList<ColumnStatisticsObj>(colNames.size());
}
- curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj));
+ curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj, enableBitVector));
lastPartName = partName;
Deadline.checkTimeout();
}
@@ -7288,12 +7292,14 @@ public class ObjectStore implements RawStore, Configurable {
HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
final double ndvTuner = HiveConf.getFloatVar(getConf(),
HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
+ final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+ HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
return new GetHelper<AggrStats>(dbName, tblName, true, false) {
@Override
protected AggrStats getSqlResult(GetHelper<AggrStats> ctx)
throws MetaException {
return directSql.aggrColStatsForPartitions(dbName, tblName, partNames,
- colNames, useDensityFunctionForNDVEstimation, ndvTuner);
+ colNames, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
}
@Override
protected AggrStats getJdoResult(GetHelper<AggrStats> ctx)
@@ -7313,11 +7319,13 @@ public class ObjectStore implements RawStore, Configurable {
@Override
public Map<String, List<ColumnStatisticsObj>> getColStatsForTablePartitions(String dbName,
String tableName) throws MetaException, NoSuchObjectException {
+ final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+ HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
return new GetHelper<Map<String, List<ColumnStatisticsObj>>>(dbName, tableName, true, false) {
@Override
protected Map<String, List<ColumnStatisticsObj>> getSqlResult(
GetHelper<Map<String, List<ColumnStatisticsObj>>> ctx) throws MetaException {
- return directSql.getColStatsForTablePartitions(dbName, tblName);
+ return directSql.getColStatsForTablePartitions(dbName, tblName, enableBitVector);
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
index 2dc2804..d53ea4c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
@@ -76,6 +76,7 @@ public class StatObjectConverter {
mColStats.setLongStats(
longStats.isSetNumNulls() ? longStats.getNumNulls() : null,
longStats.isSetNumDVs() ? longStats.getNumDVs() : null,
+ longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null,
longStats.isSetLowValue() ? longStats.getLowValue() : null,
longStats.isSetHighValue() ? longStats.getHighValue() : null);
} else if (statsObj.getStatsData().isSetDoubleStats()) {
@@ -83,6 +84,7 @@ public class StatObjectConverter {
mColStats.setDoubleStats(
doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null,
doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
+ doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null,
doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
} else if (statsObj.getStatsData().isSetDecimalStats()) {
@@ -92,12 +94,14 @@ public class StatObjectConverter {
mColStats.setDecimalStats(
decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null,
decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null,
+ decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null,
low, high);
} else if (statsObj.getStatsData().isSetStringStats()) {
StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
mColStats.setStringStats(
stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null,
stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
+ stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null,
stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
} else if (statsObj.getStatsData().isSetBinaryStats()) {
@@ -111,6 +115,7 @@ public class StatObjectConverter {
mColStats.setDateStats(
dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null,
dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null,
+ dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null,
dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null,
dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
}
@@ -146,6 +151,9 @@ public class StatObjectConverter {
if (mStatsObj.getNumDVs() != null) {
oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
}
+ if (mStatsObj.getBitVector() != null) {
+ oldStatsObj.setBitVector(mStatsObj.getBitVector());
+ }
if (mStatsObj.getNumFalses() != null) {
oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
}
@@ -188,6 +196,9 @@ public class StatObjectConverter {
if (mStatsObj.getNumDVs() != null) {
oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
}
+ if (mStatsObj.getBitVector() != null) {
+ oldStatsObj.setBitVector(mStatsObj.getBitVector());
+ }
if (mStatsObj.getNumFalses() != null) {
oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
}
@@ -200,7 +211,7 @@ public class StatObjectConverter {
}
public static ColumnStatisticsObj getTableColumnStatisticsObj(
- MTableColumnStatistics mStatsObj) {
+ MTableColumnStatistics mStatsObj, boolean enableBitVector) {
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
statsObj.setColType(mStatsObj.getColType());
statsObj.setColName(mStatsObj.getColName());
@@ -220,6 +231,7 @@ public class StatObjectConverter {
stringStats.setAvgColLen(mStatsObj.getAvgColLen());
stringStats.setMaxColLen(mStatsObj.getMaxColLen());
stringStats.setNumDVs(mStatsObj.getNumDVs());
+ stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
@@ -241,6 +253,7 @@ public class StatObjectConverter {
longStats.setLowValue(longLowValue);
}
longStats.setNumDVs(mStatsObj.getNumDVs());
+ longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
@@ -254,6 +267,7 @@ public class StatObjectConverter {
doubleStats.setLowValue(doubleLowValue);
}
doubleStats.setNumDVs(mStatsObj.getNumDVs());
+ doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
@@ -267,6 +281,7 @@ public class StatObjectConverter {
decimalStats.setLowValue(createThriftDecimal(decimalLowValue));
}
decimalStats.setNumDVs(mStatsObj.getNumDVs());
+ decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setDecimalStats(decimalStats);
} else if (colType.equals("date")) {
DateColumnStatsData dateStats = new DateColumnStatsData();
@@ -280,6 +295,7 @@ public class StatObjectConverter {
dateStats.setLowValue(new Date(lowValue));
}
dateStats.setNumDVs(mStatsObj.getNumDVs());
+ dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setDateStats(dateStats);
}
statsObj.setStatsData(colStatsData);
@@ -323,6 +339,7 @@ public class StatObjectConverter {
mColStats.setLongStats(
longStats.isSetNumNulls() ? longStats.getNumNulls() : null,
longStats.isSetNumDVs() ? longStats.getNumDVs() : null,
+ longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null,
longStats.isSetLowValue() ? longStats.getLowValue() : null,
longStats.isSetHighValue() ? longStats.getHighValue() : null);
} else if (statsObj.getStatsData().isSetDoubleStats()) {
@@ -330,6 +347,7 @@ public class StatObjectConverter {
mColStats.setDoubleStats(
doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null,
doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
+ doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null,
doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
} else if (statsObj.getStatsData().isSetDecimalStats()) {
@@ -339,12 +357,14 @@ public class StatObjectConverter {
mColStats.setDecimalStats(
decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null,
decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null,
+ decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null,
low, high);
} else if (statsObj.getStatsData().isSetStringStats()) {
StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
mColStats.setStringStats(
stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null,
stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
+ stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null,
stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
} else if (statsObj.getStatsData().isSetBinaryStats()) {
@@ -358,6 +378,7 @@ public class StatObjectConverter {
mColStats.setDateStats(
dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null,
dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null,
+ dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null,
dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null,
dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
}
@@ -365,7 +386,7 @@ public class StatObjectConverter {
}
public static ColumnStatisticsObj getPartitionColumnStatisticsObj(
- MPartitionColumnStatistics mStatsObj) {
+ MPartitionColumnStatistics mStatsObj, boolean enableBitVector) {
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
statsObj.setColType(mStatsObj.getColType());
statsObj.setColName(mStatsObj.getColName());
@@ -385,6 +406,7 @@ public class StatObjectConverter {
stringStats.setAvgColLen(mStatsObj.getAvgColLen());
stringStats.setMaxColLen(mStatsObj.getMaxColLen());
stringStats.setNumDVs(mStatsObj.getNumDVs());
+ stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
@@ -404,6 +426,7 @@ public class StatObjectConverter {
longStats.setLowValue(mStatsObj.getLongLowValue());
}
longStats.setNumDVs(mStatsObj.getNumDVs());
+ longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
@@ -415,6 +438,7 @@ public class StatObjectConverter {
doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
}
doubleStats.setNumDVs(mStatsObj.getNumDVs());
+ doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
@@ -426,6 +450,7 @@ public class StatObjectConverter {
decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
}
decimalStats.setNumDVs(mStatsObj.getNumDVs());
+ decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setDecimalStats(decimalStats);
} else if (colType.equals("date")) {
DateColumnStatsData dateStats = new DateColumnStatsData();
@@ -433,6 +458,7 @@ public class StatObjectConverter {
dateStats.setHighValue(new Date(mStatsObj.getLongHighValue()));
dateStats.setLowValue(new Date(mStatsObj.getLongLowValue()));
dateStats.setNumDVs(mStatsObj.getNumDVs());
+ dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
colStatsData.setDateStats(dateStats);
}
statsObj.setStatsData(colStatsData);
@@ -450,10 +476,10 @@ public class StatObjectConverter {
return statsDesc;
}
- // SQL
+ // JAVA
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
- Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
+ Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
colType = colType.toLowerCase();
if (colType.equals("boolean")) {
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
@@ -468,6 +494,7 @@ public class StatObjectConverter {
stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+ stringStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
data.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
@@ -487,6 +514,7 @@ public class StatObjectConverter {
longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
}
longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+ longStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
data.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
@@ -498,6 +526,7 @@ public class StatObjectConverter {
doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow));
}
doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+ doubleStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
data.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
@@ -509,6 +538,7 @@ public class StatObjectConverter {
decimalStats.setLowValue(createThriftDecimal((String)declow));
}
decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+ decimalStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
data.setDecimalStats(decimalStats);
} else if (colType.equals("date")) {
DateColumnStatsData dateStats = new DateColumnStatsData();
@@ -520,10 +550,12 @@ public class StatObjectConverter {
dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow)));
}
dateStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+ dateStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
data.setDateStats(dateStats);
}
}
+ //DB
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index cea94a0..fb98ccf 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.metastore.cache;
import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -80,8 +81,8 @@ import org.apache.hadoop.hive.metastore.api.Type;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -92,6 +93,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
// TODO filter->expr
// TODO functionCache
@@ -1562,27 +1564,37 @@ public class CachedStore implements RawStore, Configurable {
private ColumnStatisticsObj mergeColStatsForPartitions(String dbName, String tblName,
List<String> partNames, String colName) throws MetaException {
- ColumnStatisticsObj colStats = null;
+ final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(),
+ HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
+ final double ndvTuner = HiveConf.getFloatVar(getConf(),
+ HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
+ Map<String, List<ColumnStatistics>> map = new HashMap<>();
+ List<ColumnStatistics> list = new ArrayList<>();
+ boolean areAllPartsFound = true;
for (String partName : partNames) {
- String colStatsCacheKey =
- CacheUtils.buildKey(dbName, tblName, partNameToVals(partName), colName);
- ColumnStatisticsObj colStatsForPart =
- SharedCache.getCachedPartitionColStats(colStatsCacheKey);
- if (colStatsForPart == null) {
- // we don't have stats for all the partitions
- // logic for extrapolation hasn't been added to CacheStore
- // So stop now, and lets fallback to underlying RawStore
- return null;
- }
- if (colStats == null) {
- colStats = colStatsForPart;
+ String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName),
+ colName);
+ List<ColumnStatisticsObj> singleObj = new ArrayList<>();
+ ColumnStatisticsObj colStatsForPart = SharedCache
+ .getCachedPartitionColStats(colStatsCacheKey);
+ if (colStatsForPart != null) {
+ singleObj.add(colStatsForPart);
+ ColumnStatisticsDesc css = new ColumnStatisticsDesc(false, dbName, tblName);
+ css.setPartName(partName);
+ list.add(new ColumnStatistics(css, singleObj));
} else {
- ColumnStatsMerger merger =
- ColumnStatsMergerFactory.getColumnStatsMerger(colStats, colStatsForPart);
- merger.merge(colStats, colStatsForPart);
+ areAllPartsFound = false;
}
}
- return colStats;
+ map.put(colName, list);
+ List<String> colNames = new ArrayList<>();
+ colNames.add(colName);
+ // Note that enableBitVector does not apply here because ColumnStatisticsObj
+ // itself will tell whether
+ // bitvector is null or not and aggr logic can automatically apply.
+ return MetaStoreUtils
+ .aggrPartitionStats(map, dbName, tblName, partNames, colNames, areAllPartsFound,
+ useDensityFunctionForNDVEstimation, ndvTuner).iterator().next();
}
@Override
[05/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
index 0f28225..ca1ec00 100644
--- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
+++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
@@ -41,20 +41,22 @@ PREHOOK: Input: default@all_nulls
POSTHOOK: query: describe formatted all_nulls a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@all_nulls
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a bigint 0 0 5 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a bigint 0 0 5 1 SExM4AEA
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: describe formatted all_nulls b
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@all_nulls
POSTHOOK: query: describe formatted all_nulls b
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@all_nulls
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-b double 0.0 0.0 5 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b double 0.0 0.0 5 1 SExM4AEA
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: drop table all_nulls
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@all_nulls
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
index fb833bc..74085bf 100644
--- a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
+++ b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
@@ -48,10 +48,10 @@ PREHOOK: Input: default@space
POSTHOOK: query: desc formatted space ` left`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@space
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
- left string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ left string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
PREHOOK: query: insert into space values ("1", "2", "3")
PREHOOK: type: QUERY
PREHOOK: Output: default@space
@@ -67,10 +67,11 @@ PREHOOK: Input: default@space
POSTHOOK: query: desc formatted space ` left`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@space
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
- left string 0 1 1.0 1 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ left string 0 1 1.0 1 SExM4AEBxbi8+AQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}}
PREHOOK: query: select * from space
PREHOOK: type: QUERY
PREHOOK: Input: default@space
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
index 9925928..00e53dc 100644
--- a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
+++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
@@ -173,17 +173,19 @@ PREHOOK: Input: default@dest1
POSTHOOK: query: desc formatted DEST1 key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dest1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 10 10 0 1 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 10 10 0 1 SExM4AEBg8WRjgM=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: desc formatted DEST1 value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@dest1
POSTHOOK: query: desc formatted DEST1 value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dest1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 1 4.0 4 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 1 4.0 4 SExM4AEBg7CVmgY=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index 5ecb205..c0f0071 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -509,18 +509,20 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 1 12 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeName string 1 12 4.3076923076923075 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeName string 1 12 4.3076923076923075 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR
+ from deserializer
PREHOOK: query: explain
analyze table Employee_Part compute statistics for columns
PREHOOK: type: QUERY
@@ -598,18 +600,20 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 1 12 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 1 12 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
PREHOOK: query: explain
analyze table Employee_Part compute statistics for columns
PREHOOK: type: QUERY
@@ -679,10 +683,11 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 2 12 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 2 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
PREHOOK: query: create database if not exists dummydb
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:dummydb
@@ -711,10 +716,11 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 1 12 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
PREHOOK: query: analyze table default.Employee_Part compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@employee_part
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index a64c76b..0cb4863 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -141,9 +141,10 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='4000.0', country='USA') employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeName string 0 7 5.142857142857143 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeName string 0 7 5.142857142857143 6 SExM4AcHhN+NPL2pzAqA8p0tgLvPcIPS5KcCvbS+dMC7gIYB
+ from deserializer
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID
PREHOOK: type: QUERY
@@ -221,18 +222,20 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='USA') employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 1 12 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 31 0 7 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 31 0 7 SExM4AcHw4SxaoX10lX5nt07xsfQ5AH8u4h+gtXeeb2uipsB
+ from deserializer
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID
PREHOOK: type: QUERY
@@ -318,9 +321,10 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='3000.0', country='UK') employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeID int 16 34 1 12 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns=
+ from deserializer
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns
PREHOOK: type: QUERY
@@ -406,9 +410,10 @@ PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='3500.0', country='UK') employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeName string 0 12 5.142857142857143 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeName string 0 12 5.142857142857143 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR
+ from deserializer
PREHOOK: query: drop table Employee
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table Employee
@@ -481,9 +486,10 @@ PREHOOK: Input: default@employee
POSTHOOK: query: describe formatted Employee partition (employeeSalary='3500.0', country='UK') employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeName string 0 12 5.142857142857143 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeName string 0 12 5.142857142857143 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR
+ from deserializer
PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA')
PREHOOK: type: LOAD
#### A masked pattern was here ####
@@ -528,9 +534,10 @@ PREHOOK: Input: default@employee
POSTHOOK: query: describe formatted Employee partition (employeeSalary='3000.0', country='USA') employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeName string 0 12 5.142857142857143 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeName string 0 12 5.142857142857143 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR
+ from deserializer
PREHOOK: query: alter table Employee add columns (c int ,d string)
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@employee
@@ -564,24 +571,27 @@ PREHOOK: Input: default@employee
POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-employeeName string 0 9 4.777777777777778 6 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+employeeName string 0 9 4.777777777777778 6 SExM4AkJhN+NPL2pzAqA8p0tgLvPcIPS5KcCvbS+dMC7gIYBwJnbgQGAz/1W
+ from deserializer
PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') c
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee
POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') c
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-c int 2000 4000 0 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+c int 2000 4000 0 3 SExM4AMDwpKn6wH/9JpogbzaCQ==
+ from deserializer
PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee
POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-d string 0 2 2.4444444444444446 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+d string 0 2 2.4444444444444446 3 SExM4AICgaD/7QKE/4mqAw==
+ from deserializer
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
index 91c8f15..b85c1ff 100644
--- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
@@ -285,30 +285,39 @@ PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: describe formatted UserVisits_web_text_none destURL
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-destURL string 0 55 48.945454545454545 96 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+destURL string 0 55 48.945454545454545 96 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA
+ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH
+/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA
+vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: describe formatted UserVisits_web_text_none adRevenue
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b
+Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK
+wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA
+86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-avgTimeOnSite int 1 9 0 9 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: CREATE TABLE empty_tab(
a int,
b double,
@@ -414,10 +423,14 @@ PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: describe formatted default.UserVisits_web_text_none destURL
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-destURL string 0 55 48.945454545454545 96 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+destURL string 0 55 48.945454545454545 96 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA
+ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH
+/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA
+vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db (
sourceIP string,
destURL string,
@@ -707,30 +720,39 @@ PREHOOK: Input: dummydb@uservisits_in_dummy_db
POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: dummydb@uservisits_in_dummy_db
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-destURL string 0 55 48.945454545454545 96 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+destURL string 0 55 48.945454545454545 96 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA
+ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH
+/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA
+vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue
PREHOOK: type: DESCTABLE
PREHOOK: Input: dummydb@uservisits_in_dummy_db
POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: dummydb@uservisits_in_dummy_db
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b
+Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK
+wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA
+86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite
PREHOOK: type: DESCTABLE
PREHOOK: Input: dummydb@uservisits_in_dummy_db
POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: dummydb@uservisits_in_dummy_db
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-avgTimeOnSite int 1 9 0 9 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}}
PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db
PREHOOK: type: DROPTABLE
PREHOOK: Input: dummydb@uservisits_in_dummy_db
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/compustat_avro.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compustat_avro.q.out b/ql/src/test/results/clientpositive/compustat_avro.q.out
index 2f8dc10..db20bef 100644
--- a/ql/src/test/results/clientpositive/compustat_avro.q.out
+++ b/ql/src/test/results/clientpositive/compustat_avro.q.out
@@ -30,10 +30,10 @@ PREHOOK: Input: default@testavro
POSTHOOK: query: describe formatted testAvro col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@testavro
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 string from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3
PREHOOK: type: QUERY
PREHOOK: Input: default@testavro
@@ -48,7 +48,7 @@ PREHOOK: Input: default@testavro
POSTHOOK: query: describe formatted testAvro col1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@testavro
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-col1 string 0 0 0.0 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+col1 string 0 0 0.0 0 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/compute_stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out
index 5cd2180..78d04f9 100644
--- a/ql/src/test/results/clientpositive/compute_stats_date.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out
@@ -109,10 +109,12 @@ PREHOOK: Input: default@tab_date
POSTHOOK: query: describe formatted tab_date fl_date
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tab_date
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-fl_date date 2000-11-20 2010-10-29 0 19 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+fl_date date 2000-11-20 2010-10-29 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy
+/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}}
PREHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0')
PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
POSTHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0')
@@ -123,7 +125,9 @@ PREHOOK: Input: default@tab_date
POSTHOOK: query: describe formatted tab_date fl_date
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tab_date
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-fl_date date 1970-01-01 2015-01-01 0 19 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+fl_date date 1970-01-01 2015-01-01 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy
+/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
index fcfce78..e18b989 100644
--- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
@@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 18) from tab_decimal
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_decimal
#### A masked pattern was here ####
-{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}{0, 1, 2, 3, 5}{0, 1, 3}{0, 1, 2, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 6, 8}{0, 1, 2, 3}{0, 1, 2}{0, 1, 4, 5}"}
+{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"Rk0SAB8AAAAvAAAADwAAAAcAAAAHAAAALwAAAAsAAAAXAAAALwAAAA8AAAAHAAAAAwAAAAcAAAAP\r\nAAAARwEAAA8AAAAHAAAAMwAAAA==\r\n"}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/compute_stats_double.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out
index e6a087d..d937c3a 100644
--- a/ql/src/test/results/clientpositive/compute_stats_double.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out
@@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_double
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_double
#### A masked pattern was here ####
-{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2}{0, 1}{0, 1, 3, 4}{0, 1, 3}{0, 1, 2, 3, 8}{0, 1, 3}{0, 1, 2}{0, 1, 4}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2, 3, 4}{0, 1, 2}{0, 1, 2, 3, 4}{0, 1, 3}"}
+{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"Rk0QAB8AAAAHAAAAAwAAABsAAAALAAAADwEAAAsAAAAHAAAAEwAAAAcAAAAPAAAADwAAAB8AAAAH\r\nAAAAHwAAAAsAAAA=\r\n"}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/compute_stats_long.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out b/ql/src/test/results/clientpositive/compute_stats_long.q.out
index fb985d8..3451072 100644
--- a/ql/src/test/results/clientpositive/compute_stats_long.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out
@@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_int
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_int
#### A masked pattern was here ####
-{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3}{0, 2, 5}{0, 1, 2, 3, 4}{0, 1, 2, 4, 6, 7}{0, 1, 2, 4}{0, 1, 2, 4, 5}{0, 1, 2, 5}{0, 1, 2}{0, 1, 2, 3}{0, 1, 3, 4}{0, 1, 2, 5, 6}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 2, 3, 10}{0, 1, 2, 4}"}
+{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"Rk0QAA8AAAAlAAAAHwAAANcAAAAXAAAANwAAACcAAAAHAAAADwAAABsAAABnAAAADwAAAAsAAAAP\r\nAAAADwQAABcAAAA=\r\n"}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/compute_stats_string.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out
index a5d66eb..bbb2361 100644
--- a/ql/src/test/results/clientpositive/compute_stats_string.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out
@@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_string
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_string
#### A masked pattern was here ####
-{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"{0, 1, 2, 3}{0, 1}{0, 1, 3}{0, 2}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 3}{0, 1}{0, 1}{0, 1, 2, 4}{0, 1, 4}{0, 2, 4}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}"}
+{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"Rk0QAA8AAAADAAAACwAAAAUAAAAPAAAACwAAAA8AAAALAAAAAwAAAAMAAAAXAAAAEwAAABUAAAAP\r\nAAAABwAAAAcAAAA=\r\n"}
[04/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
index 5593e42..559f05e 100644
--- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
+++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
@@ -14,10 +14,31 @@ PREHOOK: Input: default@src
POSTHOOK: query: describe formatted src key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe extended src1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@src1
@@ -34,10 +55,12 @@ PREHOOK: Input: default@src1
POSTHOOK: query: describe formatted src1 value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 19 4.92 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 19 4.92 7 SExM4BMTgaTbFcCikRTAp44YwK72BIGdshzAtN4dgfC7Ab6ikDTAz6JGgejDCP+AlzSA84UvwYTL
+Wr+ivynA6+uCAsDjm8kBgri1Ab++nA+/vawa
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe extended src_json
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@src_json
@@ -53,10 +76,11 @@ PREHOOK: Input: default@src_json
POSTHOOK: query: describe formatted src_json json
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_json
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-json string 0 1 644.0 644 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"json\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+json string 0 1 644.0 644 SExM4AEBhZK/6AY=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"json\":\"true\"}}
PREHOOK: query: describe extended src_sequencefile
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@src_sequencefile
@@ -73,10 +97,31 @@ PREHOOK: Input: default@src_sequencefile
POSTHOOK: query: describe formatted src_sequencefile value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@src_sequencefile
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 309 6.812 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe extended srcbucket
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcbucket
@@ -93,10 +138,38 @@ PREHOOK: Input: default@srcbucket
POSTHOOK: query: describe formatted srcbucket value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcbucket
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 430 6.802 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 430 6.802 7 SExM4K4DrgPM7Sz2soMHgPgNw751/KEegclIgPbqBb/h5wOAnfoBgsWzBb7bTcD0a4CqkQPA7mKB
+osYB/6nkAcHVGcHekwOA7wS/u44D/7c4wJvkAYLQxAL/7acDgLXtAb/E0QOB1IQGwKwcv76qAsCV
+swLAzlqA4JsEgcNMv9SGCcDHgAWC2Ei+18ACwLCLBcCdmAGBuSGAhBmFr5AB+tCzAcGDswGC8n39
+6cQDwPDhA8GT1AG/klbAwtYBgvP0Av7EB4GRvQL/0voFgeMggv1uwFj9i2iC06gBgJQH/rXFAYGC
+lwGAijqB5oEB/5DfDP+2RYGo0gP/qKUGgNqHBoKL0AK+3BaA7gvBwq0Ev4j7BcLbmQG+8sQFgLx/
+gIXzAYDdjQHC5rIC/+OKAf/vRsD+BsK12AK+3iDCj6cDwcuXAoC1Bb6hYL+52ATAhvEDgKkLgPWj
+AoDJ9wSCiY0B/o/EBID8iALB/12/4poGwL4Sw7JgvdijAcHnqgP/obsCw4ZzveelAYKYVb7VSoCo
+/wHA6VjAjJgBwfSUAoD3PMPPygKAsBeAwxO84LoCgYc9v5mVAcDaLsH7kALA+SH/wdQCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7+rASAzogCgfKyAv/esgHA5SKAog3BlS6/4ckE
+wJYOgsI1/qWdAYCxlQPAndwBwPaSAoPN3gH9oMcFgaefAcHzlwP/xdQCv45EwK+ZDMCAmAGBkA+/
+xOUEgZXHAcGHzgT+hdkBw4QvvqmlA/+0mAGA5qwDw7Mkv7edBv6CpgLAj/sCwIV1gf2zBoSuiAH7
+pMMDwvHQAv69yQOC1Ez+wbkCgIngA4Hy3Aa/6LgBwIUMwK1XgIilBsCX9grBvakCv6m4AYGp7wKB
+nni/1fkE/98ogNojgJTZBoCWkwTAkPUCgablA/+ApwPA2JoCh8K0AbmZqQKB5kCB9fcCv5bvBMGd
+gQP+0F2AvpQEgeChCr/m1AjA5pEBgMWAA8D42wLCwMkB/oMgwYTTBIC0jAT/oD2CvK4C/rQ5gMym
+BcPGyQS+1XTDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8ClnALCsOIC/s2t
+Af/JsQLAmWCA+akCgrq2A77LxgPBpc4Hv9urBMKGxAT/3o4B/6abB8CL+wTA4qcKgeqxAcDugwK/
+nLICgsOsAv7NyQHA/f4CwOKFA4DygQKA7IwFgIiYAoC23QGAuR3B+yHAqQ6/xRWBm9EBhPaFAby9
+jgO/rbwEwuDXBr7HtAHA+1HAsxfAy6IDhMahBLyaSIHHSf+RyQGAjkvAtxrAt5oDwsiOA4CiDb6m
+0gHCt8kCv81ww8V+vseFCsKclgb+5/AB/pWuB8LwUr+u6gKB8Z8BvqwYwOqBAYSbVPzW2QKC8osD
+/v7KA8D+eYC5A8GYygLEpZQLu/fNAoDIasPXG8DAOL3ZNYGhF8Cj9gWCl4IB/YFvgeecCIHegAH/
+mosB/6lchIQe/qP+BYChKv6SigGAjH3AtakDg78k/frBAYHGygHA8uQB/8LtAYGulAK/3tcDwZ7H
+Ar/UvgWB6Sz/4OICwfyQAf/X9wWBvx+AowGAxY8CgY5j/rVegekTgMQ4v7ApwIryAsDSmQGB+JkD
+xcCxAfqrngKA7O0CgKgZhLXJAsCEDP3qS//G5ATBiqQDgcGkAcDijQT+w44BgMCUCMD+HcCY/QHB
+togBgM8pgrZ3gNMTvuRLv6JgwpJC/sHNAcK0uQL+vzSA45YEwfqvAoDG/AH/4rwDgrfRA77HpwaB
+2Ee/mMcHgafGAr+mzwfBs6sCwdpvgO8X/qq9CIGNpAL/uT+ApoACwIifCoDZBoKwRv62hALA7YcE
+gKF2go9LgKqIBL6u4gSB6UWFxmr6+kHByq8O/8TAAcTpsQeA9hv87TuCuLUBv/eKAYCX+AHAhQ7/
+/8kBws2sA8GtgQK+r5MFwfjTBL7DQMGH1gLA1YECgoGcCf2WF8Dc7QKC9/wC/riiAcHXngPDmI8B
+/JhghYepB7zEpgP/hReD5z29musBgYCDAYDUF8DgkwbA5vUBv5Ye
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe extended srcbucket2
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcbucket2
@@ -113,10 +186,31 @@ PREHOOK: Input: default@srcbucket2
POSTHOOK: query: describe formatted srcbucket2 value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcbucket2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 309 6.812 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe extended srcpart
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcpart
@@ -141,9 +235,30 @@ PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted srcpart PARTITION (ds="2008-04-09", hr="12") key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
PREHOOK: query: describe extended alltypesorc
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesorc
@@ -170,37 +285,59 @@ PREHOOK: Input: default@alltypesorc
POSTHOOK: query: describe formatted alltypesorc ctinyint
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@alltypesorc
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-ctinyint tinyint -64 62 3115 127 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ctinyint tinyint -64 62 3115 127 SExM4H9/wffjBsSrxgHA1wz9+IYHg+DQCP3JihW/hsoBwPWWAcCpzQaAka8Nguq/AsK1iAL87ZUO
+gJNyw+PsDL27kQLAhqMCwurIAb6YtwuCsqkM/uPMAoHrwgqAxvIFv4+0BsPOQb6yowG/ur4Nx8qY
+FbyohgT+yXaA6b0C/8ydAYCV8gKA2pQswJKLAYGJLP/rxgKAk/YDhfPdELvbjQTB5+wEv5WXE8Ch
+lAuHqL8IupGWB4O/f76/iwK+iKEJwaPQAf+XygGCnp4fv/WnBcCSzg/AgSSBieICv7yhBb/FvwnC
+oMIVv+uBKYWN+wT6/KIGxLiuAf3/uAPCqdEIvabTBYHXviP//KQEgKuDCcGV3Ai/h7wGwdqmA4DO
+pwaB1+kGgJuiFP6GugLB6sYcgNvQAb+vZMHalQX/jPYBgaebAoCHiwj/8NQIgeajCb+mUYHomgGA
+uLEa/+yOBoTZwAK8pmPAwvUCgNSdB4SjmgW86+QGgZWyAsLt0wL/26AG/9OGDf+a0QSCvZgGvp/J
+AYKB0AK+4ZoFwMqsA8CF8QqAy5cCgMjKDoDasB7A3/wYgoa7BP6VxASCvrwD/p2zAoGivgLAhPwX
+/+7aD8Hztw7AkIsE/8CDC4GHyxT/zd0EwaXNBcCM4hP/qMsBgaClAr+48AbCqqEI
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
PREHOOK: query: describe formatted alltypesorc cfloat
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesorc
POSTHOOK: query: describe formatted alltypesorc cfloat
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@alltypesorc
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cfloat float -64.0 79.5530014038086 3115 131 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cfloat float -64.0 79.5530014038086 3115 131 SExM4IMBgwGCipYJ/7xdxIybDoK1gwiAsCS79s8MgIafCsHkghfAg+AHvrH2AYHVhgfDuPwIwIsU
+voWVAr/C+AiB5IADvtnDCYC79RPA2PkIwozYBb6JtAGA0owCgJ7bBMLTyQ/+j8UHweetA4C3Nb/e
+hwaAh7oRwOdSxvHFAvzIpAK+w44ExIbjEb6o7AH/kYodgYKOB//70wy/wdUCwPehFcTdqAX/wuML
+vY68B4b06QS7m/4GwL3bB7/RxQOB0twCgLoTw5/mAsS/6we59YkDwd2GC8GD7gW9xLgHwdrsA4Wx
+fP3L7Au9usENgM/tDYLx4Ar+n7MJho+kBPrh2ROAs+4Cwc+0CoKbrAP+2f8Ev8mtFcC6lgWAivUC
+wKH3B4ff0RH66u0OwM/7C4PflA29yYUBgIyFBIHt3QzB2YsK/aDFEoCT4QPA2bYVwNXRB8Gc0gO/
+xLcMwea/A8GajwP+q58Cga3hCv/vnwTD0LsE/fvlBoCDb8GBygWC+/oK/oaXBcC+CIOT3wb+wu8B
+/8j9FMCM9A3/wPgDgqybAb65xAKAm/ICwNErgIbqCsCwiQfAy98IwPumAYTJrwT8i6ECgo/rA4KX
+3AmA0A/85FDB0qsCgc2RA4Cw6g/+3+EGgZSjEsC/8An/+cwbwvjlBcCv4Qe+8tILwIDhAYGDxgvA
+q6ECv6q2CQ==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
PREHOOK: query: describe formatted alltypesorc ctimestamp1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesorc
POSTHOOK: query: describe formatted alltypesorc ctimestamp1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@alltypesorc
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-ctimestamp1 timestamp -30 31 3115 35 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ctimestamp1 timestamp -30 31 3115 35 SExM4CMjxdOOGLzQ1BbA9ZYBwKnNBoCRrw3En8gEvKmpLYCSgzWA4vdMwJKLAcGuwk7AqvwNwcjk
+OMLQ8Vn9/7gDgKfjMcC9hBa/h7wGwajOCYHyixu+m7FqwML1AoCwmzCAj+odgMuXAoDIyg7Aua03
+gJz/CIDc7wWBor4Cv/PWJ4GEwxKAyNs9/6jLAYKDtxE=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
PREHOOK: query: describe formatted alltypesorc cboolean2
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesorc
POSTHOOK: query: describe formatted alltypesorc cboolean2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@alltypesorc
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-cboolean2 boolean 3115 3983 5190 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+cboolean2 boolean 3115 3983 5190 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/decimal_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
index f58a7cc..2742a32 100644
--- a/ql/src/test/results/clientpositive/decimal_stats.q.out
+++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
@@ -46,10 +46,11 @@ PREHOOK: Input: default@decimal_1
POSTHOOK: query: desc formatted decimal_1 v
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@decimal_1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-v decimal(10,0) 500 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+v decimal(10,0) 500 1 SExM4AEA
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}}
PREHOOK: query: explain select * from decimal_1 order by t limit 100
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from decimal_1 order by t limit 100
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/deleteAnalyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out
index 1bae859..d3609f8 100644
--- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out
+++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out
@@ -72,10 +72,10 @@ PREHOOK: Input: default@testdeci2
POSTHOOK: query: describe formatted testdeci2 amount
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@testdeci2
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-amount decimal(10,3) from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+amount decimal(10,3) from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
PREHOOK: query: analyze table testdeci2 compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@testdeci2
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/describe_syntax.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/describe_syntax.q.out b/ql/src/test/results/clientpositive/describe_syntax.q.out
index 19147a1..16b7763 100644
--- a/ql/src/test/results/clientpositive/describe_syntax.q.out
+++ b/ql/src/test/results/clientpositive/describe_syntax.q.out
@@ -211,10 +211,10 @@ PREHOOK: Input: db1@t1
POSTHOOK: query: DESCRIBE FORMATTED t1 key1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: db1@t1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key1 int from deserializer
-COLUMN_STATS_ACCURATE {}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key1 int from deserializer
+COLUMN_STATS_ACCURATE {}
PREHOOK: query: DESCRIBE db1.t1 key1
PREHOOK: type: DESCTABLE
PREHOOK: Input: db1@t1
@@ -235,10 +235,10 @@ PREHOOK: Input: db1@t1
POSTHOOK: query: DESCRIBE FORMATTED db1.t1 key1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: db1@t1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key1 int from deserializer
-COLUMN_STATS_ACCURATE {}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key1 int from deserializer
+COLUMN_STATS_ACCURATE {}
PREHOOK: query: DESCRIBE t1 key1
PREHOOK: type: DESCTABLE
PREHOOK: Input: db1@t1
@@ -259,10 +259,10 @@ PREHOOK: Input: db1@t1
POSTHOOK: query: DESCRIBE FORMATTED t1 key1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: db1@t1
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key1 int from deserializer
-COLUMN_STATS_ACCURATE {}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key1 int from deserializer
+COLUMN_STATS_ACCURATE {}
PREHOOK: query: DESCRIBE t1 PARTITION(ds='4', part='5')
PREHOOK: type: DESCTABLE
PREHOOK: Input: db1@t1
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/describe_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/describe_table.q.out b/ql/src/test/results/clientpositive/describe_table.q.out
index 3ba9a7b..7644fbd 100644
--- a/ql/src/test/results/clientpositive/describe_table.q.out
+++ b/ql/src/test/results/clientpositive/describe_table.q.out
@@ -210,10 +210,31 @@ PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted srcpart key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: describe formatted srcpart PARTITION(ds='2008-04-08', hr='12')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcpart
@@ -302,10 +323,31 @@ PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted `srcpart` `key`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key string 0 309 2.812 3 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s
+1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36
+BcC//AOAtLEEgMbwAf+mwQiAqfgH
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: describe formatted `srcpart` PARTITION(ds='2008-04-08', hr='12')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcpart
@@ -352,20 +394,20 @@ PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted `srcpart` `ds`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-ds string 0 2 100.0 100
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ds\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+ds string 0 2 100.0 100
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ds\":\"true\"}}
PREHOOK: query: describe formatted `srcpart` `hr`
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@srcpart
POSTHOOK: query: describe formatted `srcpart` `hr`
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-hr string 0 2 100.0 100
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"hr\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+hr string 0 2 100.0 100
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"hr\":\"true\"}}
PREHOOK: query: create table srcpart_serdeprops like srcpart
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index 73d4cd7..7cb62a8 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -51,9 +51,9 @@ PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string from deserializer
PREHOOK: query: explain
analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
PREHOOK: type: QUERY
@@ -242,30 +242,39 @@ PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string 0 55 12.763636363636364 13 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M
+wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB
+wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz
+AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-avgTimeOnSite int 1 9 0 9 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b
+Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK
+wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA
+86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: CREATE TABLE empty_tab(
a int,
b double,
@@ -292,10 +301,10 @@ PREHOOK: Input: default@empty_tab
POSTHOOK: query: desc formatted empty_tab a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@empty_tab
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a int from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
PREHOOK: query: explain
analyze table empty_tab compute statistics for columns a,b,c,d,e
PREHOOK: type: QUERY
@@ -361,20 +370,20 @@ PREHOOK: Input: default@empty_tab
POSTHOOK: query: desc formatted empty_tab a
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@empty_tab
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-a int 0 0 0 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+a int 0 0 0 0 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
PREHOOK: query: desc formatted empty_tab b
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@empty_tab
POSTHOOK: query: desc formatted empty_tab b
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@empty_tab
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-b double 0.0 0.0 0 0 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+b double 0.0 0.0 0 0 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}}
PREHOOK: query: CREATE DATABASE test
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:test
@@ -451,28 +460,32 @@ PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string from deserializer
PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string from deserializer
PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sourceIP string 0 55 12.763636363636364 13 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M
+wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB
+wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz
+AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword
PREHOOK: type: QUERY
PREHOOK: Input: test@uservisits_web_text_none
@@ -495,17 +508,25 @@ PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sKeyword string 0 54 7.872727272727273 19 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA
++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr
+aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9
+x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-sKeyword string 0 54 7.872727272727273 19 from deserializer
-COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA
++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr
+aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9
+x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
index 1096e9f..657859e 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
@@ -59,20 +59,62 @@ PREHOOK: Input: default@encrypted_table
POSTHOOK: query: DESCRIBE FORMATTED encrypted_table key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 0 498 0 309 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L
+vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb
+YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO
+vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo
+Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7
+Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69
+yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi
+AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy
+8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/
+1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ
+2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe
+A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+
+we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu
+9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc
+6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB
+gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g
+4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD
+gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl
+Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA
+t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA
+4gPA7aoC/6mKCIDZpgLDoEQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: DESCRIBE FORMATTED encrypted_table value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@encrypted_table
POSTHOOK: query: DESCRIBE FORMATTED encrypted_table value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 309 6.812 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO encrypted_db.encrypted_table_2
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: default@encrypted_table
@@ -92,20 +134,62 @@ PREHOOK: Input: default@encrypted_table
POSTHOOK: query: DESCRIBE FORMATTED encrypted_table key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-key int 0 498 0 309 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L
+vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb
+YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO
+vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo
+Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7
+Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69
+yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi
+AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy
+8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/
+1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ
+2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe
+A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+
+we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu
+9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc
+6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB
+gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g
+4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD
+gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl
+Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA
+t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA
+4gPA7aoC/6mKCIDZpgLDoEQ=
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: DESCRIBE FORMATTED encrypted_table value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@encrypted_table
POSTHOOK: query: DESCRIBE FORMATTED encrypted_table value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-value string 0 309 6.812 7 from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT
+A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB
+gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC
+gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC
+wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8
+BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl
+nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+
+tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP
+KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC
+sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC
+zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm
+A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg==
+ from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO default.plain_table
PREHOOK: type: ALTERTABLE_RENAME
PREHOOK: Input: default@encrypted_table
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
index b212da9..a606632 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
@@ -81,9 +81,10 @@ PREHOOK: Input: default@loc_orc_1d
POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-
-state string 0 3 0.75 2 from deserializer
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector
+
+state string 0 3 0.75 2 SExM4AMDgaTbFcD8mOYCwMOJoQQ=
+ from deserializer
PREHOOK: query: explain extended select state from loc_orc_1d
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select state from loc_orc_1d
[10/11] hive git commit: HIVE-16997: Extend object store to store and
use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
new file mode 100644
index 0000000..6fae3e5
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
@@ -0,0 +1,358 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Date;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
+ IExtrapolatePartStatus {
+
+ private static final Logger LOG = LoggerFactory.getLogger(DateColumnStatsAggregator.class);
+
+ /**
+  * Aggregates the per-partition date statistics of column {@code colName} into one
+  * {@link ColumnStatisticsObj}.
+  *
+  * <p>If stats exist for every partition (or fewer than two stats objects were passed), low/high
+  * values are combined with min/max and null counts are summed. The NDV is taken from the merged
+  * bit vectors when every partition carries a mergeable bit vector; otherwise it is estimated
+  * either with the density function or by interpolating between the largest per-partition NDV
+  * and the sum of all NDVs using {@code ndvTuner}. If some partitions have no stats, the
+  * available stats are handed to {@link #extrapolate}.
+  *
+  * <p>Densities are computed in floating point so that a partition reporting zero distinct
+  * values cannot trigger an integer division by zero and so that fractional densities are not
+  * truncated.
+  *
+  * @param colName name of the column being aggregated
+  * @param partNames names of all partitions covered by the aggregate
+  * @param css statistics of the partitions that have stats, one column each
+  * @return the aggregated statistics object
+  * @throws MetaException if a {@link ColumnStatistics} does not hold exactly one column
+  */
+ @Override
+ public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
+     List<ColumnStatistics> css) throws MetaException {
+   ColumnStatisticsObj statsObj = null;
+
+   // check if all the ColumnStatisticsObjs contain stats and all the ndv are
+   // bitvectors
+   boolean doAllPartitionContainStats = partNames.size() == css.size();
+   LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
+   NumDistinctValueEstimator ndvEstimator = null;
+   String colType = null;
+   for (ColumnStatistics cs : css) {
+     if (cs.getStatsObjSize() != 1) {
+       throw new MetaException(
+           "The number of columns should be exactly one in aggrStats, but found "
+               + cs.getStatsObjSize());
+     }
+     ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+     if (statsObj == null) {
+       colType = cso.getColType();
+       statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
+           .getStatsData().getSetField());
+     }
+     if (!cso.getStatsData().getDateStats().isSetBitVectors()
+         || cso.getStatsData().getDateStats().getBitVectors().length() == 0) {
+       ndvEstimator = null;
+       break;
+     } else {
+       // check if all of the bit vectors can merge
+       NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
+           .getNumDistinctValueEstimator(cso.getStatsData().getDateStats().getBitVectors());
+       if (ndvEstimator == null) {
+         ndvEstimator = estimator;
+       } else {
+         if (ndvEstimator.canMerge(estimator)) {
+           continue;
+         } else {
+           ndvEstimator = null;
+           break;
+         }
+       }
+     }
+   }
+   if (ndvEstimator != null) {
+     // start from an empty estimator of the same kind; the real merge happens below
+     ndvEstimator = NumDistinctValueEstimatorFactory
+         .getEmptyNumDistinctValueEstimator(ndvEstimator);
+   }
+   LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
+   ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
+   if (doAllPartitionContainStats || css.size() < 2) {
+     DateColumnStatsData aggregateData = null;
+     long lowerBound = 0;
+     long higherBound = 0;
+     double densityAvgSum = 0.0;
+     for (ColumnStatistics cs : css) {
+       ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+       DateColumnStatsData newData = cso.getStatsData().getDateStats();
+       lowerBound = Math.max(lowerBound, newData.getNumDVs());
+       higherBound += newData.getNumDVs();
+       // floating-point division: no truncation and no ArithmeticException for 0 NDV
+       densityAvgSum += ((double) diff(newData.getHighValue(), newData.getLowValue()))
+           / newData.getNumDVs();
+       if (ndvEstimator != null) {
+         ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+             .getNumDistinctValueEstimator(newData.getBitVectors()));
+       }
+       if (aggregateData == null) {
+         aggregateData = newData.deepCopy();
+       } else {
+         aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue()));
+         aggregateData
+             .setHighValue(max(aggregateData.getHighValue(), newData.getHighValue()));
+         aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+         aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+       }
+     }
+     if (ndvEstimator != null) {
+       // if all the ColumnStatisticsObjs contain bitvectors, we do not need to
+       // use uniform distribution assumption because we can merge bitvectors
+       // to get a good estimation.
+       aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+     } else {
+       long estimation;
+       if (useDensityFunctionForNDVEstimation) {
+         // We have estimation, lowerbound and higherbound. We use estimation
+         // if it is between lowerbound and higherbound.
+         double densityAvg = densityAvgSum / partNames.size();
+         estimation = (long) (diff(aggregateData.getHighValue(), aggregateData.getLowValue()) / densityAvg);
+         if (estimation < lowerBound) {
+           estimation = lowerBound;
+         } else if (estimation > higherBound) {
+           estimation = higherBound;
+         }
+       } else {
+         estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
+       }
+       aggregateData.setNumDVs(estimation);
+     }
+     columnStatisticsData.setDateStats(aggregateData);
+   } else {
+     // we need extrapolation
+     LOG.debug("start extrapolation for " + colName);
+
+     Map<String, Integer> indexMap = new HashMap<String, Integer>();
+     for (int index = 0; index < partNames.size(); index++) {
+       indexMap.put(partNames.get(index), index);
+     }
+     Map<String, Double> adjustedIndexMap = new HashMap<String, Double>();
+     Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<String, ColumnStatisticsData>();
+     // while we scan the css, we also get the densityAvg, lowerbound and
+     // higherbound when useDensityFunctionForNDVEstimation is true.
+     double densityAvgSum = 0.0;
+     if (ndvEstimator == null) {
+       // if not every partition uses bitvector for ndv, we just fall back to
+       // the traditional extrapolation methods.
+       for (ColumnStatistics cs : css) {
+         String partName = cs.getStatsDesc().getPartName();
+         ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+         DateColumnStatsData newData = cso.getStatsData().getDateStats();
+         if (useDensityFunctionForNDVEstimation) {
+           densityAvgSum += ((double) diff(newData.getHighValue(), newData.getLowValue()))
+               / newData.getNumDVs();
+         }
+         adjustedIndexMap.put(partName, (double) indexMap.get(partName));
+         adjustedStatsMap.put(partName, cso.getStatsData());
+       }
+     } else {
+       // we first merge all the adjacent bitvectors that we could merge and
+       // derive new partition names and index.
+       StringBuilder pseudoPartName = new StringBuilder();
+       double pseudoIndexSum = 0;
+       int length = 0;
+       int curIndex = -1;
+       DateColumnStatsData aggregateData = null;
+       for (ColumnStatistics cs : css) {
+         String partName = cs.getStatsDesc().getPartName();
+         ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+         DateColumnStatsData newData = cso.getStatsData().getDateStats();
+         // newData.isSetBitVectors() should be true for sure because we
+         // already checked it before.
+         if (indexMap.get(partName) != curIndex) {
+           // There is bitvector, but it is not adjacent to the previous ones.
+           if (length > 0) {
+             // we have to set ndv
+             adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+             aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+             ColumnStatisticsData csd = new ColumnStatisticsData();
+             csd.setDateStats(aggregateData);
+             adjustedStatsMap.put(pseudoPartName.toString(), csd);
+             if (useDensityFunctionForNDVEstimation) {
+               densityAvgSum += ((double) diff(aggregateData.getHighValue(),
+                   aggregateData.getLowValue())) / aggregateData.getNumDVs();
+             }
+             // reset everything
+             pseudoPartName = new StringBuilder();
+             pseudoIndexSum = 0;
+             length = 0;
+             ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator);
+           }
+           aggregateData = null;
+         }
+         curIndex = indexMap.get(partName);
+         pseudoPartName.append(partName);
+         pseudoIndexSum += curIndex;
+         length++;
+         // the next partition is adjacent only if its index equals curIndex + 1
+         curIndex++;
+         if (aggregateData == null) {
+           aggregateData = newData.deepCopy();
+         } else {
+           aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue()));
+           aggregateData.setHighValue(max(aggregateData.getHighValue(), newData.getHighValue()));
+           aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+         }
+         ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+             .getNumDistinctValueEstimator(newData.getBitVectors()));
+       }
+       if (length > 0) {
+         // we have to set ndv
+         adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+         aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+         ColumnStatisticsData csd = new ColumnStatisticsData();
+         csd.setDateStats(aggregateData);
+         adjustedStatsMap.put(pseudoPartName.toString(), csd);
+         if (useDensityFunctionForNDVEstimation) {
+           densityAvgSum += ((double) diff(aggregateData.getHighValue(),
+               aggregateData.getLowValue())) / aggregateData.getNumDVs();
+         }
+       }
+     }
+     extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
+         adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
+   }
+   statsObj.setStatsData(columnStatisticsData);
+   LOG.debug("Ndv estimatation for " + colName + " is "
+       + columnStatisticsData.getDateStats().getNumDVs());
+   return statsObj;
+ }
+
+ /** Returns the days-since-epoch of {@code d1} minus the days-since-epoch of {@code d2}. */
+ private long diff(Date d1, Date d2) {
+   long minuendDays = d1.getDaysSinceEpoch();
+   long subtrahendDays = d2.getDaysSinceEpoch();
+   return minuendDays - subtrahendDays;
+ }
+
+ /**
+  * Returns {@code d1} when it compares strictly less than {@code d2}; otherwise {@code d2}
+  * (so a tie yields {@code d2}).
+  */
+ private Date min(Date d1, Date d2) {
+   if (d1.compareTo(d2) < 0) {
+     return d1;
+   }
+   return d2;
+ }
+
+ /**
+  * Returns {@code d2} when {@code d1} compares strictly less than it; otherwise {@code d1}
+  * (so a tie yields {@code d1}).
+  */
+ private Date max(Date d1, Date d2) {
+   if (d1.compareTo(d2) < 0) {
+     return d2;
+   }
+   return d1;
+ }
+
+ /**
+  * Extrapolates date column statistics for all {@code numParts} partitions from the partitions
+  * (or merged pseudo-partitions) that have stats, and stores the result in
+  * {@code extrapolateData}.
+  *
+  * <p>Low value, high value and (when the density function is not used) NDV are linearly
+  * extrapolated to the left or right border of the partition index range, based on the entries
+  * holding the smallest and largest value. The null count is the sum over the known entries
+  * scaled by {@code numParts / numPartsWithStats}.
+  *
+  * @param extrapolateData receives the extrapolated date stats
+  * @param numParts total number of partitions; also used as the right border index
+  * @param numPartsWithStats number of partitions that actually have stats
+  * @param adjustedIndexMap (pseudo-)partition name to its (possibly averaged) index
+  * @param adjustedStatsMap (pseudo-)partition name to its stats; must not be empty
+  * @param densityAvg average density; only used when the density function is enabled and the
+  *        value is non-zero
+  */
+ @Override
+ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts,
+     int numPartsWithStats, Map<String, Double> adjustedIndexMap,
+     Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
+   int rightBorderInd = numParts;
+   DateColumnStatsData extrapolateDateData = new DateColumnStatsData();
+   Map<String, DateColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
+   for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
+     extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDateStats());
+   }
+   List<Map.Entry<String, DateColumnStatsData>> list = new LinkedList<Map.Entry<String, DateColumnStatsData>>(
+       extractedAdjustedStatsMap.entrySet());
+   // get the lowValue
+   // Comparators must return 0 for ties; otherwise the sort may reject them as
+   // violating the Comparator contract.
+   Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+     @Override
+     public int compare(Map.Entry<String, DateColumnStatsData> o1,
+         Map.Entry<String, DateColumnStatsData> o2) {
+       return Long.compare(o1.getValue().getLowValue().getDaysSinceEpoch(),
+           o2.getValue().getLowValue().getDaysSinceEpoch());
+     }
+   });
+   double minInd = adjustedIndexMap.get(list.get(0).getKey());
+   double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+   long lowValue = 0;
+   long min = list.get(0).getValue().getLowValue().getDaysSinceEpoch();
+   long max = list.get(list.size() - 1).getValue().getLowValue().getDaysSinceEpoch();
+   if (minInd == maxInd) {
+     lowValue = min;
+   } else if (minInd < maxInd) {
+     // left border is the min
+     lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd));
+   } else {
+     // right border is the min
+     lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd));
+   }
+
+   // get the highValue
+   Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+     @Override
+     public int compare(Map.Entry<String, DateColumnStatsData> o1,
+         Map.Entry<String, DateColumnStatsData> o2) {
+       return Long.compare(o1.getValue().getHighValue().getDaysSinceEpoch(),
+           o2.getValue().getHighValue().getDaysSinceEpoch());
+     }
+   });
+   minInd = adjustedIndexMap.get(list.get(0).getKey());
+   maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+   long highValue = 0;
+   min = list.get(0).getValue().getHighValue().getDaysSinceEpoch();
+   max = list.get(list.size() - 1).getValue().getHighValue().getDaysSinceEpoch();
+   if (minInd == maxInd) {
+     highValue = min;
+   } else if (minInd < maxInd) {
+     // right border is the max
+     highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+   } else {
+     // left border is the max
+     highValue = (long) (min + (max - min) * minInd / (minInd - maxInd));
+   }
+
+   // get the #nulls
+   long numNulls = 0;
+   for (Map.Entry<String, DateColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) {
+     numNulls += entry.getValue().getNumNulls();
+   }
+   // we scale up sumNulls based on the number of partitions
+   numNulls = numNulls * numParts / numPartsWithStats;
+
+   // get the ndv
+   long ndv = 0;
+   Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+     @Override
+     public int compare(Map.Entry<String, DateColumnStatsData> o1,
+         Map.Entry<String, DateColumnStatsData> o2) {
+       return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
+     }
+   });
+   // the true NDV lies between the largest single NDV and the sum of all NDVs
+   long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
+   long higherBound = 0;
+   for (Map.Entry<String, DateColumnStatsData> entry : list) {
+     higherBound += entry.getValue().getNumDVs();
+   }
+   if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
+     ndv = (long) ((highValue - lowValue) / densityAvg);
+     if (ndv < lowerBound) {
+       ndv = lowerBound;
+     } else if (ndv > higherBound) {
+       ndv = higherBound;
+     }
+   } else {
+     minInd = adjustedIndexMap.get(list.get(0).getKey());
+     maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+     min = list.get(0).getValue().getNumDVs();
+     max = list.get(list.size() - 1).getValue().getNumDVs();
+     if (minInd == maxInd) {
+       ndv = min;
+     } else if (minInd < maxInd) {
+       // right border is the max
+       ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+     } else {
+       // left border is the max
+       ndv = (long) (min + (max - min) * minInd / (minInd - maxInd));
+     }
+   }
+   extrapolateDateData.setLowValue(new Date(lowValue));
+   extrapolateDateData.setHighValue(new Date(highValue));
+   extrapolateDateData.setNumNulls(numNulls);
+   extrapolateDateData.setNumDVs(ndv);
+   extrapolateData.setDateStats(extrapolateDateData);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
new file mode 100644
index 0000000..2ea2fcc
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class StringColumnStatsAggregator extends ColumnStatsAggregator implements
+ IExtrapolatePartStatus {
+
+ // Logger registered under this aggregator's own class name.
+ private static final Logger LOG = LoggerFactory.getLogger(StringColumnStatsAggregator.class);
+
+ @Override
+ public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
+ List<ColumnStatistics> css) throws MetaException {
+ ColumnStatisticsObj statsObj = null;
+
+ // check if all the ColumnStatisticsObjs contain stats and all the ndv are
+ // bitvectors. Only when both of the conditions are true, we merge bit
+ // vectors. Otherwise, just use the maximum function.
+ boolean doAllPartitionContainStats = partNames.size() == css.size();
+ LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
+ NumDistinctValueEstimator ndvEstimator = null;
+ String colType = null;
+ for (ColumnStatistics cs : css) {
+ if (cs.getStatsObjSize() != 1) {
+ throw new MetaException(
+ "The number of columns should be exactly one in aggrStats, but found "
+ + cs.getStatsObjSize());
+ }
+ ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+ if (statsObj == null) {
+ colType = cso.getColType();
+ statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
+ .getStatsData().getSetField());
+ }
+ if (!cso.getStatsData().getStringStats().isSetBitVectors()
+ || cso.getStatsData().getStringStats().getBitVectors().length() == 0) {
+ ndvEstimator = null;
+ break;
+ } else {
+ // check if all of the bit vectors can merge
+ NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors());
+ if (ndvEstimator == null) {
+ ndvEstimator = estimator;
+ } else {
+ if (ndvEstimator.canMerge(estimator)) {
+ continue;
+ } else {
+ ndvEstimator = null;
+ break;
+ }
+ }
+ }
+ }
+ if (ndvEstimator != null) {
+ ndvEstimator = NumDistinctValueEstimatorFactory
+ .getEmptyNumDistinctValueEstimator(ndvEstimator);
+ }
+ LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
+ ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
+ if (doAllPartitionContainStats || css.size() < 2) {
+ StringColumnStatsData aggregateData = null;
+ for (ColumnStatistics cs : css) {
+ ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+ StringColumnStatsData newData = cso.getStatsData().getStringStats();
+ if (ndvEstimator != null) {
+ ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(newData.getBitVectors()));
+ }
+ if (aggregateData == null) {
+ aggregateData = newData.deepCopy();
+ } else {
+ aggregateData
+ .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
+ aggregateData
+ .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
+ aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+ aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+ }
+ }
+ if (ndvEstimator != null) {
+ // if all the ColumnStatisticsObjs contain bitvectors, we do not need to
+ // use uniform distribution assumption because we can merge bitvectors
+ // to get a good estimation.
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ } else {
+ // aggregateData already has the ndv of the max of all
+ }
+ columnStatisticsData.setStringStats(aggregateData);
+ } else {
+ // we need extrapolation
+ LOG.debug("start extrapolation for " + colName);
+
+ Map<String, Integer> indexMap = new HashMap<String, Integer>();
+ for (int index = 0; index < partNames.size(); index++) {
+ indexMap.put(partNames.get(index), index);
+ }
+ Map<String, Double> adjustedIndexMap = new HashMap<String, Double>();
+ Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<String, ColumnStatisticsData>();
+ if (ndvEstimator == null) {
+ // if not every partition uses bitvector for ndv, we just fall back to
+ // the traditional extrapolation methods.
+ for (ColumnStatistics cs : css) {
+ String partName = cs.getStatsDesc().getPartName();
+ ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+ StringColumnStatsData newData = cso.getStatsData().getStringStats();
+ adjustedIndexMap.put(partName, (double) indexMap.get(partName));
+ adjustedStatsMap.put(partName, cso.getStatsData());
+ }
+ } else {
+ // we first merge all the adjacent bitvectors that we could merge and
+ // derive new partition names and index.
+ StringBuilder pseudoPartName = new StringBuilder();
+ double pseudoIndexSum = 0;
+ int length = 0;
+ int curIndex = -1;
+ StringColumnStatsData aggregateData = null;
+ for (ColumnStatistics cs : css) {
+ String partName = cs.getStatsDesc().getPartName();
+ ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+ StringColumnStatsData newData = cso.getStatsData().getStringStats();
+ // newData.isSetBitVectors() should be true for sure because we
+ // already checked it before.
+ if (indexMap.get(partName) != curIndex) {
+ // There is bitvector, but it is not adjacent to the previous ones.
+ if (length > 0) {
+ // we have to set ndv
+ adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ ColumnStatisticsData csd = new ColumnStatisticsData();
+ csd.setStringStats(aggregateData);
+ adjustedStatsMap.put(pseudoPartName.toString(), csd);
+ // reset everything
+ pseudoPartName = new StringBuilder();
+ pseudoIndexSum = 0;
+ length = 0;
+ ndvEstimator = NumDistinctValueEstimatorFactory
+ .getEmptyNumDistinctValueEstimator(ndvEstimator);
+ }
+ aggregateData = null;
+ }
+ curIndex = indexMap.get(partName);
+ pseudoPartName.append(partName);
+ pseudoIndexSum += curIndex;
+ length++;
+ curIndex++;
+ if (aggregateData == null) {
+ aggregateData = newData.deepCopy();
+ } else {
+ aggregateData.setAvgColLen(Math.min(aggregateData.getAvgColLen(),
+ newData.getAvgColLen()));
+ aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(),
+ newData.getMaxColLen()));
+ aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+ }
+ ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+ .getNumDistinctValueEstimator(newData.getBitVectors()));
+ }
+ if (length > 0) {
+ // we have to set ndv
+ adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+ aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+ ColumnStatisticsData csd = new ColumnStatisticsData();
+ csd.setStringStats(aggregateData);
+ adjustedStatsMap.put(pseudoPartName.toString(), csd);
+ }
+ }
+ extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
+ adjustedStatsMap, -1);
+ }
+ LOG.debug("Ndv estimatation for " + colName + " is "
+ + columnStatisticsData.getStringStats().getNumDVs());
+ statsObj.setStatsData(columnStatisticsData);
+ return statsObj;
+ }
+
+ /**
+  * Extrapolates string column statistics for all {@code numParts} partitions from the
+  * partitions (or merged pseudo-partitions) that have stats, and stores the result in
+  * {@code extrapolateData}.
+  *
+  * <p>Average column length, maximum column length and NDV are each linearly extrapolated to
+  * the left or right border of the partition index range, based on the entries holding the
+  * smallest and largest value of that metric. The null count is the sum over the known entries
+  * scaled by {@code numParts / numPartsWithStats}.
+  *
+  * @param extrapolateData receives the extrapolated string stats
+  * @param numParts total number of partitions; also used as the right border index
+  * @param numPartsWithStats number of partitions that actually have stats
+  * @param adjustedIndexMap (pseudo-)partition name to its (possibly averaged) index
+  * @param adjustedStatsMap (pseudo-)partition name to its stats; must not be empty
+  * @param densityAvg not used by this implementation
+  */
+ @Override
+ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts,
+     int numPartsWithStats, Map<String, Double> adjustedIndexMap,
+     Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
+   int rightBorderInd = numParts;
+   StringColumnStatsData extrapolateStringData = new StringColumnStatsData();
+   Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
+   for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
+     extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats());
+   }
+   List<Map.Entry<String, StringColumnStatsData>> list = new LinkedList<Map.Entry<String, StringColumnStatsData>>(
+       extractedAdjustedStatsMap.entrySet());
+   // get the avgLen
+   // Comparators must return 0 for ties; otherwise the sort may reject them as
+   // violating the Comparator contract.
+   Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+     @Override
+     public int compare(Map.Entry<String, StringColumnStatsData> o1,
+         Map.Entry<String, StringColumnStatsData> o2) {
+       return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen());
+     }
+   });
+   double minInd = adjustedIndexMap.get(list.get(0).getKey());
+   double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+   double avgColLen = 0;
+   double min = list.get(0).getValue().getAvgColLen();
+   double max = list.get(list.size() - 1).getValue().getAvgColLen();
+   if (minInd == maxInd) {
+     avgColLen = min;
+   } else if (minInd < maxInd) {
+     // right border is the max
+     avgColLen = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+   } else {
+     // left border is the max
+     avgColLen = (min + (max - min) * minInd / (minInd - maxInd));
+   }
+
+   // get the maxLen
+   Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+     @Override
+     public int compare(Map.Entry<String, StringColumnStatsData> o1,
+         Map.Entry<String, StringColumnStatsData> o2) {
+       return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen());
+     }
+   });
+   minInd = adjustedIndexMap.get(list.get(0).getKey());
+   maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+   double maxColLen = 0;
+   // endpoints must be the max column lengths (the list is sorted by maxColLen here)
+   min = list.get(0).getValue().getMaxColLen();
+   max = list.get(list.size() - 1).getValue().getMaxColLen();
+   if (minInd == maxInd) {
+     maxColLen = min;
+   } else if (minInd < maxInd) {
+     // right border is the max
+     maxColLen = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+   } else {
+     // left border is the max
+     maxColLen = (min + (max - min) * minInd / (minInd - maxInd));
+   }
+
+   // get the #nulls
+   long numNulls = 0;
+   for (Map.Entry<String, StringColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) {
+     numNulls += entry.getValue().getNumNulls();
+   }
+   // we scale up sumNulls based on the number of partitions
+   numNulls = numNulls * numParts / numPartsWithStats;
+
+   // get the ndv
+   long ndv = 0;
+   Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+     @Override
+     public int compare(Map.Entry<String, StringColumnStatsData> o1,
+         Map.Entry<String, StringColumnStatsData> o2) {
+       return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
+     }
+   });
+   minInd = adjustedIndexMap.get(list.get(0).getKey());
+   maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+   min = list.get(0).getValue().getNumDVs();
+   max = list.get(list.size() - 1).getValue().getNumDVs();
+   if (minInd == maxInd) {
+     ndv = (long) min;
+   } else if (minInd < maxInd) {
+     // right border is the max
+     ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+   } else {
+     // left border is the max
+     ndv = (long) (min + (max - min) * minInd / (minInd - maxInd));
+   }
+   extrapolateStringData.setAvgColLen(avgColLen);
+   extrapolateStringData.setMaxColLen((long) maxColLen);
+   extrapolateStringData.setNumNulls(numNulls);
+   extrapolateStringData.setNumDVs(ndv);
+   extrapolateData.setStringStats(extrapolateStringData);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
index 0e11989..78a962a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java
@@ -32,8 +32,8 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator;
-import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory;
import java.io.IOException;
import java.security.MessageDigest;
@@ -84,7 +84,10 @@ class StatsCache {
.build(new CacheLoader<StatsCacheKey, AggrStats>() {
@Override
public AggrStats load(StatsCacheKey key) throws Exception {
- boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
+ boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
+ double ndvTuner = HiveConf.getFloatVar(conf,
+ HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
HBaseReadWrite hrw = HBaseReadWrite.getInstance();
AggrStats aggrStats = hrw.getAggregatedStats(key.hashed);
if (aggrStats == null) {
@@ -100,7 +103,7 @@ class StatsCache {
if (aggregator == null) {
aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css.iterator()
.next().getStatsObj().iterator().next().getStatsData().getSetField(),
- useDensityFunctionForNDVEstimation);
+ useDensityFunctionForNDVEstimation, ndvTuner);
}
ColumnStatisticsObj statsObj = aggregator
.aggregate(key.colName, key.partNames, css);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
index d81d612..e6c836b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.List;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
index e796df2..a34bc9f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.List;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
index 29a0539..a52e5e5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.List;
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
public abstract class ColumnStatsAggregator {
public boolean useDensityFunctionForNDVEstimation;
-
+ public double ndvTuner;
public abstract ColumnStatisticsObj aggregate(String colName, List<String> partNames,
List<ColumnStatistics> css) throws MetaException;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
index 568bf06..173e06f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java
@@ -17,13 +17,14 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
@@ -34,7 +35,8 @@ public class ColumnStatsAggregatorFactory {
private ColumnStatsAggregatorFactory() {
}
- public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boolean useDensityFunctionForNDVEstimation) {
+ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type,
+ boolean useDensityFunctionForNDVEstimation, double ndvTuner) {
ColumnStatsAggregator agg;
switch (type) {
case BOOLEAN_STATS:
@@ -43,6 +45,9 @@ public class ColumnStatsAggregatorFactory {
case LONG_STATS:
agg = new LongColumnStatsAggregator();
break;
+ case DATE_STATS:
+ agg = new DateColumnStatsAggregator();
+ break;
case DOUBLE_STATS:
agg = new DoubleColumnStatsAggregator();
break;
@@ -59,6 +64,7 @@ public class ColumnStatsAggregatorFactory {
throw new RuntimeException("Woh, bad. Unknown stats type " + type.toString());
}
agg.useDensityFunctionForNDVEstimation = useDensityFunctionForNDVEstimation;
+ agg.ndvTuner = ndvTuner;
return agg;
}
@@ -76,6 +82,10 @@ public class ColumnStatsAggregatorFactory {
csd.setLongStats(new LongColumnStatsData());
break;
+ case DATE_STATS:
+ csd.setDateStats(new DateColumnStatsData());
+ break;
+
case DOUBLE_STATS:
csd.setDoubleStats(new DoubleColumnStatsData());
break;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
index 8eb64e0..5924c3e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.Collections;
import java.util.Comparator;
@@ -35,9 +35,13 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.hbase.HBaseUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {
+
+ private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsAggregator.class);
@Override
public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
@@ -47,6 +51,7 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
// check if all the ColumnStatisticsObjs contain stats and all the ndv are
// bitvectors
boolean doAllPartitionContainStats = partNames.size() == css.size();
+ LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
NumDistinctValueEstimator ndvEstimator = null;
String colType = null;
for (ColumnStatistics cs : css) {
@@ -85,6 +90,7 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
ndvEstimator = NumDistinctValueEstimatorFactory
.getEmptyNumDistinctValueEstimator(ndvEstimator);
}
+ LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
if (doAllPartitionContainStats || css.size() < 2) {
DecimalColumnStatsData aggregateData = null;
@@ -94,12 +100,10 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
for (ColumnStatistics cs : css) {
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats();
- if (useDensityFunctionForNDVEstimation) {
- lowerBound = Math.max(lowerBound, newData.getNumDVs());
- higherBound += newData.getNumDVs();
- densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils
- .getDoubleValue(newData.getLowValue())) / newData.getNumDVs();
- }
+ lowerBound = Math.max(lowerBound, newData.getNumDVs());
+ higherBound += newData.getNumDVs();
+ densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils
+ .getDoubleValue(newData.getLowValue())) / newData.getNumDVs();
if (ndvEstimator != null) {
ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
.getNumDistinctValueEstimator(newData.getBitVectors()));
@@ -129,28 +133,27 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
// to get a good estimation.
aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
} else {
+ long estimation;
if (useDensityFunctionForNDVEstimation) {
// We have estimation, lowerbound and higherbound. We use estimation
// if it is between lowerbound and higherbound.
double densityAvg = densityAvgSum / partNames.size();
- long estimation = (long) ((HBaseUtils.getDoubleValue(aggregateData.getHighValue()) - HBaseUtils
+ estimation = (long) ((HBaseUtils.getDoubleValue(aggregateData.getHighValue()) - HBaseUtils
.getDoubleValue(aggregateData.getLowValue())) / densityAvg);
if (estimation < lowerBound) {
- aggregateData.setNumDVs(lowerBound);
+ estimation = lowerBound;
} else if (estimation > higherBound) {
- aggregateData.setNumDVs(higherBound);
- } else {
- aggregateData.setNumDVs(estimation);
+ estimation = higherBound;
}
} else {
- // Without useDensityFunctionForNDVEstimation, we just use the
- // default one, which is the max of all the partitions and it is
- // already done.
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
}
+ aggregateData.setNumDVs(estimation);
}
columnStatisticsData.setDecimalStats(aggregateData);
} else {
// we need extrapolation
+ LOG.debug("start extrapolation for " + colName);
Map<String, Integer> indexMap = new HashMap<String, Integer>();
for (int index = 0; index < partNames.size(); index++) {
indexMap.put(partNames.get(index), index);
@@ -251,6 +254,8 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
statsObj.setStatsData(columnStatisticsData);
+ LOG.debug("Ndv estimation for " + colName + " is "
+ + columnStatisticsData.getDecimalStats().getNumDVs());
return statsObj;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
index b6b8612..e55c412 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.Collections;
import java.util.Comparator;
@@ -33,10 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {
+ private static final Logger LOG = LoggerFactory.getLogger(DoubleColumnStatsAggregator.class);
+
@Override
public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
List<ColumnStatistics> css) throws MetaException {
@@ -45,6 +49,7 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
// check if all the ColumnStatisticsObjs contain stats and all the ndv are
// bitvectors
boolean doAllPartitionContainStats = partNames.size() == css.size();
+ LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
NumDistinctValueEstimator ndvEstimator = null;
String colType = null;
for (ColumnStatistics cs : css) {
@@ -83,6 +88,7 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
ndvEstimator = NumDistinctValueEstimatorFactory
.getEmptyNumDistinctValueEstimator(ndvEstimator);
}
+ LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
if (doAllPartitionContainStats || css.size() < 2) {
DoubleColumnStatsData aggregateData = null;
@@ -92,11 +98,9 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
for (ColumnStatistics cs : css) {
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats();
- if (useDensityFunctionForNDVEstimation) {
- lowerBound = Math.max(lowerBound, newData.getNumDVs());
- higherBound += newData.getNumDVs();
- densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
- }
+ lowerBound = Math.max(lowerBound, newData.getNumDVs());
+ higherBound += newData.getNumDVs();
+ densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
if (ndvEstimator != null) {
ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
.getNumDistinctValueEstimator(newData.getBitVectors()));
@@ -117,27 +121,26 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
// to get a good estimation.
aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
} else {
+ long estimation;
if (useDensityFunctionForNDVEstimation) {
// We have estimation, lowerbound and higherbound. We use estimation
// if it is between lowerbound and higherbound.
double densityAvg = densityAvgSum / partNames.size();
- long estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg);
+ estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg);
if (estimation < lowerBound) {
- aggregateData.setNumDVs(lowerBound);
+ estimation = lowerBound;
} else if (estimation > higherBound) {
- aggregateData.setNumDVs(higherBound);
- } else {
- aggregateData.setNumDVs(estimation);
+ estimation = higherBound;
}
} else {
- // Without useDensityFunctionForNDVEstimation, we just use the
- // default one, which is the max of all the partitions and it is
- // already done.
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
}
+ aggregateData.setNumDVs(estimation);
}
columnStatisticsData.setDoubleStats(aggregateData);
} else {
// we need extrapolation
+ LOG.debug("start extrapolation for " + colName);
Map<String, Integer> indexMap = new HashMap<String, Integer>();
for (int index = 0; index < partNames.size(); index++) {
indexMap.put(partNames.get(index), index);
@@ -225,6 +228,8 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
+ LOG.debug("Ndv estimation for " + colName + " is "
+ + columnStatisticsData.getDoubleStats().getNumDVs());
statsObj.setStatsData(columnStatisticsData);
return statsObj;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java
index af75bce..acf679e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java
@@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.Map;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
index 2da6f60..2ee09f3 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats;
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
import java.util.Collections;
import java.util.Comparator;
@@ -33,10 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {
+ private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class);
+
@Override
public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
List<ColumnStatistics> css) throws MetaException {
@@ -45,6 +49,7 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
// check if all the ColumnStatisticsObjs contain stats and all the ndv are
// bitvectors
boolean doAllPartitionContainStats = partNames.size() == css.size();
+ LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
NumDistinctValueEstimator ndvEstimator = null;
String colType = null;
for (ColumnStatistics cs : css) {
@@ -83,6 +88,7 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
ndvEstimator = NumDistinctValueEstimatorFactory
.getEmptyNumDistinctValueEstimator(ndvEstimator);
}
+ LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
if (doAllPartitionContainStats || css.size() < 2) {
LongColumnStatsData aggregateData = null;
@@ -92,11 +98,9 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
for (ColumnStatistics cs : css) {
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
LongColumnStatsData newData = cso.getStatsData().getLongStats();
- if (useDensityFunctionForNDVEstimation) {
- lowerBound = Math.max(lowerBound, newData.getNumDVs());
- higherBound += newData.getNumDVs();
- densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
- }
+ lowerBound = Math.max(lowerBound, newData.getNumDVs());
+ higherBound += newData.getNumDVs();
+ densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
if (ndvEstimator != null) {
ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
.getNumDistinctValueEstimator(newData.getBitVectors()));
@@ -117,27 +121,27 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
// to get a good estimation.
aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
} else {
+ long estimation;
if (useDensityFunctionForNDVEstimation) {
// We have estimation, lowerbound and higherbound. We use estimation
// if it is between lowerbound and higherbound.
double densityAvg = densityAvgSum / partNames.size();
- long estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg);
+ estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg);
if (estimation < lowerBound) {
- aggregateData.setNumDVs(lowerBound);
+ estimation = lowerBound;
} else if (estimation > higherBound) {
- aggregateData.setNumDVs(higherBound);
- } else {
- aggregateData.setNumDVs(estimation);
+ estimation = higherBound;
}
} else {
- // Without useDensityFunctionForNDVEstimation, we just use the
- // default one, which is the max of all the partitions and it is
- // already done.
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
}
+ aggregateData.setNumDVs(estimation);
}
columnStatisticsData.setLongStats(aggregateData);
} else {
// we need extrapolation
+ LOG.debug("start extrapolation for " + colName);
+
Map<String, Integer> indexMap = new HashMap<String, Integer>();
for (int index = 0; index < partNames.size(); index++) {
indexMap.put(partNames.get(index), index);
@@ -226,6 +230,8 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
statsObj.setStatsData(columnStatisticsData);
+ LOG.debug("Ndv estimation for " + colName + " is "
+ + columnStatisticsData.getLongStats().getNumDVs());
return statsObj;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
deleted file mode 100644
index 83c6c54..0000000
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hadoop.hive.metastore.hbase.stats;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
-import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
-
-public class StringColumnStatsAggregator extends ColumnStatsAggregator {
-
- @Override
- public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
- List<ColumnStatistics> css) throws MetaException {
- ColumnStatisticsObj statsObj = null;
-
- // check if all the ColumnStatisticsObjs contain stats and all the ndv are
- // bitvectors. Only when both of the conditions are true, we merge bit
- // vectors. Otherwise, just use the maximum function.
- boolean doAllPartitionContainStats = partNames.size() == css.size();
- NumDistinctValueEstimator ndvEstimator = null;
- String colType = null;
- for (ColumnStatistics cs : css) {
- if (cs.getStatsObjSize() != 1) {
- throw new MetaException(
- "The number of columns should be exactly one in aggrStats, but found "
- + cs.getStatsObjSize());
- }
- ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- if (statsObj == null) {
- colType = cso.getColType();
- statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
- .getStatsData().getSetField());
- }
- if (!cso.getStatsData().getStringStats().isSetBitVectors()
- || cso.getStatsData().getStringStats().getBitVectors().length() == 0) {
- ndvEstimator = null;
- break;
- } else {
- // check if all of the bit vectors can merge
- NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors());
- if (ndvEstimator == null) {
- ndvEstimator = estimator;
- } else {
- if (ndvEstimator.canMerge(estimator)) {
- continue;
- } else {
- ndvEstimator = null;
- break;
- }
- }
- }
- }
- if (ndvEstimator != null) {
- ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator);
- }
- ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
- if (doAllPartitionContainStats && ndvEstimator!=null) {
- StringColumnStatsData aggregateData = null;
- for (ColumnStatistics cs : css) {
- ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- StringColumnStatsData newData = cso.getStatsData().getStringStats();
- ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
- .getNumDistinctValueEstimator(newData.getBitVectors()));
- if (aggregateData == null) {
- aggregateData = newData.deepCopy();
- } else {
- aggregateData
- .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
- aggregateData
- .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
- aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- }
- }
- aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
- columnStatisticsData.setStringStats(aggregateData);
- } else {
- StringColumnStatsData aggregateData = null;
- for (ColumnStatistics cs : css) {
- ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- StringColumnStatsData newData = cso.getStatsData().getStringStats();
- if (aggregateData == null) {
- aggregateData = newData.deepCopy();
- } else {
- aggregateData
- .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
- aggregateData
- .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
- aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
- aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
- }
- }
- columnStatisticsData.setStringStats(aggregateData);
- }
- statsObj.setStatsData(columnStatisticsData);
- return statsObj;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
index af0669e..4c2d1bc 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
index 33ff6a1..8e50153 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
index d3051a2..474d4dd 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.slf4j.Logger;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
index c013ba5..0ce1847 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java
index e899bfe..2542a00 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
index 4099ffc..4e8e129 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
index 1691fc9..4ef5c39 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
index 361af35..acf7f03 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
index 8e28f90..b3cd33c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+package org.apache.hadoop.hive.metastore.columnstats.merge;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
----------------------------------------------------------------------
diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
index 2967a60..20129bb 100644
--- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
+++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
@@ -48,6 +48,7 @@ public class MPartitionColumnStatistics {
private String decimalHighValue;
private Long numNulls;
private Long numDVs;
+ private byte[] bitVector;
private Double avgColLen;
private Long maxColLen;
private Long numTrues;
@@ -166,31 +167,35 @@ public class MPartitionColumnStatistics {
this.numNulls = numNulls;
}
- public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) {
+ public void setLongStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.longLowValue = lowValue;
this.longHighValue = highValue;
}
- public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) {
+ public void setDoubleStats(Long numNulls, Long numNDVs, byte[] bitVector, Double lowValue, Double highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.doubleLowValue = lowValue;
this.doubleHighValue = highValue;
}
public void setDecimalStats(
- Long numNulls, Long numNDVs, String lowValue, String highValue) {
+ Long numNulls, Long numNDVs, byte[] bitVector, String lowValue, String highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.decimalLowValue = lowValue;
this.decimalHighValue = highValue;
}
- public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) {
+ public void setStringStats(Long numNulls, Long numNDVs, byte[] bitVector, Long maxColLen, Double avgColLen) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.maxColLen = maxColLen;
this.avgColLen = avgColLen;
}
@@ -201,9 +206,10 @@ public class MPartitionColumnStatistics {
this.avgColLen = avgColLen;
}
- public void setDateStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) {
+ public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.longLowValue = lowValue;
this.longHighValue = highValue;
}
@@ -255,4 +261,12 @@ public class MPartitionColumnStatistics {
public void setDecimalHighValue(String decimalHighValue) {
this.decimalHighValue = decimalHighValue;
}
+
+ public byte[] getBitVector() {
+ return bitVector;
+ }
+
+ public void setBitVector(byte[] bitVector) {
+ this.bitVector = bitVector;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
----------------------------------------------------------------------
diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
index 132f7a1..6cfaca3 100644
--- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
+++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
@@ -46,6 +46,7 @@ public class MTableColumnStatistics {
private String decimalHighValue;
private Long numNulls;
private Long numDVs;
+ private byte[] bitVector;
private Double avgColLen;
private Long maxColLen;
private Long numTrues;
@@ -156,31 +157,35 @@ public class MTableColumnStatistics {
this.numNulls = numNulls;
}
- public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) {
+ public void setLongStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.longLowValue = lowValue;
this.longHighValue = highValue;
}
- public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) {
+ public void setDoubleStats(Long numNulls, Long numNDVs, byte[] bitVector, Double lowValue, Double highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.doubleLowValue = lowValue;
this.doubleHighValue = highValue;
}
public void setDecimalStats(
- Long numNulls, Long numNDVs, String lowValue, String highValue) {
+ Long numNulls, Long numNDVs, byte[] bitVector, String lowValue, String highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.decimalLowValue = lowValue;
this.decimalHighValue = highValue;
}
- public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) {
+ public void setStringStats(Long numNulls, Long numNDVs, byte[] bitVector, Long maxColLen, Double avgColLen) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.maxColLen = maxColLen;
this.avgColLen = avgColLen;
}
@@ -191,9 +196,10 @@ public class MTableColumnStatistics {
this.avgColLen = avgColLen;
}
- public void setDateStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) {
+ public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
+ this.bitVector = bitVector;
this.longLowValue = lowValue;
this.longHighValue = highValue;
}
@@ -246,4 +252,12 @@ public class MTableColumnStatistics {
public void setDecimalHighValue(String decimalHighValue) {
this.decimalHighValue = decimalHighValue;
}
+
+ public byte[] getBitVector() {
+ return bitVector;
+ }
+
+ public void setBitVector(byte[] bitVector) {
+ this.bitVector = bitVector;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/model/package.jdo
----------------------------------------------------------------------
diff --git a/metastore/src/model/package.jdo b/metastore/src/model/package.jdo
index 9c4bc21..570fd44 100644
--- a/metastore/src/model/package.jdo
+++ b/metastore/src/model/package.jdo
@@ -879,6 +879,9 @@
<field name="numDVs">
<column name="NUM_DISTINCTS" jdbc-type="BIGINT" allows-null="true"/>
</field>
+ <field name="bitVector">
+ <column name="BIT_VECTOR" jdbc-type="BLOB" allows-null="true"/>
+ </field>
<field name="avgColLen">
<column name="AVG_COL_LEN" jdbc-type="DOUBLE" allows-null="true"/>
</field>
@@ -943,6 +946,9 @@
<field name="numDVs">
<column name="NUM_DISTINCTS" jdbc-type="BIGINT" allows-null="true"/>
</field>
+ <field name="bitVector">
+ <column name="BIT_VECTOR" jdbc-type="BLOB" allows-null="true"/>
+ </field>
<field name="avgColLen">
<column name="AVG_COL_LEN" jdbc-type="DOUBLE" allows-null="true"/>
</field>