You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2017/12/20 10:40:03 UTC
[02/37] hive git commit: HIVE-18149: Stats: rownum estimation from
datasize underestimates in most cases (Zoltan Haindrich,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_bucket.q.out b/ql/src/test/results/clientpositive/vector_bucket.q.out
index 137a7ad..34977a0 100644
--- a/ql/src/test/results/clientpositive/vector_bucket.q.out
+++ b/ql/src/test/results/clientpositive/vector_bucket.q.out
@@ -27,7 +27,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: values__tmp__table__1
- Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -37,7 +37,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
@@ -46,7 +46,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
Execution mode: vectorized
Map Vectorization:
@@ -66,10 +66,10 @@ STAGE PLANS:
Select Operator
expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out b/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
index bc4904d..bc5208c 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
@@ -143,7 +143,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_txt
- Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:dec:decimal(10,0)/DECIMAL_64, 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -154,7 +154,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(10,0))
sort order: +
@@ -163,7 +163,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -188,10 +188,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(10,0))
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
index 1e26d29..e783650 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
@@ -1148,7 +1148,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_precision_txt_small
- Statistics: Num rows: 1 Data size: 2661 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:dec:decimal(20,10), 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -1159,7 +1159,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 2661 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: avg(dec), sum(dec)
Group By Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
index 69c4a40..541adfb 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
@@ -254,7 +254,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf2_txt
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(14,5)/DECIMAL_64, 1:value:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -264,7 +264,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimal64ColEqualDecimal64Scalar(col 0:decimal(14,5)/DECIMAL_64, val 1000000)
predicate: (key = 10) (type: boolean)
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: null (type: double), null (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -273,13 +273,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9]
selectExpressions: ConstantVectorExpression(val null) -> 3:double, ConstantVectorExpression(val null) -> 4:double, ConstantVectorExpression(val 1.4711276743037347) -> 5:double, ConstantVectorExpression(val -0.8390715290764524) -> 6:double, ConstantVectorExpression(val -0.5440211108893698) -> 7:double, ConstantVectorExpression(val 0.6483608274590866) -> 8:double, ConstantVectorExpression(val 0.17453292519943295) -> 9:double
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -346,7 +346,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf2_txt
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(14,5)/DECIMAL_64, 1:value:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -356,7 +356,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimal64ColEqualDecimal64Scalar(col 0:decimal(14,5)/DECIMAL_64, val 1000000)
predicate: (key = 10) (type: boolean)
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -365,13 +365,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10]
selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 2.302585092994046) -> 5:double, ConstantVectorExpression(val 1.0) -> 6:double, FuncLogWithBaseLongToDouble(col 1:double) -> 7:double, VectorUDFAdaptor(log(value, 10)) -> 8:double, ConstantVectorExpression(val 1.0) -> 9:double, ConstantVectorExpression(val 3.1622776601683795) -> 10:double
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_gather_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_gather_stats.q.out b/ql/src/test/results/clientpositive/vector_gather_stats.q.out
index cf9bc85..9675a8b 100644
--- a/ql/src/test/results/clientpositive/vector_gather_stats.q.out
+++ b/ql/src/test/results/clientpositive/vector_gather_stats.q.out
@@ -75,7 +75,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: cd
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
index eaa4031..9432a66 100644
--- a/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
+++ b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
TableScan
alias: _dummy_table
Row Limit Per Split: 1
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
HashTable Sink Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index d7c34ac..3280236 100644
--- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -250,9 +250,9 @@ STAGE PLANS:
TableScan
alias: _dummy_table
Row Limit Per Split: 1
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -328,7 +328,7 @@ STAGE PLANS:
TableScan
alias: _dummy_table
Row Limit Per Split: 1
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 1 (type: int)
outputColumnNames: _col0
http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out b/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
index cc97d03..8ed69a4 100644
--- a/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
@@ -464,9 +464,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: parquet_nullsplit
- Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash