You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2017/12/20 10:40:03 UTC

[02/37] hive git commit: HIVE-18149: Stats: rownum estimation from datasize underestimates in most cases (Zoltan Haindrich, reviewed by Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_bucket.q.out b/ql/src/test/results/clientpositive/vector_bucket.q.out
index 137a7ad..34977a0 100644
--- a/ql/src/test/results/clientpositive/vector_bucket.q.out
+++ b/ql/src/test/results/clientpositive/vector_bucket.q.out
@@ -27,7 +27,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: values__tmp__table__1
-            Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
             Select Operator
@@ -37,7 +37,7 @@ STAGE PLANS:
                   className: VectorSelectOperator
                   native: true
                   projectedOutputColumnNums: [0, 1]
-              Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 sort order: 
                 Map-reduce partition columns: UDFToInteger(_col0) (type: int)
@@ -46,7 +46,7 @@ STAGE PLANS:
                     native: false
                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: string), _col1 (type: string)
       Execution mode: vectorized
       Map Vectorization:
@@ -66,10 +66,10 @@ STAGE PLANS:
         Select Operator
           expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out b/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
index bc4904d..bc5208c 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
@@ -143,7 +143,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: decimal_txt
-            Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:dec:decimal(10,0)/DECIMAL_64, 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -154,7 +154,7 @@ STAGE PLANS:
                   className: VectorSelectOperator
                   native: true
                   projectedOutputColumnNums: [0]
-              Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: decimal(10,0))
                 sort order: +
@@ -163,7 +163,7 @@ STAGE PLANS:
                     native: false
                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -188,10 +188,10 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: decimal(10,0))
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
index 1e26d29..e783650 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
@@ -1148,7 +1148,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: decimal_precision_txt_small
-            Statistics: Num rows: 1 Data size: 2661 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:dec:decimal(20,10), 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -1159,7 +1159,7 @@ STAGE PLANS:
                   className: VectorSelectOperator
                   native: true
                   projectedOutputColumnNums: [0]
-              Statistics: Num rows: 1 Data size: 2661 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: avg(dec), sum(dec)
                 Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
index 69c4a40..541adfb 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
@@ -254,7 +254,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: decimal_udf2_txt
-            Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:key:decimal(14,5)/DECIMAL_64, 1:value:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -264,7 +264,7 @@ STAGE PLANS:
                   native: true
                   predicateExpression: FilterDecimal64ColEqualDecimal64Scalar(col 0:decimal(14,5)/DECIMAL_64, val 1000000)
               predicate: (key = 10) (type: boolean)
-              Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: null (type: double), null (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -273,13 +273,13 @@ STAGE PLANS:
                     native: true
                     projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9]
                     selectExpressions: ConstantVectorExpression(val null) -> 3:double, ConstantVectorExpression(val null) -> 4:double, ConstantVectorExpression(val 1.4711276743037347) -> 5:double, ConstantVectorExpression(val -0.8390715290764524) -> 6:double, ConstantVectorExpression(val -0.5440211108893698) -> 7:double, ConstantVectorExpression(val 0.6483608274590866) -> 8:double, ConstantVectorExpression(val 0.17453292519943295) -> 9:double
-                Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -346,7 +346,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: decimal_udf2_txt
-            Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:key:decimal(14,5)/DECIMAL_64, 1:value:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
@@ -356,7 +356,7 @@ STAGE PLANS:
                   native: true
                   predicateExpression: FilterDecimal64ColEqualDecimal64Scalar(col 0:decimal(14,5)/DECIMAL_64, val 1000000)
               predicate: (key = 10) (type: boolean)
-              Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -365,13 +365,13 @@ STAGE PLANS:
                     native: true
                     projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10]
                     selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 2.302585092994046) -> 5:double, ConstantVectorExpression(val 1.0) -> 6:double, FuncLogWithBaseLongToDouble(col 1:double) -> 7:double, VectorUDFAdaptor(log(value, 10)) -> 8:double, ConstantVectorExpression(val 1.0) -> 9:double, ConstantVectorExpression(val 3.1622776601683795) -> 10:double
-                Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_gather_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_gather_stats.q.out b/ql/src/test/results/clientpositive/vector_gather_stats.q.out
index cf9bc85..9675a8b 100644
--- a/ql/src/test/results/clientpositive/vector_gather_stats.q.out
+++ b/ql/src/test/results/clientpositive/vector_gather_stats.q.out
@@ -75,7 +75,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: cd
-            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
index eaa4031..9432a66 100644
--- a/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
+++ b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
           TableScan
             alias: _dummy_table
             Row Limit Per Split: 1
-            Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
               HashTable Sink Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index d7c34ac..3280236 100644
--- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -250,9 +250,9 @@ STAGE PLANS:
           TableScan
             alias: _dummy_table
             Row Limit Per Split: 1
-            Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
                 aggregations: count()
                 mode: hash
@@ -328,7 +328,7 @@ STAGE PLANS:
           TableScan
             alias: _dummy_table
             Row Limit Per Split: 1
-            Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: 1 (type: int)
               outputColumnNames: _col0

http://git-wip-us.apache.org/repos/asf/hive/blob/e26b9325/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out b/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
index cc97d03..8ed69a4 100644
--- a/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
@@ -464,9 +464,9 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: parquet_nullsplit
-            Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
                 mode: hash