You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/03/22 10:31:13 UTC
hive git commit: HIVE-13313: TABLESAMPLE ROWS feature broken for
Vectorization (Matt McCline, Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master d3d5d33c3 -> bcb7d9e13
HIVE-13313: TABLESAMPLE ROWS feature broken for Vectorization (Matt McCline, Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bcb7d9e1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bcb7d9e1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bcb7d9e1
Branch: refs/heads/master
Commit: bcb7d9e1399f461cbd54945321465c0b812c6d16
Parents: d3d5d33
Author: Matt McCline <mm...@hortonworks.com>
Authored: Tue Mar 22 02:30:58 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Tue Mar 22 02:30:58 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/TableScanOperator.java | 19 +-
.../clientpositive/vector_tablesample_rows.q | 38 ++
.../vector_tablesample_rows.q.out | 371 +++++++++++++++++++
3 files changed, 425 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bcb7d9e1/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 1b3cc82..5f2a0c2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -107,9 +108,21 @@ public class TableScanOperator extends Operator<TableScanDesc> implements
**/
@Override
public void process(Object row, int tag) throws HiveException {
- if (rowLimit >= 0 && currCount++ >= rowLimit) {
- setDone(true);
- return;
+ if (rowLimit >= 0) {
+ if (row instanceof VectorizedRowBatch) {
+ VectorizedRowBatch batch = (VectorizedRowBatch) row;
+ if (currCount >= rowLimit) {
+ setDone(true);
+ return;
+ }
+ if (currCount + batch.size > rowLimit) {
+ batch.size = rowLimit - currCount;
+ }
+ currCount += batch.size;
+ } else if (currCount++ >= rowLimit) {
+ setDone(true);
+ return;
+ }
}
if (conf != null && conf.isGatherStats()) {
gatherStats(row);
http://git-wip-us.apache.org/repos/asf/hive/blob/bcb7d9e1/ql/src/test/queries/clientpositive/vector_tablesample_rows.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_tablesample_rows.q b/ql/src/test/queries/clientpositive/vector_tablesample_rows.q
new file mode 100644
index 0000000..4deb1c8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_tablesample_rows.q
@@ -0,0 +1,38 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.mapred.mode=nonstrict;
+
+explain
+select 'key1', 'value1' from alltypesorc tablesample (1 rows);
+
+select 'key1', 'value1' from alltypesorc tablesample (1 rows);
+
+
+create table decimal_2 (t decimal(18,9)) stored as orc;
+
+explain
+insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows);
+
+insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows);
+
+select count(*) from decimal_2;
+
+drop table decimal_2;
+
+
+-- Dummy tables HIVE-13190
+explain
+select count(1) from (select * from (Select 1 a) x order by x.a) y;
+
+select count(1) from (select * from (Select 1 a) x order by x.a) y;
+
+explain
+create temporary table dual as select 1;
+
+create temporary table dual as select 1;
+
+select * from dual;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/bcb7d9e1/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
new file mode 100644
index 0000000..25f2996
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -0,0 +1,371 @@
+PREHOOK: query: explain
+select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Row Limit Per Split: 1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'key1' (type: string), 'value1' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+_c0 _c1
+key1 value1
+PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_2
+PREHOOK: query: explain
+insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Row Limit Per Split: 1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 17.29 (type: decimal(18,9))
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.decimal_2
+ Execution mode: vectorized
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.decimal_2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@decimal_2
+POSTHOOK: Lineage: decimal_2.t EXPRESSION []
+_col0
+PREHOOK: query: select count(*) from decimal_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from decimal_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_2
+#### A masked pattern was here ####
+c0
+1
+PREHOOK: query: drop table decimal_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_2
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: drop table decimal_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_2
+POSTHOOK: Output: default@decimal_2
+PREHOOK: query: -- Dummy tables HIVE-13190
+explain
+select count(1) from (select * from (Select 1 a) x order by x.a) y
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Dummy tables HIVE-13190
+explain
+select count(1) from (select * from (Select 1 a) x order by x.a) y
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 1 (type: int)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+_c0
+1
+PREHOOK: query: explain
+create temporary table dual as select 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain
+create temporary table dual as select 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-8 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-8
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dual
+ Execution mode: vectorized
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-8
+ Create Table Operator:
+ Create Table
+ columns: _c0 int
+ input format: org.apache.hadoop.mapred.TextInputFormat
+#### A masked pattern was here ####
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dual
+ isTemporary: true
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dual
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dual
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create temporary table dual as select 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dual
+POSTHOOK: query: create temporary table dual as select 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dual
+_c0
+PREHOOK: query: select * from dual
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dual
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dual
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dual
+#### A masked pattern was here ####
+dual._c0
+1