You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/10/31 21:15:03 UTC
svn commit: r1635852 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
test/org/apache/hadoop/hive/ql/exec/vector/ test/queries/clientpositive/
test/results/clientpositive/
Author: gunther
Date: Fri Oct 31 20:15:02 2014
New Revision: 1635852
URL: http://svn.apache.org/r1635852
Log:
HIVE-8498: Insert into table misses some rows when vectorization is enabled (Jitendra Nath Pandey, reviewed by Matt McCline, Gopal V and Prasanth J)
Added:
hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q
hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java?rev=1635852&r1=1635851&r2=1635852&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java Fri Oct 31 20:15:02 2014
@@ -38,6 +38,9 @@ public class VectorFilterOperator extend
private VectorExpression conditionEvaluator = null;
+ // Scratch buffer that receives a copy of the incoming batch's selected vector for the duration of processOp
+ private int[] temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE];
+
// filterMode is 1 if condition is always true, -1 if always false
// and 0 if condition needs to be computed.
transient private int filterMode = 0;
@@ -82,8 +85,16 @@ public class VectorFilterOperator extend
public void processOp(Object row, int tag) throws HiveException {
VectorizedRowBatch vrg = (VectorizedRowBatch) row;
- //Evaluate the predicate expression
+
//The selected vector represents selected rows.
+ //Clone the selected vector so the predicate is evaluated against a copy rather than the incoming array
+ System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.size);
+ int [] selectedBackup = vrg.selected;
+ vrg.selected = temporarySelected;
+ int sizeBackup = vrg.size;
+ boolean selectedInUseBackup = vrg.selectedInUse;
+
+ //Evaluate the predicate expression
switch (filterMode) {
case 0:
conditionEvaluator.evaluate(vrg);
@@ -99,6 +110,11 @@ public class VectorFilterOperator extend
if (vrg.size > 0) {
forward(vrg, null);
}
+
+ // Restore the batch's original selected vector, size and selectedInUse flag
+ vrg.selected = selectedBackup;
+ vrg.size = sizeBackup;
+ vrg.selectedInUse = selectedInUseBackup;
}
static public String getOperatorName() {
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java?rev=1635852&r1=1635851&r2=1635852&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java Fri Oct 31 20:15:02 2014
@@ -106,7 +106,7 @@ public class TestVectorFilterOperator {
VectorizedRowBatch vrg = fdr.getNext();
- vfo.processOp(vrg, 0);
+ vfo.getConditionEvaluator().evaluate(vrg);
//Verify
int rows = 0;
Added: hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q?rev=1635852&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q Fri Oct 31 20:15:02 2014
@@ -0,0 +1,34 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=minimal;
+
+create table orc1
+ stored as orc
+ tblproperties("orc.compress"="ZLIB")
+ as
+ select rn
+ from
+ (
+ select cast(1 as int) as rn from src limit 1
+ union all
+ select cast(100 as int) as rn from src limit 1
+ union all
+ select cast(10000 as int) as rn from src limit 1
+ ) t;
+
+create table orc_rn1 (rn int);
+create table orc_rn2 (rn int);
+create table orc_rn3 (rn int);
+
+explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000;
+
+from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000;
+
+select * from orc_rn1;
+select * from orc_rn2;
+select * from orc_rn3;
Added: hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out?rev=1635852&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out Fri Oct 31 20:15:02 2014
@@ -0,0 +1,350 @@
+PREHOOK: query: create table orc1
+ stored as orc
+ tblproperties("orc.compress"="ZLIB")
+ as
+ select rn
+ from
+ (
+ select cast(1 as int) as rn from src limit 1
+ union all
+ select cast(100 as int) as rn from src limit 1
+ union all
+ select cast(10000 as int) as rn from src limit 1
+ ) t
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc1
+POSTHOOK: query: create table orc1
+ stored as orc
+ tblproperties("orc.compress"="ZLIB")
+ as
+ select rn
+ from
+ (
+ select cast(1 as int) as rn from src limit 1
+ union all
+ select cast(100 as int) as rn from src limit 1
+ union all
+ select cast(10000 as int) as rn from src limit 1
+ ) t
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc1
+PREHOOK: query: create table orc_rn1 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn1
+POSTHOOK: query: create table orc_rn1 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn1
+PREHOOK: query: create table orc_rn2 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn2
+POSTHOOK: query: create table orc_rn2 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn2
+PREHOOK: query: create table orc_rn3 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: create table orc_rn3 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn3
+PREHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
+ Stage-6
+ Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+ Stage-4 depends on stages: Stage-0
+ Stage-5
+ Stage-7
+ Stage-8 depends on stages: Stage-7
+ Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13
+ Stage-12
+ Stage-1 depends on stages: Stage-12, Stage-11, Stage-14
+ Stage-10 depends on stages: Stage-1
+ Stage-11
+ Stage-13
+ Stage-14 depends on stages: Stage-13
+ Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19
+ Stage-18
+ Stage-2 depends on stages: Stage-18, Stage-17, Stage-20
+ Stage-16 depends on stages: Stage-2
+ Stage-17
+ Stage-19
+ Stage-20 depends on stages: Stage-19
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (rn < 100) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rn (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn1
+ Filter Operator
+ predicate: ((rn >= 100) and (rn < 1000)) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: rn (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn2
+ Filter Operator
+ predicate: (rn >= 1000) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rn (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn3
+ Execution mode: vectorized
+
+ Stage: Stage-9
+ Conditional Operator
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn1
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn1
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn1
+
+ Stage: Stage-8
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-15
+ Conditional Operator
+
+ Stage: Stage-12
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn2
+
+ Stage: Stage-10
+ Stats-Aggr Operator
+
+ Stage: Stage-11
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn2
+
+ Stage: Stage-13
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn2
+
+ Stage: Stage-14
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-21
+ Conditional Operator
+
+ Stage: Stage-18
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn3
+
+ Stage: Stage-16
+ Stats-Aggr Operator
+
+ Stage: Stage-17
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn3
+
+ Stage: Stage-19
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orc_rn3
+
+ Stage: Stage-20
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc1
+PREHOOK: Output: default@orc_rn1
+PREHOOK: Output: default@orc_rn2
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc1
+POSTHOOK: Output: default@orc_rn1
+POSTHOOK: Output: default@orc_rn2
+POSTHOOK: Output: default@orc_rn3
+POSTHOOK: Lineage: orc_rn1.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn2.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn3.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+PREHOOK: query: select * from orc_rn1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+1
+PREHOOK: query: select * from orc_rn2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+100
+PREHOOK: query: select * from orc_rn3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+10000