You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/10/31 21:15:03 UTC

svn commit: r1635852 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ test/org/apache/hadoop/hive/ql/exec/vector/ test/queries/clientpositive/ test/results/clientpositive/

Author: gunther
Date: Fri Oct 31 20:15:02 2014
New Revision: 1635852

URL: http://svn.apache.org/r1635852
Log:
HIVE-8498: Insert into table misses some rows when vectorization is enabled (Jitendra Nath Pandey, reviewed by Matt McCline, Gopal V and Prasanth J)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q
    hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java?rev=1635852&r1=1635851&r2=1635852&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java Fri Oct 31 20:15:02 2014
@@ -38,6 +38,9 @@ public class VectorFilterOperator extend
 
   private VectorExpression conditionEvaluator = null;
 
+  // Temporary selected vector
+  private int[] temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE];
+
   // filterMode is 1 if condition is always true, -1 if always false
   // and 0 if condition needs to be computed.
   transient private int filterMode = 0;
@@ -82,8 +85,16 @@ public class VectorFilterOperator extend
   public void processOp(Object row, int tag) throws HiveException {
 
     VectorizedRowBatch vrg = (VectorizedRowBatch) row;
-    //Evaluate the predicate expression
+
     //The selected vector represents selected rows.
+    //Clone the selected vector
+    System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.size);
+    int [] selectedBackup = vrg.selected;
+    vrg.selected = temporarySelected;
+    int sizeBackup = vrg.size;
+    boolean selectedInUseBackup = vrg.selectedInUse;
+
+    //Evaluate the predicate expression
     switch (filterMode) {
       case 0:
         conditionEvaluator.evaluate(vrg);
@@ -99,6 +110,11 @@ public class VectorFilterOperator extend
     if (vrg.size > 0) {
       forward(vrg, null);
     }
+
+    // Restore the original selected vector
+    vrg.selected = selectedBackup;
+    vrg.size = sizeBackup;
+    vrg.selectedInUse = selectedInUseBackup;
   }
 
   static public String getOperatorName() {

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java?rev=1635852&r1=1635851&r2=1635852&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java Fri Oct 31 20:15:02 2014
@@ -106,7 +106,7 @@ public class TestVectorFilterOperator {
 
     VectorizedRowBatch vrg = fdr.getNext();
 
-    vfo.processOp(vrg, 0);
+    vfo.getConditionEvaluator().evaluate(vrg);
 
     //Verify
     int rows = 0;

Added: hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q?rev=1635852&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vector_multi_insert.q Fri Oct 31 20:15:02 2014
@@ -0,0 +1,34 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=minimal;
+
+create table orc1
+  stored as orc
+  tblproperties("orc.compress"="ZLIB")
+  as
+    select rn
+    from
+    (
+      select cast(1 as int) as rn from src limit 1
+      union all
+      select cast(100 as int) as rn from src limit 1
+      union all
+      select cast(10000 as int) as rn from src limit 1
+    ) t;
+
+create table orc_rn1 (rn int);
+create table orc_rn2 (rn int);
+create table orc_rn3 (rn int);
+
+explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000;
+
+from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000;
+
+select * from orc_rn1;
+select * from orc_rn2;
+select * from orc_rn3;

Added: hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out?rev=1635852&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vector_multi_insert.q.out Fri Oct 31 20:15:02 2014
@@ -0,0 +1,350 @@
+PREHOOK: query: create table orc1
+  stored as orc
+  tblproperties("orc.compress"="ZLIB")
+  as
+    select rn
+    from
+    (
+      select cast(1 as int) as rn from src limit 1
+      union all
+      select cast(100 as int) as rn from src limit 1
+      union all
+      select cast(10000 as int) as rn from src limit 1
+    ) t
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc1
+POSTHOOK: query: create table orc1
+  stored as orc
+  tblproperties("orc.compress"="ZLIB")
+  as
+    select rn
+    from
+    (
+      select cast(1 as int) as rn from src limit 1
+      union all
+      select cast(100 as int) as rn from src limit 1
+      union all
+      select cast(10000 as int) as rn from src limit 1
+    ) t
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc1
+PREHOOK: query: create table orc_rn1 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn1
+POSTHOOK: query: create table orc_rn1 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn1
+PREHOOK: query: create table orc_rn2 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn2
+POSTHOOK: query: create table orc_rn2 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn2
+PREHOOK: query: create table orc_rn3 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: create table orc_rn3 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn3
+PREHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
+  Stage-6
+  Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+  Stage-4 depends on stages: Stage-0
+  Stage-5
+  Stage-7
+  Stage-8 depends on stages: Stage-7
+  Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13
+  Stage-12
+  Stage-1 depends on stages: Stage-12, Stage-11, Stage-14
+  Stage-10 depends on stages: Stage-1
+  Stage-11
+  Stage-13
+  Stage-14 depends on stages: Stage-13
+  Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19
+  Stage-18
+  Stage-2 depends on stages: Stage-18, Stage-17, Stage-20
+  Stage-16 depends on stages: Stage-2
+  Stage-17
+  Stage-19
+  Stage-20 depends on stages: Stage-19
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (rn < 100) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: rn (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.orc_rn1
+            Filter Operator
+              predicate: ((rn >= 100) and (rn < 1000)) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Select Operator
+                expressions: rn (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.orc_rn2
+            Filter Operator
+              predicate: (rn >= 1000) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: rn (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.orc_rn3
+      Execution mode: vectorized
+
+  Stage: Stage-9
+    Conditional Operator
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn1
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.orc_rn1
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.orc_rn1
+
+  Stage: Stage-8
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-15
+    Conditional Operator
+
+  Stage: Stage-12
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn2
+
+  Stage: Stage-10
+    Stats-Aggr Operator
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.orc_rn2
+
+  Stage: Stage-13
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.orc_rn2
+
+  Stage: Stage-14
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-21
+    Conditional Operator
+
+  Stage: Stage-18
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn3
+
+  Stage: Stage-16
+    Stats-Aggr Operator
+
+  Stage: Stage-17
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.orc_rn3
+
+  Stage: Stage-19
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.orc_rn3
+
+  Stage: Stage-20
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc1
+PREHOOK: Output: default@orc_rn1
+PREHOOK: Output: default@orc_rn2
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc1
+POSTHOOK: Output: default@orc_rn1
+POSTHOOK: Output: default@orc_rn2
+POSTHOOK: Output: default@orc_rn3
+POSTHOOK: Lineage: orc_rn1.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn2.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn3.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+PREHOOK: query: select * from orc_rn1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+1
+PREHOOK: query: select * from orc_rn2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+100
+PREHOOK: query: select * from orc_rn3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+10000