You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ji...@apache.org on 2014/04/07 20:48:23 UTC

svn commit: r1585557 - in /hive/branches/branch-0.13/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java test/queries/clientpositive/vector_non_string_partition.q test/results/clientpositive/vector_non_string_partition.q.out

Author: jitendra
Date: Mon Apr  7 18:48:23 2014
New Revision: 1585557

URL: http://svn.apache.org/r1585557
Log:
HIVE-6841: Vectorized execution throws NPE for partitioning columns with __HIVE_DEFAULT_PARTITION__ (reviewed by Hari, Ashutosh)

Added:
    hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vector_non_string_partition.q
    hive/branches/branch-0.13/ql/src/test/results/clientpositive/vector_non_string_partition.q.out
Modified:
    hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java

Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1585557&r1=1585556&r2=1585557&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Mon Apr  7 18:48:23 2014
@@ -29,6 +29,8 @@ import java.util.Properties;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -63,7 +65,9 @@ import org.apache.hadoop.mapred.FileSpli
  * with the partition column.
  */
 public class VectorizedRowBatchCtx {
-  
+
+  private static final Log LOG = LogFactory.getLog(VectorizedRowBatchCtx.class.getName());
+
   // OI for raw row data (EG without partition cols)
   private StructObjectInspector rawRowOI;
 
@@ -223,6 +227,9 @@ public class VectorizedRowBatchCtx {
                   convert(partSpec.get(key));              
           partitionTypes.put(key, TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]).getPrimitiveCategory());
         }
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition column: name: " + key + ", value: " + objectVal + ", type: " + partitionTypes.get(key));
+        }
         partitionValues.put(key, objectVal);
         partObjectInspectors.add(objectInspector);
       }
@@ -263,7 +270,7 @@ public class VectorizedRowBatchCtx {
       // in the included list.
       if ((colsToInclude == null) || colsToInclude.contains(j)
           || ((partitionValues != null) &&
-              (partitionValues.get(fieldRefs.get(j).getFieldName()) != null))) {
+              partitionValues.containsKey(fieldRefs.get(j).getFieldName()))) {
         ObjectInspector foi = fieldRefs.get(j).getFieldObjectInspector();
         switch (foi.getCategory()) {
         case PRIMITIVE: {

Added: hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vector_non_string_partition.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vector_non_string_partition.q?rev=1585557&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vector_non_string_partition.q (added)
+++ hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vector_non_string_partition.q Mon Apr  7 18:48:23 2014
@@ -0,0 +1,17 @@
+SET hive.vectorized.execution.enabled=true;
+CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+SET hive.exec.dynamic.partition=true;
+
+INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc 
+WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble;
+
+SHOW PARTITIONS non_string_part;
+
+EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+
+SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+
+EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
+
+SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;

Added: hive/branches/branch-0.13/ql/src/test/results/clientpositive/vector_non_string_partition.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/vector_non_string_partition.q.out?rev=1585557&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/results/clientpositive/vector_non_string_partition.q.out (added)
+++ hive/branches/branch-0.13/ql/src/test/results/clientpositive/vector_non_string_partition.q.out Mon Apr  7 18:48:23 2014
@@ -0,0 +1,180 @@
+PREHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@non_string_part
+PREHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc 
+WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@non_string_part
+POSTHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc 
+WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+PREHOOK: query: SHOW PARTITIONS non_string_part
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@non_string_part
+POSTHOOK: query: SHOW PARTITIONS non_string_part
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@non_string_part
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+ctinyint=__HIVE_DEFAULT_PARTITION__
+PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: non_string_part
+            Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint > 0) (type: boolean)
+              Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), ctinyint (type: tinyint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: tinyint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_string_part
+PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_string_part
+POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+762	NULL
+762	NULL
+6981	NULL
+6981	NULL
+6981	NULL
+86028	NULL
+504142	NULL
+799471	NULL
+1248059	NULL
+1286921	NULL
+PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: non_string_part
+            Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint > 0) (type: boolean)
+              Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), cstring1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_string_part
+PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_string_part
+POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+762	3WsVeqb28VWEEOLI8ail
+762	40ks5556SV
+6981	1FNNhmiFLGw425NA13g
+6981	o5mb0QP5Y48Qd4vdB0
+6981	sF2CRfgt2K
+86028	T2o8XRFAL0HC4ikDQnfoCymw
+504142	PlOxor04p5cvVl
+799471	2fu24
+1248059	Uhps6mMh3IfHB3j7yH62K
+1286921	ODLrXI8882q8LS8