You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/08 21:28:42 UTC

svn commit: r1540173 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java test/queries/clientpositive/orc_predicate_pushdown.q test/results/clientpositive/orc_predicate_pushdown.q.out

Author: gunther
Date: Fri Nov  8 20:28:42 2013
New Revision: 1540173

URL: http://svn.apache.org/r1540173
Log:
HIVE-5601: NPE in ORC's PPD when using select * from table with where predicate (Prasanth J via Owen O'Malley and Gunther Hagleitner)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q
    hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1540173&r1=1540172&r2=1540173&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Fri Nov  8 20:28:42 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@@ -362,6 +363,13 @@ final class ReaderImpl implements Reader
   public RecordReader rows(long offset, long length, boolean[] include,
                            SearchArgument sarg, String[] columnNames
                            ) throws IOException {
+
+    // if included columns is null, then include all columns
+    if (include == null) {
+      include = new boolean[footer.getTypesCount()];
+      Arrays.fill(include, true);
+    }
+
     return new RecordReaderImpl(this.getStripes(), fileSystem,  path, offset,
         length, footer.getTypesList(), codec, bufferSize,
         include, footer.getRowIndexStride(), sarg, columnNames);

Modified: hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q?rev=1540173&r1=1540172&r2=1540173&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q Fri Nov  8 20:28:42 2013
@@ -49,6 +49,16 @@ SET hive.optimize.index.filter=false;
 -- hive.optimize.index.filter is set to true. the explain plan should show filter expression
 -- in table scan operator.
 
+SELECT * FROM orc_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM orc_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT * FROM orc_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM orc_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=false;
+
 SELECT SUM(HASH(t)) FROM orc_pred
   WHERE t IS NOT NULL
   AND t < 0

Modified: hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out?rev=1540173&r1=1540172&r2=1540173&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out Fri Nov  8 20:28:42 2013
@@ -277,10 +277,7 @@ PREHOOK: query: -- all the following que
 -- hive.optimize.index.filter is set to true. the explain plan should show filter expression
 -- in table scan operator.
 
-SELECT SUM(HASH(t)) FROM orc_pred
-  WHERE t IS NOT NULL
-  AND t < 0
-  AND t > -2
+SELECT * FROM orc_pred WHERE t<2 limit 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_pred
 #### A masked pattern was here ####
@@ -288,7 +285,90 @@ POSTHOOK: query: -- all the following qu
 -- hive.optimize.index.filter is set to true. the explain plan should show filter expression
 -- in table scan operator.
 
-SELECT SUM(HASH(t)) FROM orc_pred
+SELECT * FROM orc_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+-3	467	65575	4294967437	81.64	23.53	true	tom hernandez	2013-03-01 09:11:58.703188	32.85	study skills
+PREHOOK: query: SELECT * FROM orc_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+-3	467	65575	4294967437	81.64	23.53	true	tom hernandez	2013-03-01 09:11:58.703188	32.85	study skills
+PREHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+124	336	65664	4294967435	74.72	42.47	true	bob davidson	2013-03-01 09:11:58.703302	45.4	yard duty
+PREHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+124	336	65664	4294967435	74.72	42.47	true	bob davidson	2013-03-01 09:11:58.703302	45.4	yard duty
+PREHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred
   WHERE t IS NOT NULL
   AND t < 0
   AND t > -2