You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/08 21:28:42 UTC
svn commit: r1540173 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
test/queries/clientpositive/orc_predicate_pushdown.q
test/results/clientpositive/orc_predicate_pushdown.q.out
Author: gunther
Date: Fri Nov 8 20:28:42 2013
New Revision: 1540173
URL: http://svn.apache.org/r1540173
Log:
HIVE-5601: NPE in ORC's PPD when using select * from table with where predicate (Prasanth J via Owen O'Malley and Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q
hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1540173&r1=1540172&r2=1540173&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Fri Nov 8 20:28:42 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -362,6 +363,13 @@ final class ReaderImpl implements Reader
public RecordReader rows(long offset, long length, boolean[] include,
SearchArgument sarg, String[] columnNames
) throws IOException {
+
+ // if included columns is null, then include all columns
+ if (include == null) {
+ include = new boolean[footer.getTypesCount()];
+ Arrays.fill(include, true);
+ }
+
return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset,
length, footer.getTypesList(), codec, bufferSize,
include, footer.getRowIndexStride(), sarg, columnNames);
Modified: hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q?rev=1540173&r1=1540172&r2=1540173&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q Fri Nov 8 20:28:42 2013
@@ -49,6 +49,16 @@ SET hive.optimize.index.filter=false;
-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
-- in table scan operator.
+SELECT * FROM orc_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM orc_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT * FROM orc_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM orc_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=false;
+
SELECT SUM(HASH(t)) FROM orc_pred
WHERE t IS NOT NULL
AND t < 0
Modified: hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out?rev=1540173&r1=1540172&r2=1540173&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out Fri Nov 8 20:28:42 2013
@@ -277,10 +277,7 @@ PREHOOK: query: -- all the following que
-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
-- in table scan operator.
-SELECT SUM(HASH(t)) FROM orc_pred
- WHERE t IS NOT NULL
- AND t < 0
- AND t > -2
+SELECT * FROM orc_pred WHERE t<2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_pred
#### A masked pattern was here ####
@@ -288,7 +285,90 @@ POSTHOOK: query: -- all the following qu
-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
-- in table scan operator.
-SELECT SUM(HASH(t)) FROM orc_pred
+SELECT * FROM orc_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
+PREHOOK: query: SELECT * FROM orc_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
+PREHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
+PREHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
+PREHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred
WHERE t IS NOT NULL
AND t < 0
AND t > -2