You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/08/31 04:36:45 UTC
svn commit: r1519132 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
test/queries/clientpositive/avro_partitioned.q
test/results/clientpositive/avro_partitioned.q.out
Author: hashutosh
Date: Sat Aug 31 02:36:45 2013
New Revision: 1519132
URL: http://svn.apache.org/r1519132
Log:
HIVE-4789 : FetchOperator fails on partitioned Avro data (Sean Busbey via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q
hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1519132&r1=1519131&r2=1519132&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Sat Aug 31 02:36:45 2013
@@ -390,7 +390,7 @@ public class FetchOperator implements Se
splitNum = 0;
serde = partDesc.getDeserializerClass().newInstance();
- serde.initialize(job, partDesc.getProperties());
+ serde.initialize(job, partDesc.getOverlayedProperties());
if (currTbl != null) {
tblSerde = serde;
@@ -410,7 +410,7 @@ public class FetchOperator implements Se
if (LOG.isDebugEnabled()) {
LOG.debug("Creating fetchTask with deserializer typeinfo: "
+ serde.getObjectInspector().getTypeName());
- LOG.debug("deserializer properties: " + partDesc.getProperties());
+ LOG.debug("deserializer properties: " + partDesc.getOverlayedProperties());
}
if (currPart != null) {
@@ -623,7 +623,7 @@ public class FetchOperator implements Se
for (PartitionDesc listPart : listParts) {
partition = listPart;
Deserializer partSerde = listPart.getDeserializerClass().newInstance();
- partSerde.initialize(job, listPart.getProperties());
+ partSerde.initialize(job, listPart.getOverlayedProperties());
partitionedTableOI = ObjectInspectorConverters.getConvertedOI(
partSerde.getObjectInspector(), tableOI);
Modified: hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q?rev=1519132&r1=1519131&r2=1519132&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q Sat Aug 31 02:36:45 2013
@@ -64,3 +64,10 @@ SET hive.exec.dynamic.partition.mode=non
INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes;
SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 ORDER BY air_date;
+
+-- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned LIMIT 5;
+-- Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6;
+-- Fetch w/non-existant partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5;
Modified: hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out?rev=1519132&r1=1519131&r2=1519132&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out Sat Aug 31 02:36:45 2013
@@ -197,3 +197,118 @@ POSTHOOK: Lineage: episodes_partitioned
The Doctor's Wife 14 May 2011 11 11
Rose 26 March 2005 9 9
The Eleventh Hour 3 April 2010 11 11
+PREHOOK: query: -- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=1
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=11
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=2
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=4
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=5
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=6
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+POSTHOOK: query: -- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+An Unearthly Child 23 November 1963 1 1
+The Eleventh Hour 3 April 2010 11 11
+The Doctor's Wife 14 May 2011 11 11
+The Power of the Daleks 5 November 1966 2 2
+Horror of Fang Rock 3 September 1977 4 4
+PREHOOK: query: -- Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=6
+#### A masked pattern was here ####
+POSTHOOK: query: -- Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6
+#### A masked pattern was here ####
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+The Mysterious Planet 6 September 1986 6 6
+PREHOOK: query: -- Fetch w/non-existant partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+#### A masked pattern was here ####
+POSTHOOK: query: -- Fetch w/non-existant partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+#### A masked pattern was here ####
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]