You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/08/31 04:36:45 UTC

svn commit: r1519132 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/FetchOperator.java test/queries/clientpositive/avro_partitioned.q test/results/clientpositive/avro_partitioned.q.out

Author: hashutosh
Date: Sat Aug 31 02:36:45 2013
New Revision: 1519132

URL: http://svn.apache.org/r1519132
Log:
HIVE-4789 : FetchOperator fails on partitioned Avro data (Sean Busbey via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
    hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q
    hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1519132&r1=1519131&r2=1519132&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Sat Aug 31 02:36:45 2013
@@ -390,7 +390,7 @@ public class FetchOperator implements Se
 
       splitNum = 0;
       serde = partDesc.getDeserializerClass().newInstance();
-      serde.initialize(job, partDesc.getProperties());
+      serde.initialize(job, partDesc.getOverlayedProperties());
 
       if (currTbl != null) {
         tblSerde = serde;
@@ -410,7 +410,7 @@ public class FetchOperator implements Se
       if (LOG.isDebugEnabled()) {
         LOG.debug("Creating fetchTask with deserializer typeinfo: "
             + serde.getObjectInspector().getTypeName());
-        LOG.debug("deserializer properties: " + partDesc.getProperties());
+        LOG.debug("deserializer properties: " + partDesc.getOverlayedProperties());
       }
 
       if (currPart != null) {
@@ -623,7 +623,7 @@ public class FetchOperator implements Se
       for (PartitionDesc listPart : listParts) {
         partition = listPart;
         Deserializer partSerde = listPart.getDeserializerClass().newInstance();
-        partSerde.initialize(job, listPart.getProperties());
+        partSerde.initialize(job, listPart.getOverlayedProperties());
 
         partitionedTableOI = ObjectInspectorConverters.getConvertedOI(
             partSerde.getObjectInspector(), tableOI);

Modified: hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q?rev=1519132&r1=1519131&r2=1519132&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/avro_partitioned.q Sat Aug 31 02:36:45 2013
@@ -64,3 +64,10 @@ SET hive.exec.dynamic.partition.mode=non
 INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes;
 
 SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 ORDER BY air_date;
+
+-- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned LIMIT 5;
+-- Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6;
+-- Fetch w/non-existant partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5;

Modified: hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out?rev=1519132&r1=1519131&r2=1519132&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_partitioned.q.out Sat Aug 31 02:36:45 2013
@@ -197,3 +197,118 @@ POSTHOOK: Lineage: episodes_partitioned 
 The Doctor's Wife	14 May 2011	11	11
 Rose	26 March 2005	9	9
 The Eleventh Hour	3 April 2010	11	11
+PREHOOK: query: -- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=1
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=11
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=2
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=4
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=5
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=6
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+POSTHOOK: query: -- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+An Unearthly Child	23 November 1963	1	1
+The Eleventh Hour	3 April 2010	11	11
+The Doctor's Wife	14 May 2011	11	11
+The Power of the Daleks	5 November 1966	2	2
+Horror of Fang Rock	3 September 1977	4	4
+PREHOOK: query: -- Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=6
+#### A masked pattern was here ####
+POSTHOOK: query: -- Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6
+#### A masked pattern was here ####
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+The Mysterious Planet	6 September 1986	6	6
+PREHOOK: query: -- Fetch w/non-existant partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+#### A masked pattern was here ####
+POSTHOOK: query: -- Fetch w/non-existant partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+#### A masked pattern was here ####
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]