You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jv...@apache.org on 2010/07/31 04:33:24 UTC

svn commit: r980998 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/index/compact/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: jvs
Date: Sat Jul 31 02:33:24 2010
New Revision: 980998

URL: http://svn.apache.org/viewvc?rev=980998&view=rev
Log:
HIVE-1494. Index follow-up: remove the SORT BY clause and fix a bug in the
collect_set UDAF
(He Yongqiang via jvs)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=980998&r1=980997&r2=980998&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Sat Jul 31 02:33:24 2010
@@ -104,6 +104,10 @@ Trunk -  Unreleased
     appears more than once on CLASSPATH
     (Carl Steinbach via jvs)
 
+    HIVE-1494. Index followup:  remove sort by clause and fix a bug in
+    collect_set udaf
+    (He Yongqiang via jvs)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=980998&r1=980997&r2=980998&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Sat Jul 31 02:33:24 2010
@@ -163,8 +163,6 @@ public class CompactIndexHandler extends
     }
     command.append(" GROUP BY ");
     command.append(indexCols + ", " + VirtualColumn.FILENAME.getName());
-    command.append(" SORT BY ");
-    command.append(indexCols);
 
     Driver driver = new Driver(db.getConf());
     driver.compile(command.toString());

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=980998&r1=980997&r2=980998&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java Sat Jul 31 02:33:24 2010
@@ -80,11 +80,22 @@ public class GenericUDAFCollectSet exten
       // The output of a partial aggregation is a list
       if (m == Mode.PARTIAL1) {
         inputOI = (PrimitiveObjectInspector) parameters[0];
-        return ObjectInspectorFactory.getStandardListObjectInspector(inputOI);
+        return ObjectInspectorFactory
+            .getStandardListObjectInspector((PrimitiveObjectInspector) ObjectInspectorUtils
+                .getStandardObjectInspector(inputOI));
       } else {
-        internalMergeOI = (StandardListObjectInspector) parameters[0];
-        loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI);
-        return loi;
+        if (!(parameters[0] instanceof StandardListObjectInspector)) {
+          //no map aggregation.
+          inputOI = (PrimitiveObjectInspector)  ObjectInspectorUtils
+          .getStandardObjectInspector(parameters[0]);
+          return (StandardListObjectInspector) ObjectInspectorFactory
+              .getStandardListObjectInspector(inputOI);
+        } else {
+          internalMergeOI = (StandardListObjectInspector) parameters[0];
+          inputOI = (PrimitiveObjectInspector) internalMergeOI.getListElementObjectInspector();
+          loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI);          
+          return loi;
+        }
       }
     }
     

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q?rev=980998&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q Sat Jul 31 02:33:24 2010
@@ -0,0 +1,30 @@
+DESCRIBE FUNCTION collect_set;
+DESCRIBE FUNCTION EXTENDED collect_set;
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = false;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = true;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = true;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out?rev=980998&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out Sat Jul 31 02:33:24 2010
@@ -0,0 +1,138 @@
+PREHOOK: query: DESCRIBE FUNCTION collect_set
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION collect_set
+POSTHOOK: type: DESCFUNCTION
+collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
+POSTHOOK: type: DESCFUNCTION
+collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-05-53_522_3530309455217069909/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-05-53_522_3530309455217069909/-mr-10000
+0	["val_0"]
+10	["val_10"]
+100	["val_100"]
+103	["val_103"]
+104	["val_104"]
+105	["val_105"]
+11	["val_11"]
+111	["val_111"]
+113	["val_113"]
+114	["val_114"]
+116	["val_116"]
+118	["val_118"]
+119	["val_119"]
+12	["val_12"]
+120	["val_120"]
+125	["val_125"]
+126	["val_126"]
+128	["val_128"]
+129	["val_129"]
+131	["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-01_685_2541220772134185861/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-01_685_2541220772134185861/-mr-10000
+0	["val_0"]
+10	["val_10"]
+100	["val_100"]
+103	["val_103"]
+104	["val_104"]
+105	["val_105"]
+11	["val_11"]
+111	["val_111"]
+113	["val_113"]
+114	["val_114"]
+116	["val_116"]
+118	["val_118"]
+119	["val_119"]
+12	["val_12"]
+120	["val_120"]
+125	["val_125"]
+126	["val_126"]
+128	["val_128"]
+129	["val_129"]
+131	["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-09_632_7957341289614848394/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-09_632_7957341289614848394/-mr-10000
+0	["val_0"]
+10	["val_10"]
+100	["val_100"]
+103	["val_103"]
+104	["val_104"]
+105	["val_105"]
+11	["val_11"]
+111	["val_111"]
+113	["val_113"]
+114	["val_114"]
+116	["val_116"]
+118	["val_118"]
+119	["val_119"]
+12	["val_12"]
+120	["val_120"]
+125	["val_125"]
+126	["val_126"]
+128	["val_128"]
+129	["val_129"]
+131	["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-23_542_3742508270905277781/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-23_542_3742508270905277781/-mr-10000
+0	["val_0"]
+10	["val_10"]
+100	["val_100"]
+103	["val_103"]
+104	["val_104"]
+105	["val_105"]
+11	["val_11"]
+111	["val_111"]
+113	["val_113"]
+114	["val_114"]
+116	["val_116"]
+118	["val_118"]
+119	["val_119"]
+12	["val_12"]
+120	["val_120"]
+125	["val_125"]
+126	["val_126"]
+128	["val_128"]
+129	["val_129"]
+131	["val_131"]