You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jv...@apache.org on 2010/07/31 04:33:24 UTC
svn commit: r980998 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/index/compact/
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: jvs
Date: Sat Jul 31 02:33:24 2010
New Revision: 980998
URL: http://svn.apache.org/viewvc?rev=980998&view=rev
Log:
HIVE-1494. Index followup: remove sort by clause and fix a bug in
collect_set udaf
(He Yongqiang via jvs)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=980998&r1=980997&r2=980998&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Sat Jul 31 02:33:24 2010
@@ -104,6 +104,10 @@ Trunk - Unreleased
appears more than once on CLASSPATH
(Carl Steinbach via jvs)
+ HIVE-1494. Index followup: remove sort by clause and fix a bug in
+ collect_set udaf
+ (He Yongqiang via jvs)
+
TESTS
HIVE-1464. improve test query performance
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=980998&r1=980997&r2=980998&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Sat Jul 31 02:33:24 2010
@@ -163,8 +163,6 @@ public class CompactIndexHandler extends
}
command.append(" GROUP BY ");
command.append(indexCols + ", " + VirtualColumn.FILENAME.getName());
- command.append(" SORT BY ");
- command.append(indexCols);
Driver driver = new Driver(db.getConf());
driver.compile(command.toString());
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=980998&r1=980997&r2=980998&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java Sat Jul 31 02:33:24 2010
@@ -80,11 +80,22 @@ public class GenericUDAFCollectSet exten
// The output of a partial aggregation is a list
if (m == Mode.PARTIAL1) {
inputOI = (PrimitiveObjectInspector) parameters[0];
- return ObjectInspectorFactory.getStandardListObjectInspector(inputOI);
+ return ObjectInspectorFactory
+ .getStandardListObjectInspector((PrimitiveObjectInspector) ObjectInspectorUtils
+ .getStandardObjectInspector(inputOI));
} else {
- internalMergeOI = (StandardListObjectInspector) parameters[0];
- loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI);
- return loi;
+ if (!(parameters[0] instanceof StandardListObjectInspector)) {
+ // No map-side aggregation: parameters[0] describes the raw primitive input, not a partial-result list.
+ inputOI = (PrimitiveObjectInspector) ObjectInspectorUtils
+ .getStandardObjectInspector(parameters[0]);
+ return (StandardListObjectInspector) ObjectInspectorFactory
+ .getStandardListObjectInspector(inputOI);
+ } else {
+ internalMergeOI = (StandardListObjectInspector) parameters[0];
+ inputOI = (PrimitiveObjectInspector) internalMergeOI.getListElementObjectInspector();
+ loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI);
+ return loi;
+ }
}
}
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q?rev=980998&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q Sat Jul 31 02:33:24 2010
@@ -0,0 +1,30 @@
+DESCRIBE FUNCTION collect_set;
+DESCRIBE FUNCTION EXTENDED collect_set;
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = false;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = true;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = true;
+
+SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out?rev=980998&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out Sat Jul 31 02:33:24 2010
@@ -0,0 +1,138 @@
+PREHOOK: query: DESCRIBE FUNCTION collect_set
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION collect_set
+POSTHOOK: type: DESCFUNCTION
+collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
+POSTHOOK: type: DESCFUNCTION
+collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-05-53_522_3530309455217069909/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-05-53_522_3530309455217069909/-mr-10000
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-01_685_2541220772134185861/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-01_685_2541220772134185861/-mr-10000
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-09_632_7957341289614848394/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-09_632_7957341289614848394/-mr-10000
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-23_542_3742508270905277781/-mr-10000
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-23_542_3742508270905277781/-mr-10000
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]