You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2010/10/06 21:15:12 UTC
svn commit: r1005209 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: heyongqiang
Date: Wed Oct 6 19:15:12 2010
New Revision: 1005209
URL: http://svn.apache.org/viewvc?rev=1005209&view=rev
Log:
HIVE-1674 count(*) returns wrong result when a mapper returns empty results. (Ning Zhang via He Yongqiang)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Oct 6 19:15:12 2010
@@ -324,6 +324,9 @@ Trunk - Unreleased
HIVE-1691 Validate partition spec in analyze
(Ning Zhang via namit)
+ HIVE-1674 count(*) returns wrong result when a mapper returns empty results
+ (Ning Zhang via He Yongqiang)
+
TESTS
HIVE-1464. improve test query performance
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Wed Oct 6 19:15:12 2010
@@ -43,9 +43,9 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Text;
@@ -874,8 +874,16 @@ public class GroupByOperator extends Ope
// This is based on the assumption that a null row is ignored by
// aggregation functions
for (int ai = 0; ai < aggregations.length; ai++) {
+
+ // o is set to NULL in order to distinguish no rows at all
+ Object[] o;
+ if (aggregationParameterFields[ai].length > 0) {
+ o = new Object[aggregationParameterFields[ai].length];
+ } else {
+ o = null;
+ }
+
// Calculate the parameters
- Object[] o = new Object[aggregationParameterFields[ai].length];
for (int pi = 0; pi < aggregationParameterFields[ai].length; pi++) {
o[pi] = null;
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java Wed Oct 6 19:15:12 2010
@@ -146,14 +146,14 @@ public class OpProcFactory {
for(FieldSchema col : cols) {
fieldSchemaMap.put(col.getName(), col);
}
-
+
Iterator<VirtualColumn> vcs = VirtualColumn.registry.values().iterator();
while (vcs.hasNext()) {
VirtualColumn vc = vcs.next();
fieldSchemaMap.put(vc.getName(), new FieldSchema(vc.getName(),
vc.getTypeInfo().getTypeName(), ""));
}
-
+
TableAliasInfo tai = new TableAliasInfo();
tai.setAlias(top.getConf().getAlias());
tai.setTable(tab);
@@ -162,7 +162,7 @@ public class OpProcFactory {
Dependency dep = new Dependency();
BaseColumnInfo bci = new BaseColumnInfo();
bci.setTabAlias(tai);
- bci.setColumn(fieldSchemaMap.get(ci.getInternalName()));
+ bci.setColumn(fieldSchemaMap.get(ci.getInternalName()));
// Populate the dependency
dep.setType(LineageInfo.DependencyType.SIMPLE);
@@ -347,15 +347,17 @@ public class OpProcFactory {
// the dependency list of the input operator.
if (bci_set.isEmpty()) {
Set<TableAliasInfo> tai_set = new LinkedHashSet<TableAliasInfo>();
- for(ColumnInfo ci : inpOp.getSchema().getSignature()) {
- Dependency inp_dep = lctx.getIndex().getDependency(inpOp, ci);
- // The dependency can be null as some of the input cis may not have
- // been set in case of joins.
- if (inp_dep != null) {
- for(BaseColumnInfo bci : inp_dep.getBaseCols()) {
- new_type = LineageCtx.getNewDependencyType(inp_dep.getType(), new_type);
- tai_set.add(bci.getTabAlias());
- }
+ if (inpOp.getSchema() != null && inpOp.getSchema().getSignature() != null ) {
+ for(ColumnInfo ci : inpOp.getSchema().getSignature()) {
+ Dependency inp_dep = lctx.getIndex().getDependency(inpOp, ci);
+ // The dependency can be null as some of the input cis may not have
+ // been set in case of joins.
+ if (inp_dep != null) {
+ for(BaseColumnInfo bci : inp_dep.getBaseCols()) {
+ new_type = LineageCtx.getNewDependencyType(inp_dep.getType(), new_type);
+ tai_set.add(bci.getTabAlias());
+ }
+ }
}
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java Wed Oct 6 19:15:12 2010
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,6 +43,8 @@ import org.apache.hadoop.io.LongWritable
+ "which the supplied expression(s) are unique and non-NULL.")
public class GenericUDAFCount implements GenericUDAFResolver2 {
+ private static final Log LOG = LogFactory.getLog(GenericUDAFCount.class.getName());
+
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
throws SemanticException {
@@ -114,6 +118,10 @@ public class GenericUDAFCount implements
@Override
public void iterate(AggregationBuffer agg, Object[] parameters)
throws HiveException {
+ // parameters == null means the input table/split is empty
+ if (parameters == null) {
+ return;
+ }
if (countAllColumns) {
assert parameters.length == 0;
((CountAgg) agg).value++;
Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q Wed Oct 6 19:15:12 2010
@@ -15,3 +15,6 @@ SELECT count(*) FROM src;
EXPLAIN SELECT count(1) FROM src;
SELECT count(1) FROM src;
+
+select count(1) from src where false;
+select count(*) from src where false;
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out Wed Oct 6 19:15:12 2010
@@ -74,11 +74,11 @@ STAGE PLANS:
PREHOOK: query: SELECT count(key) FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-26_584_2445455362948160825/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-41_208_4400041131469894408/-mr-10000
POSTHOOK: query: SELECT count(key) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-26_584_2445455362948160825/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-41_208_4400041131469894408/-mr-10000
500
PREHOOK: query: EXPLAIN SELECT count(DISTINCT key) FROM src
PREHOOK: type: QUERY
@@ -148,11 +148,11 @@ STAGE PLANS:
PREHOOK: query: SELECT count(DISTINCT key) FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-31_832_6294236640527541514/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-45_054_1308573560669467912/-mr-10000
POSTHOOK: query: SELECT count(DISTINCT key) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-31_832_6294236640527541514/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-45_054_1308573560669467912/-mr-10000
309
PREHOOK: query: EXPLAIN SELECT count(DISTINCT key, value) FROM src
PREHOOK: type: QUERY
@@ -228,11 +228,11 @@ STAGE PLANS:
PREHOOK: query: SELECT count(DISTINCT key, value) FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-35_826_4104856806164432180/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-48_609_6289034930587379052/-mr-10000
POSTHOOK: query: SELECT count(DISTINCT key, value) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-35_826_4104856806164432180/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-48_609_6289034930587379052/-mr-10000
309
PREHOOK: query: EXPLAIN SELECT count(*) FROM src
PREHOOK: type: QUERY
@@ -292,11 +292,11 @@ STAGE PLANS:
PREHOOK: query: SELECT count(*) FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-40_398_2344399307637124134/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-52_155_8998470975585414020/-mr-10000
POSTHOOK: query: SELECT count(*) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-40_398_2344399307637124134/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-52_155_8998470975585414020/-mr-10000
500
PREHOOK: query: EXPLAIN SELECT count(1) FROM src
PREHOOK: type: QUERY
@@ -356,9 +356,27 @@ STAGE PLANS:
PREHOOK: query: SELECT count(1) FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-45_028_714373071146042914/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-55_652_6830399124243606163/-mr-10000
POSTHOOK: query: SELECT count(1) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-45_028_714373071146042914/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-55_652_6830399124243606163/-mr-10000
500
+PREHOOK: query: select count(1) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-59_254_6453543783867223758/-mr-10000
+POSTHOOK: query: select count(1) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-59_254_6453543783867223758/-mr-10000
+0
+PREHOOK: query: select count(*) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-10-02_792_1217690019692171518/-mr-10000
+POSTHOOK: query: select count(*) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-10-02_792_1217690019692171518/-mr-10000
+0