You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2010/10/06 21:15:12 UTC

svn commit: r1005209 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ ql/src/test/res...

Author: heyongqiang
Date: Wed Oct  6 19:15:12 2010
New Revision: 1005209

URL: http://svn.apache.org/viewvc?rev=1005209&view=rev
Log:
HIVE-1674 count(*) returns wrong result when a mapper returns empty results. (Ning Zhang via He Yongqiang)

Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Oct  6 19:15:12 2010
@@ -324,6 +324,9 @@ Trunk -  Unreleased
     HIVE-1691 Validate partition spec in analyze
     (Ning Zhang via namit)
 
+    HIVE-1674 count(*) returns wrong result when a mapper returns empty results
+    (Ning Zhang via He Yongqiang)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Wed Oct  6 19:15:12 2010
@@ -43,9 +43,9 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.Text;
@@ -874,8 +874,16 @@ public class GroupByOperator extends Ope
           // This is based on the assumption that a null row is ignored by
           // aggregation functions
           for (int ai = 0; ai < aggregations.length; ai++) {
+
+            // o is set to NULL in order to distinguish no rows at all
+            Object[] o;
+            if (aggregationParameterFields[ai].length > 0) {
+              o = new Object[aggregationParameterFields[ai].length];
+            } else {
+              o = null;
+            }
+
             // Calculate the parameters
-            Object[] o = new Object[aggregationParameterFields[ai].length];
             for (int pi = 0; pi < aggregationParameterFields[ai].length; pi++) {
               o[pi] = null;
             }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java Wed Oct  6 19:15:12 2010
@@ -146,14 +146,14 @@ public class OpProcFactory {
       for(FieldSchema col : cols) {
         fieldSchemaMap.put(col.getName(), col);
       }
-      
+
       Iterator<VirtualColumn> vcs = VirtualColumn.registry.values().iterator();
       while (vcs.hasNext()) {
         VirtualColumn vc = vcs.next();
         fieldSchemaMap.put(vc.getName(), new FieldSchema(vc.getName(),
             vc.getTypeInfo().getTypeName(), ""));
       }
-      
+
       TableAliasInfo tai = new TableAliasInfo();
       tai.setAlias(top.getConf().getAlias());
       tai.setTable(tab);
@@ -162,7 +162,7 @@ public class OpProcFactory {
         Dependency dep = new Dependency();
         BaseColumnInfo bci = new BaseColumnInfo();
         bci.setTabAlias(tai);
-        bci.setColumn(fieldSchemaMap.get(ci.getInternalName()));          
+        bci.setColumn(fieldSchemaMap.get(ci.getInternalName()));
 
         // Populate the dependency
         dep.setType(LineageInfo.DependencyType.SIMPLE);
@@ -347,15 +347,17 @@ public class OpProcFactory {
         // the dependency list of the input operator.
         if (bci_set.isEmpty()) {
           Set<TableAliasInfo> tai_set = new LinkedHashSet<TableAliasInfo>();
-          for(ColumnInfo ci : inpOp.getSchema().getSignature()) {
-            Dependency inp_dep = lctx.getIndex().getDependency(inpOp, ci);
-            // The dependency can be null as some of the input cis may not have
-            // been set in case of joins.
-            if (inp_dep != null) {
-              for(BaseColumnInfo bci : inp_dep.getBaseCols()) {
-                new_type = LineageCtx.getNewDependencyType(inp_dep.getType(), new_type);
-                tai_set.add(bci.getTabAlias());
-              }
+          if (inpOp.getSchema() != null && inpOp.getSchema().getSignature() != null ) {
+            for(ColumnInfo ci : inpOp.getSchema().getSignature()) {
+              Dependency inp_dep = lctx.getIndex().getDependency(inpOp, ci);
+            	// The dependency can be null as some of the input cis may not have
+            	// been set in case of joins.
+            	if (inp_dep != null) {
+            	  for(BaseColumnInfo bci : inp_dep.getBaseCols()) {
+            	    new_type = LineageCtx.getNewDependencyType(inp_dep.getType(), new_type);
+            	    tai_set.add(bci.getTabAlias());
+            	  }
+            	}
             }
           }
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java Wed Oct  6 19:15:12 2010
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,6 +43,8 @@ import org.apache.hadoop.io.LongWritable
           +        "which the supplied expression(s) are unique and non-NULL.")
 public class GenericUDAFCount implements GenericUDAFResolver2 {
 
+  private static final Log LOG = LogFactory.getLog(GenericUDAFCount.class.getName());
+
   @Override
   public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
       throws SemanticException {
@@ -114,6 +118,10 @@ public class GenericUDAFCount implements
     @Override
     public void iterate(AggregationBuffer agg, Object[] parameters)
       throws HiveException {
+      // parameters == null means the input table/split is empty
+      if (parameters == null) {
+        return;
+      }
       if (countAllColumns) {
         assert parameters.length == 0;
         ((CountAgg) agg).value++;

Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_count.q Wed Oct  6 19:15:12 2010
@@ -15,3 +15,6 @@ SELECT count(*) FROM src;
 
 EXPLAIN SELECT count(1) FROM src;
 SELECT count(1) FROM src;
+
+select count(1) from src where false;
+select count(*) from src where false;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out?rev=1005209&r1=1005208&r2=1005209&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_count.q.out Wed Oct  6 19:15:12 2010
@@ -74,11 +74,11 @@ STAGE PLANS:
 PREHOOK: query: SELECT count(key) FROM src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-26_584_2445455362948160825/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-41_208_4400041131469894408/-mr-10000
 POSTHOOK: query: SELECT count(key) FROM src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-26_584_2445455362948160825/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-41_208_4400041131469894408/-mr-10000
 500
 PREHOOK: query: EXPLAIN SELECT count(DISTINCT key) FROM src
 PREHOOK: type: QUERY
@@ -148,11 +148,11 @@ STAGE PLANS:
 PREHOOK: query: SELECT count(DISTINCT key) FROM src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-31_832_6294236640527541514/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-45_054_1308573560669467912/-mr-10000
 POSTHOOK: query: SELECT count(DISTINCT key) FROM src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-31_832_6294236640527541514/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-45_054_1308573560669467912/-mr-10000
 309
 PREHOOK: query: EXPLAIN SELECT count(DISTINCT key, value) FROM src
 PREHOOK: type: QUERY
@@ -228,11 +228,11 @@ STAGE PLANS:
 PREHOOK: query: SELECT count(DISTINCT key, value) FROM src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-35_826_4104856806164432180/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-48_609_6289034930587379052/-mr-10000
 POSTHOOK: query: SELECT count(DISTINCT key, value) FROM src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-35_826_4104856806164432180/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-48_609_6289034930587379052/-mr-10000
 309
 PREHOOK: query: EXPLAIN SELECT count(*) FROM src
 PREHOOK: type: QUERY
@@ -292,11 +292,11 @@ STAGE PLANS:
 PREHOOK: query: SELECT count(*) FROM src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-40_398_2344399307637124134/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-52_155_8998470975585414020/-mr-10000
 POSTHOOK: query: SELECT count(*) FROM src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-40_398_2344399307637124134/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-52_155_8998470975585414020/-mr-10000
 500
 PREHOOK: query: EXPLAIN SELECT count(1) FROM src
 PREHOOK: type: QUERY
@@ -356,9 +356,27 @@ STAGE PLANS:
 PREHOOK: query: SELECT count(1) FROM src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-45_028_714373071146042914/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-55_652_6830399124243606163/-mr-10000
 POSTHOOK: query: SELECT count(1) FROM src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/var/folders/rF/rFg7A9swER0pyf9VBov+VU+++TM/-Tmp-/arvind/hive_2010-07-06_14-12-45_028_714373071146042914/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-55_652_6830399124243606163/-mr-10000
 500
+PREHOOK: query: select count(1) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-59_254_6453543783867223758/-mr-10000
+POSTHOOK: query: select count(1) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-09-59_254_6453543783867223758/-mr-10000
+0
+PREHOOK: query: select count(*) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-10-02_792_1217690019692171518/-mr-10000
+POSTHOOK: query: select count(*) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-10-01_11-10-02_792_1217690019692171518/-mr-10000
+0