You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/10/02 09:15:37 UTC

svn commit: r1392761 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/

Author: namit
Date: Tue Oct  2 07:15:37 2012
New Revision: 1392761

URL: http://svn.apache.org/viewvc?rev=1392761&view=rev
Log:
HIVE-3495 For UDAFs, when generating a plan without map-side-aggregation, constant 
agg parameters will be replaced by ExprNodeColumnDesc (Yin Huai via namit)



Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q
    hive/trunk/ql/src/test/results/clientpositive/count.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
    hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Oct  2 07:15:37 2012
@@ -165,6 +165,7 @@ import org.apache.hadoop.hive.serde2.Des
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -2480,6 +2481,50 @@ public class SemanticAnalyzer extends Ba
   }
 
   /**
+   * Check if the given internalName represents a constant parameter in aggregation parameters
+   * of an aggregation tree.
+   * This method is only invoked when map-side aggregation is not involved. In this case,
+   * every parameter in every aggregation tree should already have a corresponding ColumnInfo,
+   * which was generated when the ReduceSinkOperator feeding the GroupByOperator currently
+   * being generated was itself generated. If we find that this parameter is a constant,
+   * we return the corresponding ExprNodeDesc from reduceValues, so the caller does not need to
+   * use a new ExprNodeColumnDesc, which can not be treated as a constant parameter, for this
+   * parameter (since the writableObjectInspector of an ExprNodeColumnDesc will not be
+   * an instance of ConstantObjectInspector).
+   *
+   * @param internalName
+   *          the internal name of this parameter; only "VALUE._colN" style names are handled
+   * @param reduceValues
+   *          value columns of the corresponding ReduceSinkOperator; may be null
+   * @return the ExprNodeDesc of the constant parameter if the given internalName represents
+   *         a constant parameter; otherwise, return null
+   */
+  private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName,
+      List<ExprNodeDesc> reduceValues) {
+    // Only names of the form "VALUE._col([0-9]+)" are handled; anything else yields null.
+
+    String[] terms = internalName.split("\\.");
+    if (terms.length != 2 || reduceValues == null) {
+      return null;
+    }
+
+    if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) {
+      int pos = getPositionFromInternalName(terms[1]); // used as an index into reduceValues
+      if (pos >= 0 && pos < reduceValues.size()) {
+        ExprNodeDesc reduceValue = reduceValues.get(pos);
+        if (reduceValue != null) {
+          if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) {
+            // the reduce-side value is a constant, so hand back its original ExprNodeDesc
+            return reduceValue;
+          }
+        }
+      }
+    }
+
+    return null;
+  }
+
+  /**
    * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
    * The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
    *
@@ -2528,12 +2573,14 @@ public class SemanticAnalyzer extends Ba
     // get the last colName for the reduce KEY
     // it represents the column name corresponding to distinct aggr, if any
     String lastKeyColName = null;
+    List<ExprNodeDesc> reduceValues = null;
     if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
       List<String> inputKeyCols = ((ReduceSinkDesc)
           reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
       if (inputKeyCols.size() > 0) {
         lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1);
       }
+      reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols();
     }
     int numDistinctUDFs = 0;
     for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
@@ -2565,9 +2612,19 @@ public class SemanticAnalyzer extends Ba
           getColumnInternalName(i-1);
 
         }
-        aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
+
+        ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
             paraExpression, paraExprInfo.getTabAlias(),
-            paraExprInfo.getIsVirtualCol()));
+            paraExprInfo.getIsVirtualCol());
+        ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
+            paraExprInfo.getInternalName(), reduceValues);
+
+        if (reduceValue != null) {
+          // this parameter is a constant
+          expr = reduceValue;
+        }
+
+        aggParameters.add(expr);
       }
 
       if (isDistinct) {
@@ -2653,12 +2710,14 @@ public class SemanticAnalyzer extends Ba
     // get the last colName for the reduce KEY
     // it represents the column name corresponding to distinct aggr, if any
     String lastKeyColName = null;
+    List<ExprNodeDesc> reduceValues = null;
     if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
       List<String> inputKeyCols = ((ReduceSinkDesc)
           reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
       if (inputKeyCols.size() > 0) {
         lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1);
       }
+      reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols();
     }
     int numDistinctUDFs = 0;
     for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
@@ -2699,9 +2758,20 @@ public class SemanticAnalyzer extends Ba
             + getColumnInternalName(i-1);
 
           }
-          aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
+
+          ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
               paraExpression, paraExprInfo.getTabAlias(),
-              paraExprInfo.getIsVirtualCol()));
+              paraExprInfo.getIsVirtualCol());
+          ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
+              paraExprInfo.getInternalName(), reduceValues);
+
+          if (reduceValue != null) {
+            // this parameter is a constant
+            expr = reduceValue;
+          }
+
+          aggParameters.add(expr);
+
         }
       } else {
         ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);

Modified: hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q Tue Oct  2 07:15:37 2012
@@ -1,6 +1,26 @@
 
 set mapred.reduce.tasks=4;
 set hive.exec.reducers.max=4;
+set hive.map.aggr=false;
+-- disable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src;
+
+SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src;
+
+SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src;
+
+SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src;
+
+set hive.map.aggr=true;
+-- enable map-side aggregation
 SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src;
 SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src;
 SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src;

Modified: hive/trunk/ql/src/test/results/clientpositive/count.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/count.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/count.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/count.q.out Tue Oct  2 07:15:37 2012
@@ -486,7 +486,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
                 expr: count()
                 expr: count(KEY._col0:14._col0)
                 expr: count(KEY._col0:14._col1)

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out Tue Oct  2 07:15:37 2012
@@ -176,7 +176,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
           bucketGroup: false
           mode: partial1
           outputColumnNames: _col0
@@ -264,7 +264,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
           bucketGroup: false
           mode: complete
           outputColumnNames: _col0

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out Tue Oct  2 07:15:37 2012
@@ -251,7 +251,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
           bucketGroup: false
           keys:
                 expr: KEY._col0
@@ -362,7 +362,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
           bucketGroup: false
           keys:
                 expr: KEY._col0

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out Tue Oct  2 07:15:37 2012
@@ -246,7 +246,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
                 expr: count(DISTINCT KEY._col0:0._col0)
           bucketGroup: false
           mode: partial1
@@ -347,7 +347,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
                 expr: count(DISTINCT KEY._col0:0._col0)
           bucketGroup: false
           mode: complete

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out Tue Oct  2 07:15:37 2012
@@ -137,7 +137,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations:
-                expr: count(VALUE._col0)
+                expr: count(1)
                 expr: count(DISTINCT KEY._col0:0._col0)
                 expr: count(DISTINCT KEY._col0:1._col0)
           bucketGroup: false

Modified: hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out Tue Oct  2 07:15:37 2012
@@ -1,8 +1,120 @@
-PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+PREHOOK: query: -- disable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+POSTHOOK: query: -- disable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+252.77777777777777
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+252.77777777777777
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[24.07,252.77777777777777,476.9444444444444,487.82]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[24.07,252.77777777777777,476.9444444444444,487.82]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: -- enable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- enable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####