Posted to commits@hive.apache.org by ha...@apache.org on 2014/09/29 19:45:49 UTC

svn commit: r1628234 - in /hive/branches/branch-0.14: ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ q...

Author: hashutosh
Date: Mon Sep 29 17:45:49 2014
New Revision: 1628234

URL: http://svn.apache.org/r1628234
Log:
HIVE-8111 : CBO trunk merge: duplicated casts for arithmetic expressions in Hive and CBO (Sergey Shelukhin via Ashutosh Chauhan)

Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/decimal_udf.q
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/decimal_udf.q.out
    hive/branches/branch-0.14/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/HiveDecimalUtils.java

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1628234&r1=1628233&r2=1628234&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Mon Sep 29 17:45:49 2014
@@ -869,15 +869,7 @@ public final class FunctionRegistry {
             TypeInfoUtils.getCharacterLengthForType(b));
         return TypeInfoFactory.getVarcharTypeInfo(maxLength);
       case DECIMAL:
-          int prec1 = HiveDecimalUtils.getPrecisionForType(a);
-          int prec2 = HiveDecimalUtils.getPrecisionForType(b);
-          int scale1 = HiveDecimalUtils.getScaleForType(a);
-          int scale2 = HiveDecimalUtils.getScaleForType(b);
-          int intPart = Math.max(prec1 - scale1, prec2 - scale2);
-          int decPart = Math.max(scale1, scale2);
-          int prec =  Math.min(intPart + decPart, HiveDecimal.MAX_PRECISION);
-          int scale = Math.min(decPart, HiveDecimal.MAX_PRECISION - intPart);
-          return TypeInfoFactory.getDecimalTypeInfo(prec, scale);
+      return HiveDecimalUtils.getDecimalTypeForPrimitiveCategories(a, b);
       default:
         // Type doesn't require any qualifiers.
         return TypeInfoFactory.getPrimitiveTypeInfo(

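Note on the hunk above: the removed block is not gone; the same precision/scale combination rule moves into HiveDecimalUtils.getDecimalTypeForPrimitiveCategories (see the last hunk of this commit). As a rough standalone illustration of that arithmetic only (the class below is invented for this example, 38 stands in for HiveDecimal.MAX_PRECISION, and int is assumed to map to precision 10, scale 0):

// Illustration only - not Hive code. Mirrors the precision/scale combination rule above.
public class CommonDecimalSketch {
  static final int MAX_PRECISION = 38; // stands in for HiveDecimal.MAX_PRECISION

  static int[] commonDecimal(int prec1, int scale1, int prec2, int scale2) {
    int intPart = Math.max(prec1 - scale1, prec2 - scale2); // digits before the decimal point
    int decPart = Math.max(scale1, scale2);                 // digits after the decimal point
    int prec = Math.min(intPart + decPart, MAX_PRECISION);
    int scale = Math.min(decPart, MAX_PRECISION - intPart);
    return new int[] { prec, scale };
  }

  public static void main(String[] args) {
    // decimal(20,10) combined with int (assumed to map to precision 10, scale 0):
    int[] t = commonDecimal(20, 10, 10, 0);
    System.out.println("decimal(" + t[0] + "," + t[1] + ")"); // prints decimal(20,10)
  }
}
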
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java?rev=1628234&r1=1628233&r2=1628234&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java Mon Sep 29 17:45:49 2014
@@ -146,39 +146,56 @@ public class RexNodeConverter {
   private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException {
     ExprNodeDesc tmpExprNode;
     RexNode tmpRN;
-    TypeInfo tgtDT = null;
 
     List<RexNode> childRexNodeLst = new LinkedList<RexNode>();
     Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType> builder();
 
-    // TODO: 1) Expand to other functions as needed 2) What about types other
-    // than primitive.
-    if (func.getGenericUDF() instanceof GenericUDFBaseNumeric) {
+    // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
+    TypeInfo tgtDT = null;
+    GenericUDF tgtUdf = func.getGenericUDF();
+    boolean isNumeric = tgtUdf instanceof GenericUDFBaseNumeric,
+        isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
+    if (isNumeric) {
       tgtDT = func.getTypeInfo();
-    } else if (func.getGenericUDF() instanceof GenericUDFBaseCompare) {
-      if (func.getChildren().size() == 2) {
-        tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
+
+      assert func.getChildren().size() == 2;
+      // TODO: checking 2 children is useless, compare already does that.
+    } else if (isCompare && (func.getChildren().size() == 2)) {
+      tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
             .getTypeInfo(), func.getChildren().get(1).getTypeInfo());
-      }
     }
 
+
     for (ExprNodeDesc childExpr : func.getChildren()) {
       tmpExprNode = childExpr;
       if (tgtDT != null
           && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
-        tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
+        if (isCompare) {
+          // For compare, we will convert requisite children
+          tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
+        } else if (isNumeric) {
+          // For numeric, we'll do minimum necessary cast - if we cast to the type
+          // of expression, bad things will happen.
+          GenericUDFBaseNumeric numericUdf = (GenericUDFBaseNumeric)tgtUdf;
+          PrimitiveTypeInfo minArgType = numericUdf.deriveMinArgumentCast(childExpr, tgtDT);
+          tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
+        } else {
+          throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
+        }
+
       }
       argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), m_cluster.getTypeFactory()));
       tmpRN = convert(tmpExprNode);
       childRexNodeLst.add(tmpRN);
     }
 
-    // This is an explicit cast
+    // See if this is an explicit cast.
     RexNode expr = null;
     RelDataType retType = null;
     expr = handleExplicitCast(func, childRexNodeLst);
 
     if (expr == null) {
+      // This is not a cast; process the function.
       retType = TypeConverter.convert(func.getTypeInfo(), m_cluster.getTypeFactory());
       SqlOperator optiqOp = SqlFunctionConverter.getOptiqOperator(func.getFuncText(),
           func.getGenericUDF(), argTypeBldr.build(), retType);

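For context: the hunk above changes which target type each child of the UDF is cast to before translation into a RexNode. A minimal stand-in sketch of the resulting per-child decision (Expr, Cast, castChild and minArgType are placeholders invented for this example, not Hive classes; the real code checks TypeInfoUtils.isConversionRequiredForComparison rather than simple type-name equality):

// Stand-in sketch of the per-child cast choice in RexNodeConverter.convert(...).
class CastChoiceSketch {
  static class Expr {
    final String type;
    Expr(String type) { this.type = type; }
  }
  static class Cast extends Expr {
    final Expr child;
    Cast(Expr child, String target) { super(target); this.child = child; }
  }

  static Expr castChild(Expr child, String tgtType, boolean isNumeric, boolean isCompare,
                        String minArgType /* stands in for deriveMinArgumentCast(child, tgtType) */) {
    if (tgtType == null || tgtType.equals(child.type)) {
      return child;                       // no conversion required for this child
    }
    if (isCompare) {
      return new Cast(child, tgtType);    // comparisons: cast to the common comparison type
    }
    if (isNumeric) {
      return new Cast(child, minArgType); // arithmetic: cast only to the minimal argument type,
                                          // not to the expression's own (wider) result type
    }
    throw new AssertionError("Unexpected " + tgtType + " - not a numeric op or compare");
  }
}
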
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java?rev=1628234&r1=1628233&r2=1628234&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java Mon Sep 29 17:45:49 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.No
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -44,6 +45,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -318,4 +320,17 @@ public abstract class GenericUDFBaseNume
   public void setAnsiSqlArithmetic(boolean ansiSqlArithmetic) {
     this.ansiSqlArithmetic = ansiSqlArithmetic;
   }
+
+  public PrimitiveTypeInfo deriveMinArgumentCast(
+      ExprNodeDesc childExpr, TypeInfo targetType) {
+    assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType;
+    PrimitiveTypeInfo pti = (PrimitiveTypeInfo)targetType;
+    // We only do the minimum cast for decimals. Other types are assumed safe; fix if needed.
+    // We also don't do anything for non-primitive children (maybe we should assert).
+    if ((pti.getPrimitiveCategory() != PrimitiveCategory.DECIMAL)
+        || (!(childExpr.getTypeInfo() instanceof PrimitiveTypeInfo))) return pti;
+    PrimitiveTypeInfo childTi = (PrimitiveTypeInfo)childExpr.getTypeInfo();
+    // If the child is also decimal, no cast is needed (we hope - can target type be narrower?).
+    return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory(childTi);
+  }
 }

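A hedged usage sketch of the new deriveMinArgumentCast helper (this assumes a Hive 0.14 classpath and that GenericUDFOPMultiply is a concrete GenericUDFBaseNumeric on this branch; the literal value and the decimal(31,10) target type are made-up inputs for illustration):

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class MinArgumentCastSketch {
  public static void main(String[] args) {
    GenericUDFBaseNumeric mul = new GenericUDFOPMultiply();
    // An int child of a multiplication whose overall type is some wider decimal.
    ExprNodeConstantDesc intChild = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 7);
    PrimitiveTypeInfo min =
        mul.deriveMinArgumentCast(intChild, TypeInfoFactory.getDecimalTypeInfo(31, 10));
    // Expected: the narrowest decimal that holds an int (decimal(10,0)),
    // rather than the expression's own decimal(31,10).
    System.out.println(min);
  }
}
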
Modified: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/decimal_udf.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/decimal_udf.q?rev=1628234&r1=1628233&r2=1628234&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/decimal_udf.q (original)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/decimal_udf.q Mon Sep 29 17:45:49 2014
@@ -39,6 +39,9 @@ SELECT key - '1.0' FROM DECIMAL_UDF;
 EXPLAIN SELECT key * key FROM DECIMAL_UDF;
 SELECT key * key FROM DECIMAL_UDF;
 
+EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0;
+SELECT key, value FROM DECIMAL_UDF where key * value > 0;
+
 EXPLAIN SELECT key * value FROM DECIMAL_UDF;
 SELECT key * value FROM DECIMAL_UDF;
 

Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/decimal_udf.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/decimal_udf.q.out?rev=1628234&r1=1628233&r2=1628234&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/decimal_udf.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/decimal_udf.q.out Mon Sep 29 17:45:49 2014
@@ -633,6 +633,61 @@ NULL
 1.00000000000000000000
 NULL
 NULL
+PREHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: decimal_udf
+          Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: ((key * value) > 0) (type: boolean)
+            Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: decimal(20,10)), value (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE
+              ListSink
+
+PREHOOK: query: SELECT key, value FROM DECIMAL_UDF where key * value > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM DECIMAL_UDF where key * value > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+100	100
+10	10
+1	1
+200	200
+20	20
+2	2
+1.0	1
+2	2
+3.14	3
+-1.12	-1
+-1.12	-1
+-1.122	-11
+1.12	1
+1.122	1
+124.00	124
+125.2	125
+-1255.49	-1255
+3.14	3
+3.14	3
+3.140	4
+1.0000000000	1
+-1234567890.1234567890	-1234567890
+1234567890.1234567800	1234567890
 PREHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF

Modified: hive/branches/branch-0.14/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/HiveDecimalUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/HiveDecimalUtils.java?rev=1628234&r1=1628233&r2=1628234&view=diff
==============================================================================
--- hive/branches/branch-0.14/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/HiveDecimalUtils.java (original)
+++ hive/branches/branch-0.14/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/HiveDecimalUtils.java Mon Sep 29 17:45:49 2014
@@ -22,6 +22,7 @@ import java.math.BigDecimal;
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 
 public class HiveDecimalUtils {
 
@@ -134,4 +135,25 @@ public class HiveDecimalUtils {
     }
   }
 
+  public static TypeInfo getDecimalTypeForPrimitiveCategories(
+      PrimitiveTypeInfo a, PrimitiveTypeInfo b) {
+    int prec1 = HiveDecimalUtils.getPrecisionForType(a);
+    int prec2 = HiveDecimalUtils.getPrecisionForType(b);
+    int scale1 = HiveDecimalUtils.getScaleForType(a);
+    int scale2 = HiveDecimalUtils.getScaleForType(b);
+    int intPart = Math.max(prec1 - scale1, prec2 - scale2);
+    int decPart = Math.max(scale1, scale2);
+    int prec =  Math.min(intPart + decPart, HiveDecimal.MAX_PRECISION);
+    int scale = Math.min(decPart, HiveDecimal.MAX_PRECISION - intPart);
+    return TypeInfoFactory.getDecimalTypeInfo(prec, scale);
+  }
+
+  public static DecimalTypeInfo getDecimalTypeForPrimitiveCategory(PrimitiveTypeInfo a) {
+    if (a instanceof DecimalTypeInfo) return (DecimalTypeInfo)a;
+    int prec = HiveDecimalUtils.getPrecisionForType(a);
+    int scale = HiveDecimalUtils.getScaleForType(a);
+    prec =  Math.min(prec, HiveDecimal.MAX_PRECISION);
+    scale = Math.min(scale, HiveDecimal.MAX_PRECISION - (prec - scale));
+    return TypeInfoFactory.getDecimalTypeInfo(prec, scale);
+  }
 }
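
Two hedged worked examples of the new helpers, assuming HiveDecimal.MAX_PRECISION = 38:

For getDecimalTypeForPrimitiveCategories, combining decimal(38,10) with decimal(38,20):

  intPart = max(38 - 10, 38 - 20) = 28
  decPart = max(10, 20)           = 20
  prec    = min(28 + 20, 38)      = 38
  scale   = min(20, 38 - 28)      = 10
  => decimal(38,10) - the integer digits are preserved and the scale absorbs the truncation.

For getDecimalTypeForPrimitiveCategory, a non-decimal primitive such as int (assumed to map to precision 10, scale 0) yields decimal(10,0), and a DecimalTypeInfo argument is returned unchanged - which is what keeps the casts injected by RexNodeConverter minimal.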