You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2015/05/20 18:02:04 UTC

[45/50] [abbrv] hive git commit: HIVE-10745 : Better null handling by Vectorizer (Ashutosh Chauhan via Hari Sankar Sivarama Subramaniyan)

HIVE-10745 : Better null handling by Vectorizer (Ashutosh Chauhan via Hari Sankar Sivarama Subramaniyan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d1176128
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d1176128
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d1176128

Branch: refs/heads/parquet
Commit: d11761287daeedf4ca8dc8a5977667046722096c
Parents: b70a335
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Mon May 18 16:52:55 2015 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue May 19 22:17:55 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/exec/ExprNodeEvaluatorFactory.java  |  2 -
 .../ql/exec/vector/VectorizationContext.java    | 50 ++++++++++++--------
 2 files changed, 30 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d1176128/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
index f08321c..5a532c4 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
@@ -27,8 +27,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.NullWritable;
 
 /**
  * ExprNodeEvaluatorFactory.

http://git-wip-us.apache.org/repos/asf/hive/blob/d1176128/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 48f34a9..61d2972 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -19,7 +19,6 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.lang.reflect.Constructor;
-import java.math.BigDecimal;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
@@ -37,7 +36,6 @@ import java.util.regex.Pattern;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -129,8 +127,8 @@ public class VectorizationContext {
   private static final Log LOG = LogFactory.getLog(
       VectorizationContext.class.getName());
 
-  private String contextName;
-  private int level;
+  private final String contextName;
+  private final int level;
 
   VectorExpressionDescriptor vMap;
 
@@ -359,7 +357,7 @@ public class VectorizationContext {
     }
 
     public int[] currentScratchColumns() {
-      TreeSet<Integer> treeSet = new TreeSet();
+      TreeSet<Integer> treeSet = new TreeSet<Integer>();
       for (Integer col : usedOutputColumns) {
         treeSet.add(initialOutputCol + col);
       }
@@ -440,8 +438,6 @@ public class VectorizationContext {
         ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
             childExpressions, mode, exprDesc.getTypeInfo());
       }
-    } else if (exprDesc instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)exprDesc).getValue()) {
-      ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode);
     } else if (exprDesc instanceof ExprNodeConstantDesc) {
       ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(),
           mode);
@@ -691,6 +687,10 @@ public class VectorizationContext {
       case DECIMAL:
         genericUdf = new GenericUDFToDecimal();
         break;
+      case VOID:
+      case UNKNOWN:
+      // fall-through to throw exception; it's not expected for execution to reach here.
+        break;
     }
     if (genericUdf == null) {
       if (udfClass == null) {
@@ -1345,8 +1345,11 @@ public class VectorizationContext {
   }
 
   private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException {
+
+    if (null == scalar) {
+      return null;
+    }
     PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
-    int scale = HiveDecimalUtils.getScaleForType(ptinfo);
     String typename = type.getTypeName();
     HiveDecimal rawDecimal;
     switch (ptinfo.getPrimitiveCategory()) {
@@ -1384,6 +1387,9 @@ public class VectorizationContext {
   }
 
   private String castConstantToString(Object scalar, TypeInfo type) throws HiveException {
+    if (null == scalar) {
+      return null;
+    }
     PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
     String typename = type.getTypeName();
     switch (ptinfo.getPrimitiveCategory()) {
@@ -1403,6 +1409,9 @@ public class VectorizationContext {
   }
 
   private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException {
+    if (null == scalar) {
+      return null;
+    }
     PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
     String typename = type.getTypeName();
     switch (ptinfo.getPrimitiveCategory()) {
@@ -1422,6 +1431,9 @@ public class VectorizationContext {
   }
 
   private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException {
+    if (null == scalar) {
+      return null;
+    }
     PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
     String typename = type.getTypeName();
     switch (ptinfo.getPrimitiveCategory()) {
@@ -1449,8 +1461,6 @@ public class VectorizationContext {
       Object constantValue = ((ExprNodeConstantDesc) child).getValue();
       HiveDecimal decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo());
       return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION);
-    } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) {
-      return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
     }
     if (isIntFamily(inputType)) {
       return createVectorExpression(CastLongToDecimal.class, childExpr, Mode.PROJECTION, returnType);
@@ -1476,8 +1486,6 @@ public class VectorizationContext {
         Object constantValue = ((ExprNodeConstantDesc) child).getValue();
         String strValue = castConstantToString(constantValue, child.getTypeInfo());
         return getConstantVectorExpression(strValue, returnType, Mode.PROJECTION);
-    } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) {
-      return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
     }
     if (inputType.equals("boolean")) {
       // Boolean must come before the integer family. It's a special case.
@@ -1563,8 +1571,6 @@ public class VectorizationContext {
         Object constantValue = ((ExprNodeConstantDesc) child).getValue();
         Double doubleValue = castConstantToDouble(constantValue, child.getTypeInfo());
         return getConstantVectorExpression(doubleValue, returnType, Mode.PROJECTION);
-    } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) {
-      return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
     }
     if (isIntFamily(inputType)) {
       return createVectorExpression(CastLongToDouble.class, childExpr, Mode.PROJECTION, returnType);
@@ -1586,11 +1592,12 @@ public class VectorizationContext {
     ExprNodeDesc child = childExpr.get(0);
     String inputType = childExpr.get(0).getTypeString();
     if (child instanceof ExprNodeConstantDesc) {
+      if (null == ((ExprNodeConstantDesc)child).getValue()) {
+        return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION);
+      }
       // Don't do constant folding here.  Wait until the optimizer is changed to do it.
       // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
       return null;
-    } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) {
-      return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION);
     }
     // Long and double are handled using descriptors, string needs to be specially handled.
     if (isStringFamily(inputType)) {
@@ -1619,8 +1626,6 @@ public class VectorizationContext {
         Object constantValue = ((ExprNodeConstantDesc) child).getValue();
         Long longValue = castConstantToLong(constantValue, child.getTypeInfo());
         return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, Mode.PROJECTION);
-    } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) {
-      return getConstantVectorExpression(null, TypeInfoFactory.longTypeInfo, Mode.PROJECTION);
     }
     // Float family, timestamp are handled via descriptor based lookup, int family needs
     // special handling.
@@ -1770,11 +1775,16 @@ public class VectorizationContext {
         variableArgPositions.add(i);
         argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
       } else if (child instanceof ExprNodeConstantDesc) {
-
+         if (((ExprNodeConstantDesc) child).getValue() == null) {
+           // cannot handle constant null at the moment
+           throw new HiveException("Unable to vectorize custom UDF. Custom udf containing "
+               + "constant null argument cannot be currently vectorized.");
+         }
         // this is a constant
         argDescs[i].setConstant((ExprNodeConstantDesc) child);
       } else {
-        throw new HiveException("Unable to vectorize Custom UDF");
+        throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : "
+            + child);
       }
     }