You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2015/08/13 01:46:27 UTC

hive git commit: HIVE-11462: Constant fold struct() UDF (Gopal V, reviewed by Hari Sankar Sivarama Subramaniyan)

Repository: hive
Updated Branches:
  refs/heads/master 0140df748 -> 16546cc4b


HIVE-11462: Constant fold struct() UDF (Gopal V, reviewed by Hari Sankar Sivarama Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16546cc4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16546cc4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16546cc4

Branch: refs/heads/master
Commit: 16546cc4b8f6944f5ea4ad13f480dcc402e6757c
Parents: 0140df7
Author: Gopal V <go...@apache.org>
Authored: Wed Aug 12 16:45:58 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Wed Aug 12 16:45:58 2015 -0700

----------------------------------------------------------------------
 .../optimizer/ConstantPropagateProcFactory.java | 40 +++++++++++++++-----
 .../hive/ql/plan/ExprNodeConstantDesc.java      | 29 ++++++++++++--
 .../hive/ql/udf/generic/GenericUDFIn.java       |  3 +-
 .../hive/ql/udf/generic/GenericUDFStruct.java   | 25 +++++++++---
 .../test/results/clientpositive/null_cast.q.out |  2 +-
 .../test/results/clientpositive/structin.q.out  |  2 +-
 .../results/clientpositive/udf_inline.q.out     |  2 +-
 .../results/clientpositive/udf_struct.q.out     |  2 +-
 .../test/results/clientpositive/udf_union.q.out |  2 +-
 .../objectinspector/ObjectInspectorUtils.java   |  3 ++
 10 files changed, 87 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index cf10c52..55ad0ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -75,10 +75,14 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -758,6 +762,10 @@ public final class ConstantPropagateProcFactory {
             return null;
           }
         }
+        if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
+          // nested complex types cannot be folded cleanly 
+          return null;
+        }
         Object value = constant.getValue();
         PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo();
         Object writableValue = null == value ? value :
@@ -774,6 +782,10 @@ public final class ConstantPropagateProcFactory {
           return null;
         }
         ExprNodeConstantDesc constant = (ExprNodeConstantDesc) evaluatedFn;
+        if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
+          // nested complex types cannot be folded cleanly
+          return null;
+        }
         Object writableValue = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
           (PrimitiveTypeInfo) constant.getTypeInfo()).getPrimitiveWritableObject(constant.getValue());
         arguments[i] = new DeferredJavaObject(writableValue);
@@ -790,28 +802,38 @@ public final class ConstantPropagateProcFactory {
         LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o);
       }
       if (o == null) {
-        return new ExprNodeConstantDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
+        return new ExprNodeConstantDesc(
+            TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
       }
       Class<?> clz = o.getClass();
       if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) {
         PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
         TypeInfo typeInfo = poi.getTypeInfo();
         o = poi.getPrimitiveJavaObject(o);
-        if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) ||
-            typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) ||
-            typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) {
+        if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME)
+            || typeInfo.getTypeName()
+                .contains(serdeConstants.VARCHAR_TYPE_NAME)
+            || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) {
           return new ExprNodeConstantDesc(typeInfo, o);
         }
-      } else if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
-
-      } else {
+      } else if (udf instanceof GenericUDFStruct
+          && oi instanceof StandardConstantStructObjectInspector) {
+        // do not fold named_struct, only struct()
+        ConstantObjectInspector coi = (ConstantObjectInspector) oi;
+        TypeInfo structType = TypeInfoUtils.getTypeInfoFromObjectInspector(coi);
+        return new ExprNodeConstantDesc(structType,
+            ObjectInspectorUtils.copyToStandardJavaObject(o, coi));
+      } else if (!PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
         if (LOG.isErrorEnabled()) {
-          LOG.error("Unable to evaluate " + udf + ". Return value unrecoginizable.");
+          LOG.error("Unable to evaluate " + udf
+              + ". Return value unrecoginizable.");
         }
         return null;
+      } else {
+        // fall through
       }
       String constStr = null;
-      if(arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
+      if (arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
         // remember original string representation of constant.
         constStr = arguments[0].get().toString();
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
index 2674fe3..a5221a2 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
@@ -19,12 +19,15 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
+import java.util.List;
 
 import org.apache.commons.lang.builder.HashCodeBuilder;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -73,6 +76,7 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable {
   }
 
   public void setValue(Object value) {
+    // Kryo setter
     this.value = value;
   }
 
@@ -92,8 +96,7 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable {
     return "Const " + typeInfo.toString() + " " + value;
   }
 
-  @Override
-  public String getExprString() {
+  private static String getFormatted(TypeInfo typeInfo, Object value) {
     if (value == null) {
       return "null";
     }
@@ -109,8 +112,28 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable {
         hexChars[j * 2 + 1] = hexArray[v & 0x0F];
       }
       return new String(hexChars);
+    }
+    return value.toString();
+  }
+
+  @Override
+  public String getExprString() {
+    if (typeInfo.getCategory() == Category.PRIMITIVE) {
+      return getFormatted(typeInfo, value);
+    } else if (typeInfo.getCategory() == Category.STRUCT) {
+      StringBuilder sb = new StringBuilder();
+      sb.append("const struct(");
+      List<?> items = (List<?>) getWritableObjectInspector().getWritableConstantValue();
+      List<TypeInfo> structTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
+      for (int i = 0; i < structTypes.size(); i++) {
+        final Object o = (i < items.size()) ? items.get(i) : null;
+        sb.append(getFormatted(structTypes.get(i), o)).append(",");
+      }
+      sb.setCharAt(sb.length() - 1, ')');
+      return sb.toString();
     } else {
-      return value.toString();
+      // unknown type
+      return toString();
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
index 56ac3e1..7660ca4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
@@ -60,7 +60,8 @@ import com.esotericsoftware.minlog.Log;
 public class GenericUDFIn extends GenericUDF {
 
   private transient ObjectInspector[] argumentOIs;
-  private Set<Object> constantInSet;
+  // this set is a copy of the arguments objects - avoid serializing
+  private transient Set<Object> constantInSet;
   private boolean isInSetConstant = true; //are variables from IN(...) constant
 
   private final BooleanWritable bw = new BooleanWritable();

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
index 7df3f7d..7e286fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
@@ -21,12 +21,13 @@ package org.apache.hadoop.hive.ql.udf.generic;
 import java.util.ArrayList;
 import java.util.Arrays;
 
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 @Description(name = "struct",
     value = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values")
@@ -44,9 +45,23 @@ public class GenericUDFStruct extends GenericUDF {
     for (int f = 1; f <= numFields; f++) {
       fname.add("col" + f);
     }
-    StructObjectInspector soi = 
-      ObjectInspectorFactory.getStandardStructObjectInspector(fname, Arrays.asList(arguments));
-    return soi;
+    boolean constantStruct = true;
+    for (int i = 0; i < arguments.length; i++) {
+      ObjectInspector oi = arguments[i];
+      constantStruct &= (oi.getCategory() == Category.PRIMITIVE)
+          && (oi instanceof ConstantObjectInspector);
+      if (constantStruct) {
+        // nested complex types trigger Kryo issue #216 in plan deserialization
+        ret[i] = ((ConstantObjectInspector) oi).getWritableConstantValue();
+      }
+    }
+    if (constantStruct) {
+      return ObjectInspectorFactory.getStandardConstantStructObjectInspector(fname,
+          Arrays.asList(arguments), Arrays.asList(ret));
+    } else {
+      return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
+          Arrays.asList(arguments));
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/null_cast.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/null_cast.q.out b/ql/src/test/results/clientpositive/null_cast.q.out
index b5af69b..ff37fe7 100644
--- a/ql/src/test/results/clientpositive/null_cast.q.out
+++ b/ql/src/test/results/clientpositive/null_cast.q.out
@@ -23,7 +23,7 @@ STAGE PLANS:
             Row Limit Per Split: 1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
-              expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,struct(0)) (type: array<struct<col1:int>>)
+              expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,const struct(0)) (type: array<struct<col1:int>>)
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE
               File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/structin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out
index e36fceb..81c792a 100644
--- a/ql/src/test/results/clientpositive/structin.q.out
+++ b/ql/src/test/results/clientpositive/structin.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
             alias: t11
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: (struct(id,lineid)) IN (struct('1234-1111-0074578664','3'), struct('1234-1111-0074578695','1'), struct('1234-1111-0074580704','1'), struct('1234-1111-0074581619','2'), struct('1234-1111-0074582745','1'), struct('1234-1111-0074586625','1'), struct('1234-1111-0074019112','1'), struct('1234-1111-0074019610','1'), struct('1234-1111-0074022106','1')) (type: boolean)
+              predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695','1'), const struct('1234-1111-0074580704','1'), const struct('1234-1111-0074581619','2'), const struct('1234-1111-0074582745','1'), const struct('1234-1111-0074586625','1'), const struct('1234-1111-0074019112','1'), const struct('1234-1111-0074019610','1'), const struct('1234-1111-0074022106','1')) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: id (type: string), lineid (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_inline.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out
index 7d372f3..f986abf 100644
--- a/ql/src/test/results/clientpositive/udf_inline.q.out
+++ b/ql/src/test/results/clientpositive/udf_inline.q.out
@@ -31,7 +31,7 @@ STAGE PLANS:
           alias: src
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>)
+            expressions: array(const struct(1,'dude!'),const struct(2,'Wheres'),const struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>)
             outputColumnNames: _col0
             Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE
             UDTF Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_struct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_struct.q.out b/ql/src/test/results/clientpositive/udf_struct.q.out
index d0c56c7..0d2d71d 100644
--- a/ql/src/test/results/clientpositive/udf_struct.q.out
+++ b/ql/src/test/results/clientpositive/udf_struct.q.out
@@ -29,7 +29,7 @@ STAGE PLANS:
           Row Limit Per Split: 1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: struct(1) (type: struct<col1:int>), struct(1,'a') (type: struct<col1:int,col2:string>), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string)
+            expressions: const struct(1) (type: struct<col1:int>), const struct(1,'a') (type: struct<col1:int,col2:string>), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
             ListSink

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_union.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_union.q.out b/ql/src/test/results/clientpositive/udf_union.q.out
index 73d4bdd..114040f 100644
--- a/ql/src/test/results/clientpositive/udf_union.q.out
+++ b/ql/src/test/results/clientpositive/udf_union.q.out
@@ -34,7 +34,7 @@ STAGE PLANS:
           Row Limit Per Split: 2
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: create_union(0,key) (type: uniontype<string>), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype<double,string>), create_union(1,'a',struct(2,'b')) (type: uniontype<string,struct<col1:int,col2:string>>)
+            expressions: create_union(0,key) (type: uniontype<string>), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype<double,string>), create_union(1,'a',const struct(2,'b')) (type: uniontype<string,struct<col1:int,col2:string>>)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             ListSink

http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 64dd512..00a6384 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -1084,6 +1084,9 @@ public final class ObjectInspectorUtils {
             fieldObjectInspectors.add(getStandardObjectInspector(f
             .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE));
           }
+          if (value != null && (writableValue.getClass().isArray())) {
+            writableValue = java.util.Arrays.asList((Object[])writableValue);
+          }
           return ObjectInspectorFactory.getStandardConstantStructObjectInspector(
             fieldNames,
             fieldObjectInspectors,