You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2015/08/13 01:46:27 UTC
hive git commit: HIVE-11462: Constant fold struct() UDF (Gopal V, reviewed by Hari Sankar Sivarama Subramaniyan)
Repository: hive
Updated Branches:
refs/heads/master 0140df748 -> 16546cc4b
HIVE-11462: Constant fold struct() UDF (Gopal V, reviewed by Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16546cc4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16546cc4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16546cc4
Branch: refs/heads/master
Commit: 16546cc4b8f6944f5ea4ad13f480dcc402e6757c
Parents: 0140df7
Author: Gopal V <go...@apache.org>
Authored: Wed Aug 12 16:45:58 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Wed Aug 12 16:45:58 2015 -0700
----------------------------------------------------------------------
.../optimizer/ConstantPropagateProcFactory.java | 40 +++++++++++++++-----
.../hive/ql/plan/ExprNodeConstantDesc.java | 29 ++++++++++++--
.../hive/ql/udf/generic/GenericUDFIn.java | 3 +-
.../hive/ql/udf/generic/GenericUDFStruct.java | 25 +++++++++---
.../test/results/clientpositive/null_cast.q.out | 2 +-
.../test/results/clientpositive/structin.q.out | 2 +-
.../results/clientpositive/udf_inline.q.out | 2 +-
.../results/clientpositive/udf_struct.q.out | 2 +-
.../test/results/clientpositive/udf_union.q.out | 2 +-
.../objectinspector/ObjectInspectorUtils.java | 3 ++
10 files changed, 87 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index cf10c52..55ad0ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -75,10 +75,14 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -758,6 +762,10 @@ public final class ConstantPropagateProcFactory {
return null;
}
}
+ if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
+ // nested complex types cannot be folded cleanly
+ return null;
+ }
Object value = constant.getValue();
PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo();
Object writableValue = null == value ? value :
@@ -774,6 +782,10 @@ public final class ConstantPropagateProcFactory {
return null;
}
ExprNodeConstantDesc constant = (ExprNodeConstantDesc) evaluatedFn;
+ if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
+ // nested complex types cannot be folded cleanly
+ return null;
+ }
Object writableValue = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
(PrimitiveTypeInfo) constant.getTypeInfo()).getPrimitiveWritableObject(constant.getValue());
arguments[i] = new DeferredJavaObject(writableValue);
@@ -790,28 +802,38 @@ public final class ConstantPropagateProcFactory {
LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o);
}
if (o == null) {
- return new ExprNodeConstantDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
+ return new ExprNodeConstantDesc(
+ TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
}
Class<?> clz = o.getClass();
if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
TypeInfo typeInfo = poi.getTypeInfo();
o = poi.getPrimitiveJavaObject(o);
- if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) ||
- typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) ||
- typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) {
+ if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME)
+ || typeInfo.getTypeName()
+ .contains(serdeConstants.VARCHAR_TYPE_NAME)
+ || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) {
return new ExprNodeConstantDesc(typeInfo, o);
}
- } else if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
-
- } else {
+ } else if (udf instanceof GenericUDFStruct
+ && oi instanceof StandardConstantStructObjectInspector) {
+ // do not fold named_struct, only struct()
+ ConstantObjectInspector coi = (ConstantObjectInspector) oi;
+ TypeInfo structType = TypeInfoUtils.getTypeInfoFromObjectInspector(coi);
+ return new ExprNodeConstantDesc(structType,
+ ObjectInspectorUtils.copyToStandardJavaObject(o, coi));
+ } else if (!PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
if (LOG.isErrorEnabled()) {
- LOG.error("Unable to evaluate " + udf + ". Return value unrecoginizable.");
+ LOG.error("Unable to evaluate " + udf
+ + ". Return value unrecoginizable.");
}
return null;
+ } else {
+ // fall through
}
String constStr = null;
- if(arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
+ if (arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
// remember original string representation of constant.
constStr = arguments[0].get().toString();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
index 2674fe3..a5221a2 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
@@ -19,12 +19,15 @@
package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
+import java.util.List;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -73,6 +76,7 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable {
}
public void setValue(Object value) {
+ // Kryo setter
this.value = value;
}
@@ -92,8 +96,7 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable {
return "Const " + typeInfo.toString() + " " + value;
}
- @Override
- public String getExprString() {
+ private static String getFormatted(TypeInfo typeInfo, Object value) {
if (value == null) {
return "null";
}
@@ -109,8 +112,28 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable {
hexChars[j * 2 + 1] = hexArray[v & 0x0F];
}
return new String(hexChars);
+ }
+ return value.toString();
+ }
+
+ @Override
+ public String getExprString() {
+ if (typeInfo.getCategory() == Category.PRIMITIVE) {
+ return getFormatted(typeInfo, value);
+ } else if (typeInfo.getCategory() == Category.STRUCT) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("const struct(");
+ List<?> items = (List<?>) getWritableObjectInspector().getWritableConstantValue();
+ List<TypeInfo> structTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
+ for (int i = 0; i < structTypes.size(); i++) {
+ final Object o = (i < items.size()) ? items.get(i) : null;
+ sb.append(getFormatted(structTypes.get(i), o)).append(",");
+ }
+ sb.setCharAt(sb.length() - 1, ')');
+ return sb.toString();
} else {
- return value.toString();
+ // unknown type
+ return toString();
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
index 56ac3e1..7660ca4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
@@ -60,7 +60,8 @@ import com.esotericsoftware.minlog.Log;
public class GenericUDFIn extends GenericUDF {
private transient ObjectInspector[] argumentOIs;
- private Set<Object> constantInSet;
+ // this set is a copy of the arguments objects - avoid serializing
+ private transient Set<Object> constantInSet;
private boolean isInSetConstant = true; //are variables from IN(...) constant
private final BooleanWritable bw = new BooleanWritable();
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
index 7df3f7d..7e286fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java
@@ -21,12 +21,13 @@ package org.apache.hadoop.hive.ql.udf.generic;
import java.util.ArrayList;
import java.util.Arrays;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@Description(name = "struct",
value = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values")
@@ -44,9 +45,23 @@ public class GenericUDFStruct extends GenericUDF {
for (int f = 1; f <= numFields; f++) {
fname.add("col" + f);
}
- StructObjectInspector soi =
- ObjectInspectorFactory.getStandardStructObjectInspector(fname, Arrays.asList(arguments));
- return soi;
+ boolean constantStruct = true;
+ for (int i = 0; i < arguments.length; i++) {
+ ObjectInspector oi = arguments[i];
+ constantStruct &= (oi.getCategory() == Category.PRIMITIVE)
+ && (oi instanceof ConstantObjectInspector);
+ if (constantStruct) {
+ // nested complex types trigger Kryo issue #216 in plan deserialization
+ ret[i] = ((ConstantObjectInspector) oi).getWritableConstantValue();
+ }
+ }
+ if (constantStruct) {
+ return ObjectInspectorFactory.getStandardConstantStructObjectInspector(fname,
+ Arrays.asList(arguments), Arrays.asList(ret));
+ } else {
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
+ Arrays.asList(arguments));
+ }
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/null_cast.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/null_cast.q.out b/ql/src/test/results/clientpositive/null_cast.q.out
index b5af69b..ff37fe7 100644
--- a/ql/src/test/results/clientpositive/null_cast.q.out
+++ b/ql/src/test/results/clientpositive/null_cast.q.out
@@ -23,7 +23,7 @@ STAGE PLANS:
Row Limit Per Split: 1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,struct(0)) (type: array<struct<col1:int>>)
+ expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,const struct(0)) (type: array<struct<col1:int>>)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/structin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out
index e36fceb..81c792a 100644
--- a/ql/src/test/results/clientpositive/structin.q.out
+++ b/ql/src/test/results/clientpositive/structin.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
alias: t11
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (struct(id,lineid)) IN (struct('1234-1111-0074578664','3'), struct('1234-1111-0074578695','1'), struct('1234-1111-0074580704','1'), struct('1234-1111-0074581619','2'), struct('1234-1111-0074582745','1'), struct('1234-1111-0074586625','1'), struct('1234-1111-0074019112','1'), struct('1234-1111-0074019610','1'), struct('1234-1111-0074022106','1')) (type: boolean)
+ predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695','1'), const struct('1234-1111-0074580704','1'), const struct('1234-1111-0074581619','2'), const struct('1234-1111-0074582745','1'), const struct('1234-1111-0074586625','1'), const struct('1234-1111-0074019112','1'), const struct('1234-1111-0074019610','1'), const struct('1234-1111-0074022106','1')) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: id (type: string), lineid (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_inline.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out
index 7d372f3..f986abf 100644
--- a/ql/src/test/results/clientpositive/udf_inline.q.out
+++ b/ql/src/test/results/clientpositive/udf_inline.q.out
@@ -31,7 +31,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>)
+ expressions: array(const struct(1,'dude!'),const struct(2,'Wheres'),const struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_struct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_struct.q.out b/ql/src/test/results/clientpositive/udf_struct.q.out
index d0c56c7..0d2d71d 100644
--- a/ql/src/test/results/clientpositive/udf_struct.q.out
+++ b/ql/src/test/results/clientpositive/udf_struct.q.out
@@ -29,7 +29,7 @@ STAGE PLANS:
Row Limit Per Split: 1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: struct(1) (type: struct<col1:int>), struct(1,'a') (type: struct<col1:int,col2:string>), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string)
+ expressions: const struct(1) (type: struct<col1:int>), const struct(1,'a') (type: struct<col1:int,col2:string>), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_union.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_union.q.out b/ql/src/test/results/clientpositive/udf_union.q.out
index 73d4bdd..114040f 100644
--- a/ql/src/test/results/clientpositive/udf_union.q.out
+++ b/ql/src/test/results/clientpositive/udf_union.q.out
@@ -34,7 +34,7 @@ STAGE PLANS:
Row Limit Per Split: 2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: create_union(0,key) (type: uniontype<string>), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype<double,string>), create_union(1,'a',struct(2,'b')) (type: uniontype<string,struct<col1:int,col2:string>>)
+ expressions: create_union(0,key) (type: uniontype<string>), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype<double,string>), create_union(1,'a',const struct(2,'b')) (type: uniontype<string,struct<col1:int,col2:string>>)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
ListSink
http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 64dd512..00a6384 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -1084,6 +1084,9 @@ public final class ObjectInspectorUtils {
fieldObjectInspectors.add(getStandardObjectInspector(f
.getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE));
}
+ if (value != null && (writableValue.getClass().isArray())) {
+ writableValue = java.util.Arrays.asList((Object[])writableValue);
+ }
return ObjectInspectorFactory.getStandardConstantStructObjectInspector(
fieldNames,
fieldObjectInspectors,