You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/08/20 14:18:35 UTC
hive git commit: HIVE-11602: Support Struct with different field types in query (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)
Repository: hive
Updated Branches:
refs/heads/master ab03dc97b -> 0012864b8
HIVE-11602: Support Struct with different field types in query (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0012864b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0012864b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0012864b
Branch: refs/heads/master
Commit: 0012864b8d370f948ac2cb875e4264100390d1d4
Parents: ab03dc9
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Aug 19 12:52:16 2015 +0300
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 20 15:17:40 2015 +0300
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 61 ++++++++++++++++++--
.../ql/udf/generic/GenericUDFBaseNumeric.java | 4 +-
ql/src/test/queries/clientpositive/structin.q | 6 ++
.../test/results/clientpositive/structin.q.out | 44 ++++++++++++++
4 files changed, 109 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 9edcc4d..4c1c53e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -132,6 +132,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -765,10 +766,11 @@ public final class FunctionRegistry {
return null;
}
- public static PrimitiveCategory getCommonCategory(TypeInfo a, TypeInfo b) {
+ public static PrimitiveCategory getPrimitiveCommonCategory(TypeInfo a, TypeInfo b) {
if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
return null;
}
+
PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory();
PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory();
@@ -802,10 +804,61 @@ public final class FunctionRegistry {
return a;
}
- PrimitiveCategory commonCat = getCommonCategory(a, b);
- if (commonCat == null)
+ // We try to infer a common primitive category
+ PrimitiveCategory commonCat = getPrimitiveCommonCategory(a, b);
+ if (commonCat != null) {
+ return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat);
+ }
+ // It is not primitive; check if it is a struct and we can infer a common class
+ if (a.getCategory() == Category.STRUCT && b.getCategory() == Category.STRUCT) {
+ return getCommonClassForStruct((StructTypeInfo)a, (StructTypeInfo)b);
+ }
+ return null;
+ }
+
+ /**
+ * Find a common class that objects of both StructTypeInfo a and StructTypeInfo b can
+ * convert to. This is used for places other than comparison.
+ *
+ * @return null if no common class could be found.
+ */
+ public static TypeInfo getCommonClassForStruct(StructTypeInfo a, StructTypeInfo b) {
+ if (a == b || a.equals(b)) {
+ return a;
+ }
+
+ List<String> names = new ArrayList<String>();
+ List<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
+
+ Iterator<String> namesIterator = a.getAllStructFieldNames().iterator();
+ Iterator<String> otherNamesIterator = b.getAllStructFieldNames().iterator();
+
+ // Compare the field names using ignore-case semantics
+ while (namesIterator.hasNext() && otherNamesIterator.hasNext()) {
+ String name = namesIterator.next();
+ if (!name.equalsIgnoreCase(otherNamesIterator.next())) {
+ return null;
+ }
+ names.add(name);
+ }
+
+ // Different number of field names
+ if (namesIterator.hasNext() || otherNamesIterator.hasNext()) {
return null;
- return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat);
+ }
+
+ // Compare the field types
+ ArrayList<TypeInfo> fromTypes = a.getAllStructFieldTypeInfos();
+ ArrayList<TypeInfo> toTypes = b.getAllStructFieldTypeInfos();
+ for (int i = 0; i < fromTypes.size(); i++) {
+ TypeInfo commonType = getCommonClass(fromTypes.get(i), toTypes.get(i));
+ if (commonType == null) {
+ return null;
+ }
+ typeInfos.add(commonType);
+ }
+
+ return TypeInfoFactory.getStructTypeInfo(names, typeInfos);
}
public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) {
http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
index ca5c459..ef6ef11 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
@@ -245,7 +245,7 @@ public abstract class GenericUDFBaseNumeric extends GenericUDFBaseBinary {
}
// Use type promotion
- PrimitiveCategory commonCat = FunctionRegistry.getCommonCategory(left, right);
+ PrimitiveCategory commonCat = FunctionRegistry.getPrimitiveCommonCategory(left, right);
if (commonCat == PrimitiveCategory.DECIMAL) {
// Hive 0.12 behavior where double * decimal -> decimal is gone.
return TypeInfoFactory.doubleTypeInfo;
@@ -267,7 +267,7 @@ public abstract class GenericUDFBaseNumeric extends GenericUDFBaseBinary {
PrimitiveTypeInfo right = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(rightOI);
// Now we are handling exact types. Base implementation handles type promotion.
- PrimitiveCategory commonCat = FunctionRegistry.getCommonCategory(left, right);
+ PrimitiveCategory commonCat = FunctionRegistry.getPrimitiveCommonCategory(left, right);
if (commonCat == PrimitiveCategory.DECIMAL) {
return deriveResultDecimalTypeInfo();
} else {
http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/test/queries/clientpositive/structin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/structin.q b/ql/src/test/queries/clientpositive/structin.q
index 48b31f3..35498bf 100644
--- a/ql/src/test/queries/clientpositive/structin.q
+++ b/ql/src/test/queries/clientpositive/structin.q
@@ -15,3 +15,9 @@ struct('1234-1111-0074019112','1'),
struct('1234-1111-0074019610','1'),
struct('1234-1111-0074022106','1')
);
+
+explain select * from t11 where struct(`id`, `lineid`)
+IN (
+struct('1234-1111-0074578664','3'),
+struct('1234-1111-0074578695',1)
+);
http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/test/results/clientpositive/structin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out
index 81c792a..f176e79 100644
--- a/ql/src/test/results/clientpositive/structin.q.out
+++ b/ql/src/test/results/clientpositive/structin.q.out
@@ -64,3 +64,47 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select * from t11 where struct(`id`, `lineid`)
+IN (
+struct('1234-1111-0074578664','3'),
+struct('1234-1111-0074578695',1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t11 where struct(`id`, `lineid`)
+IN (
+struct('1234-1111-0074578664','3'),
+struct('1234-1111-0074578695',1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t11
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695',1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+