You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/08/20 14:18:35 UTC

hive git commit: HIVE-11602: Support Struct with different field types in query (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)

Repository: hive
Updated Branches:
  refs/heads/master ab03dc97b -> 0012864b8


HIVE-11602: Support Struct with different field types in query (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0012864b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0012864b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0012864b

Branch: refs/heads/master
Commit: 0012864b8d370f948ac2cb875e4264100390d1d4
Parents: ab03dc9
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Aug 19 12:52:16 2015 +0300
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 20 15:17:40 2015 +0300

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   | 61 ++++++++++++++++++--
 .../ql/udf/generic/GenericUDFBaseNumeric.java   |  4 +-
 ql/src/test/queries/clientpositive/structin.q   |  6 ++
 .../test/results/clientpositive/structin.q.out  | 44 ++++++++++++++
 4 files changed, 109 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 9edcc4d..4c1c53e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -132,6 +132,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -765,10 +766,11 @@ public final class FunctionRegistry {
     return null;
   }
 
-  public static PrimitiveCategory getCommonCategory(TypeInfo a, TypeInfo b) {
+  public static PrimitiveCategory getPrimitiveCommonCategory(TypeInfo a, TypeInfo b) {
     if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
       return null;
     }
+
     PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory();
     PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory();
 
@@ -802,10 +804,61 @@ public final class FunctionRegistry {
       return a;
     }
 
-    PrimitiveCategory commonCat = getCommonCategory(a, b);
-    if (commonCat == null)
+    // We try to infer a common primitive category
+    PrimitiveCategory commonCat = getPrimitiveCommonCategory(a, b);
+    if (commonCat != null) {
+      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat);
+    }
+    // It is not primitive; check if it is a struct and we can infer a common class
+    if (a.getCategory() == Category.STRUCT && b.getCategory() == Category.STRUCT) {
+      return getCommonClassForStruct((StructTypeInfo)a, (StructTypeInfo)b);
+    }
+    return null;
+  }
+
+  /**
+   * Find a common class that objects of both StructTypeInfo a and StructTypeInfo b can
+   * convert to. This is used for places other than comparison.
+   *
+   * @return null if no common class could be found.
+   */
+  public static TypeInfo getCommonClassForStruct(StructTypeInfo a, StructTypeInfo b) {
+    if (a == b || a.equals(b)) {
+      return a;
+    }
+
+    List<String> names = new ArrayList<String>();
+    List<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
+
+    Iterator<String> namesIterator = a.getAllStructFieldNames().iterator();
+    Iterator<String> otherNamesIterator = b.getAllStructFieldNames().iterator();
+
+    // Compare the field names using ignore-case semantics
+    while (namesIterator.hasNext() && otherNamesIterator.hasNext()) {
+      String name = namesIterator.next();
+      if (!name.equalsIgnoreCase(otherNamesIterator.next())) {
+        return null;
+      }
+      names.add(name);
+    }
+
+    // Different number of field names
+    if (namesIterator.hasNext() || otherNamesIterator.hasNext()) {
       return null;
-    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat);
+    }
+
+    // Compare the field types
+    ArrayList<TypeInfo> fromTypes = a.getAllStructFieldTypeInfos();
+    ArrayList<TypeInfo> toTypes = b.getAllStructFieldTypeInfos();
+    for (int i = 0; i < fromTypes.size(); i++) {
+      TypeInfo commonType = getCommonClass(fromTypes.get(i), toTypes.get(i));
+      if (commonType == null) {
+        return null;
+      }
+      typeInfos.add(commonType);
+    }
+
+    return TypeInfoFactory.getStructTypeInfo(names, typeInfos);
   }
 
   public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) {

http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
index ca5c459..ef6ef11 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java
@@ -245,7 +245,7 @@ public abstract class GenericUDFBaseNumeric extends GenericUDFBaseBinary {
     }    
 
     // Use type promotion
-    PrimitiveCategory commonCat = FunctionRegistry.getCommonCategory(left, right);
+    PrimitiveCategory commonCat = FunctionRegistry.getPrimitiveCommonCategory(left, right);
     if (commonCat == PrimitiveCategory.DECIMAL) {
       // Hive 0.12 behavior where double * decimal -> decimal is gone.
       return TypeInfoFactory.doubleTypeInfo;
@@ -267,7 +267,7 @@ public abstract class GenericUDFBaseNumeric extends GenericUDFBaseBinary {
     PrimitiveTypeInfo right = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(rightOI);
 
     // Now we are handling exact types. Base implementation handles type promotion.
-    PrimitiveCategory commonCat = FunctionRegistry.getCommonCategory(left, right);
+    PrimitiveCategory commonCat = FunctionRegistry.getPrimitiveCommonCategory(left, right);
     if (commonCat == PrimitiveCategory.DECIMAL) {
       return deriveResultDecimalTypeInfo();
     } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/test/queries/clientpositive/structin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/structin.q b/ql/src/test/queries/clientpositive/structin.q
index 48b31f3..35498bf 100644
--- a/ql/src/test/queries/clientpositive/structin.q
+++ b/ql/src/test/queries/clientpositive/structin.q
@@ -15,3 +15,9 @@ struct('1234-1111-0074019112','1'),
 struct('1234-1111-0074019610','1'),
 struct('1234-1111-0074022106','1')
 );
+
+explain select * from t11 where struct(`id`, `lineid`)
+IN (
+struct('1234-1111-0074578664','3'),
+struct('1234-1111-0074578695',1)
+);

http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/test/results/clientpositive/structin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out
index 81c792a..f176e79 100644
--- a/ql/src/test/results/clientpositive/structin.q.out
+++ b/ql/src/test/results/clientpositive/structin.q.out
@@ -64,3 +64,47 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain select * from t11 where struct(`id`, `lineid`)
+IN (
+struct('1234-1111-0074578664','3'),
+struct('1234-1111-0074578695',1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t11 where struct(`id`, `lineid`)
+IN (
+struct('1234-1111-0074578664','3'),
+struct('1234-1111-0074578695',1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t11
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695',1)) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: id (type: string), lineid (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+