You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/05/26 04:42:44 UTC

[hive] branch master updated: HIVE-26235: OR Condition on binary column is returning empty result (#3305) (Peter Vary, reviewed by Laszlo Bodor)

This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new a22ba9dafad HIVE-26235: OR Condition on binary column is returning empty result (#3305) (Peter Vary, reviewed by Laszlo Bodor)
a22ba9dafad is described below

commit a22ba9dafad4bfda0c5c0d2c63eaf83293d6fd64
Author: pvary <pv...@cloudera.com>
AuthorDate: Thu May 26 06:42:34 2022 +0200

    HIVE-26235: OR Condition on binary column is returning empty result (#3305) (Peter Vary, reviewed by Laszlo Bodor)
---
 .../hadoop/hive/ql/udf/generic/GenericUDFIn.java   | 22 ++++--
 ql/src/test/queries/clientpositive/udf_in_binary.q |  8 +++
 .../clientpositive/llap/udf_in_binary.q.out        | 79 ++++++++++++++++++++++
 3 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
index 0a2ae14502f..24852e1b728 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import java.nio.ByteBuffer;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -26,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -123,9 +125,14 @@ public class GenericUDFIn extends GenericUDF {
     constantInSet = new HashSet<Object>();
     if (compareOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
       for (int i = 1; i < arguments.length; ++i) {
-        constantInSet.add(((PrimitiveObjectInspector) compareOI)
+        Object constant = ((PrimitiveObjectInspector) compareOI)
             .getPrimitiveJavaObject(conversionHelper
-                .convertIfNecessary(arguments[i].get(), argumentOIs[i])));
+                .convertIfNecessary(arguments[i].get(), argumentOIs[i]));
+        if (compareOI.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
+          constantInSet.add(ByteBuffer.wrap((byte[]) constant));
+        } else {
+          constantInSet.add(constant);
+        }
       }
     } else {
       for (int i = 1; i < arguments.length; ++i) {
@@ -148,9 +155,13 @@ public class GenericUDFIn extends GenericUDF {
       }
       switch (compareOI.getCategory()) {
       case PRIMITIVE: {
-        if (constantInSet.contains(((PrimitiveObjectInspector) compareOI)
-            .getPrimitiveJavaObject(conversionHelper.convertIfNecessary(arguments[0].get(),
-                argumentOIs[0])))) {
+        Object arg = ((PrimitiveObjectInspector) compareOI)
+                .getPrimitiveJavaObject(conversionHelper.convertIfNecessary(arguments[0].get(),
+                    argumentOIs[0]));
+        if (compareOI.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
+          arg = ByteBuffer.wrap((byte[]) arg);
+        }
+        if (constantInSet.contains(arg)) {
           bw.set(true);
           return bw;
         }
@@ -226,5 +237,4 @@ public class GenericUDFIn extends GenericUDF {
     sb.append(")");
     return sb.toString();
   }
-
 }
diff --git a/ql/src/test/queries/clientpositive/udf_in_binary.q b/ql/src/test/queries/clientpositive/udf_in_binary.q
new file mode 100644
index 00000000000..a27dcb586cb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_in_binary.q
@@ -0,0 +1,8 @@
+create table test_binary(data_col timestamp, binary_col binary) partitioned by (ts string);
+insert into test_binary partition(ts='202204200000') values ('2022-04-20 00:00:00.0', 'a'),
+('2022-04-20 00:00:00.0', 'b'),('2022-04-20 00:00:00.0', 'c'),('2022-04-20 00:00:00.0', NULL);
+select * from test_binary where ts='202204200000' and binary_col = unhex('61');
+select * from test_binary where ts='202204200000' and binary_col between unhex('61') and unhex('62');
+select * from test_binary where binary_col = unhex('61') or binary_col = unhex('62');
+select * from test_binary where ts='202204200000' and (binary_col = unhex('61') or binary_col = unhex('62'));
+select * from test_binary where binary_col in (unhex('61'), unhex('62'));
diff --git a/ql/src/test/results/clientpositive/llap/udf_in_binary.q.out b/ql/src/test/results/clientpositive/llap/udf_in_binary.q.out
new file mode 100644
index 00000000000..0cd8876219e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/udf_in_binary.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: create table test_binary(data_col timestamp, binary_col binary) partitioned by (ts string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_binary
+POSTHOOK: query: create table test_binary(data_col timestamp, binary_col binary) partitioned by (ts string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_binary
+PREHOOK: query: insert into test_binary partition(ts='202204200000') values ('2022-04-20 00:00:00.0', 'a'),
+('2022-04-20 00:00:00.0', 'b'),('2022-04-20 00:00:00.0', 'c'),('2022-04-20 00:00:00.0', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_binary@ts=202204200000
+POSTHOOK: query: insert into test_binary partition(ts='202204200000') values ('2022-04-20 00:00:00.0', 'a'),
+('2022-04-20 00:00:00.0', 'b'),('2022-04-20 00:00:00.0', 'c'),('2022-04-20 00:00:00.0', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_binary@ts=202204200000
+POSTHOOK: Lineage: test_binary PARTITION(ts=202204200000).binary_col SCRIPT []
+POSTHOOK: Lineage: test_binary PARTITION(ts=202204200000).data_col SCRIPT []
+PREHOOK: query: select * from test_binary where ts='202204200000' and binary_col = unhex('61')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where ts='202204200000' and binary_col = unhex('61')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00	a	202204200000
+PREHOOK: query: select * from test_binary where ts='202204200000' and binary_col between unhex('61') and unhex('62')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where ts='202204200000' and binary_col between unhex('61') and unhex('62')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00	a	202204200000
+2022-04-20 00:00:00	b	202204200000
+PREHOOK: query: select * from test_binary where binary_col = unhex('61') or binary_col = unhex('62')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where binary_col = unhex('61') or binary_col = unhex('62')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00	a	202204200000
+2022-04-20 00:00:00	b	202204200000
+PREHOOK: query: select * from test_binary where ts='202204200000' and (binary_col = unhex('61') or binary_col = unhex('62'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where ts='202204200000' and (binary_col = unhex('61') or binary_col = unhex('62'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00	a	202204200000
+2022-04-20 00:00:00	b	202204200000
+PREHOOK: query: select * from test_binary where binary_col in (unhex('61'), unhex('62'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where binary_col in (unhex('61'), unhex('62'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00	a	202204200000
+2022-04-20 00:00:00	b	202204200000