You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/05/26 04:42:44 UTC
[hive] branch master updated: HIVE-26235: OR Condition on binary column is returning empty result (#3305) (Peter Vary, reviewed by Laszlo Bodor)
This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new a22ba9dafad HIVE-26235: OR Condition on binary column is returning empty result (#3305) (Peter Vary, reviewed by Laszlo Bodor)
a22ba9dafad is described below
commit a22ba9dafad4bfda0c5c0d2c63eaf83293d6fd64
Author: pvary <pv...@cloudera.com>
AuthorDate: Thu May 26 06:42:34 2022 +0200
HIVE-26235: OR Condition on binary column is returning empty result (#3305) (Peter Vary, reviewed by Laszlo Bodor)
---
.../hadoop/hive/ql/udf/generic/GenericUDFIn.java | 22 ++++--
ql/src/test/queries/clientpositive/udf_in_binary.q | 8 +++
.../clientpositive/llap/udf_in_binary.q.out | 79 ++++++++++++++++++++++
3 files changed, 103 insertions(+), 6 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
index 0a2ae14502f..24852e1b728 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Set;
@@ -26,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -123,9 +125,14 @@ public class GenericUDFIn extends GenericUDF {
constantInSet = new HashSet<Object>();
if (compareOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
for (int i = 1; i < arguments.length; ++i) {
- constantInSet.add(((PrimitiveObjectInspector) compareOI)
+ Object constant = ((PrimitiveObjectInspector) compareOI)
.getPrimitiveJavaObject(conversionHelper
- .convertIfNecessary(arguments[i].get(), argumentOIs[i])));
+ .convertIfNecessary(arguments[i].get(), argumentOIs[i]));
+ if (compareOI.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
+ constantInSet.add(ByteBuffer.wrap((byte[]) constant));
+ } else {
+ constantInSet.add(constant);
+ }
}
} else {
for (int i = 1; i < arguments.length; ++i) {
@@ -148,9 +155,13 @@ public class GenericUDFIn extends GenericUDF {
}
switch (compareOI.getCategory()) {
case PRIMITIVE: {
- if (constantInSet.contains(((PrimitiveObjectInspector) compareOI)
- .getPrimitiveJavaObject(conversionHelper.convertIfNecessary(arguments[0].get(),
- argumentOIs[0])))) {
+ Object arg = ((PrimitiveObjectInspector) compareOI)
+ .getPrimitiveJavaObject(conversionHelper.convertIfNecessary(arguments[0].get(),
+ argumentOIs[0]));
+ if (compareOI.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
+ arg = ByteBuffer.wrap((byte[]) arg);
+ }
+ if (constantInSet.contains(arg)) {
bw.set(true);
return bw;
}
@@ -226,5 +237,4 @@ public class GenericUDFIn extends GenericUDF {
sb.append(")");
return sb.toString();
}
-
}
diff --git a/ql/src/test/queries/clientpositive/udf_in_binary.q b/ql/src/test/queries/clientpositive/udf_in_binary.q
new file mode 100644
index 00000000000..a27dcb586cb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_in_binary.q
@@ -0,0 +1,8 @@
+create table test_binary(data_col timestamp, binary_col binary) partitioned by (ts string);
+insert into test_binary partition(ts='202204200000') values ('2022-04-20 00:00:00.0', 'a'),
+('2022-04-20 00:00:00.0', 'b'),('2022-04-20 00:00:00.0', 'c'),('2022-04-20 00:00:00.0', NULL);
+select * from test_binary where ts='202204200000' and binary_col = unhex('61');
+select * from test_binary where ts='202204200000' and binary_col between unhex('61') and unhex('62');
+select * from test_binary where binary_col = unhex('61') or binary_col = unhex('62');
+select * from test_binary where ts='202204200000' and (binary_col = unhex('61') or binary_col = unhex('62'));
+select * from test_binary where binary_col in (unhex('61'), unhex('62'));
diff --git a/ql/src/test/results/clientpositive/llap/udf_in_binary.q.out b/ql/src/test/results/clientpositive/llap/udf_in_binary.q.out
new file mode 100644
index 00000000000..0cd8876219e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/udf_in_binary.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: create table test_binary(data_col timestamp, binary_col binary) partitioned by (ts string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_binary
+POSTHOOK: query: create table test_binary(data_col timestamp, binary_col binary) partitioned by (ts string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_binary
+PREHOOK: query: insert into test_binary partition(ts='202204200000') values ('2022-04-20 00:00:00.0', 'a'),
+('2022-04-20 00:00:00.0', 'b'),('2022-04-20 00:00:00.0', 'c'),('2022-04-20 00:00:00.0', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_binary@ts=202204200000
+POSTHOOK: query: insert into test_binary partition(ts='202204200000') values ('2022-04-20 00:00:00.0', 'a'),
+('2022-04-20 00:00:00.0', 'b'),('2022-04-20 00:00:00.0', 'c'),('2022-04-20 00:00:00.0', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_binary@ts=202204200000
+POSTHOOK: Lineage: test_binary PARTITION(ts=202204200000).binary_col SCRIPT []
+POSTHOOK: Lineage: test_binary PARTITION(ts=202204200000).data_col SCRIPT []
+PREHOOK: query: select * from test_binary where ts='202204200000' and binary_col = unhex('61')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where ts='202204200000' and binary_col = unhex('61')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00 a 202204200000
+PREHOOK: query: select * from test_binary where ts='202204200000' and binary_col between unhex('61') and unhex('62')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where ts='202204200000' and binary_col between unhex('61') and unhex('62')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00 a 202204200000
+2022-04-20 00:00:00 b 202204200000
+PREHOOK: query: select * from test_binary where binary_col = unhex('61') or binary_col = unhex('62')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where binary_col = unhex('61') or binary_col = unhex('62')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00 a 202204200000
+2022-04-20 00:00:00 b 202204200000
+PREHOOK: query: select * from test_binary where ts='202204200000' and (binary_col = unhex('61') or binary_col = unhex('62'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where ts='202204200000' and (binary_col = unhex('61') or binary_col = unhex('62'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00 a 202204200000
+2022-04-20 00:00:00 b 202204200000
+PREHOOK: query: select * from test_binary where binary_col in (unhex('61'), unhex('62'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_binary
+PREHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_binary where binary_col in (unhex('61'), unhex('62'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_binary
+POSTHOOK: Input: default@test_binary@ts=202204200000
+#### A masked pattern was here ####
+2022-04-20 00:00:00 a 202204200000
+2022-04-20 00:00:00 b 202204200000