You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2022/08/05 18:52:52 UTC

[hive] branch master updated: HIVE-26447: Vectorization: wrong results when filter on repeating map key orc table (#3492)

This is an automated email from the ASF dual-hosted git repository.

sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 4b1f01f5b9 HIVE-26447: Vectorization: wrong results when filter on repeating map key orc table (#3492)
4b1f01f5b9 is described below

commit 4b1f01f5b97b0028047b63d1922db335dbaf5d8d
Author: yigress <10...@users.noreply.github.com>
AuthorDate: Fri Aug 5 11:52:45 2022 -0700

    HIVE-26447: Vectorization: wrong results when filter on repeating map key orc table (#3492)
---
 .../expressions/VectorUDFMapIndexStringScalar.java |  5 +-
 .../queries/clientpositive/orc_map_key_repeating.q | 17 +++++
 .../clientpositive/orc_map_key_repeating.q.out     | 84 ++++++++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
index 0d9b5ba631..02274924b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
@@ -66,10 +66,13 @@ public class VectorUDFMapIndexStringScalar extends VectorUDFMapIndexBaseScalar {
     byte[][] keyVector = keyColVector.vector;
     int[] keyStart = keyColVector.start;
     int[] keyLength = keyColVector.length;
+    final boolean isRepeating = keyColVector.isRepeating;
     for (int i = 0; i < count; i++) {
       final int keyOffset = offset + i;
+      final int len = isRepeating? keyLength[0]: keyLength[keyOffset];
+      byte[] rowKey = isRepeating? keyVector[0]: keyVector[keyOffset];
       if (StringExpr.equal(key, 0, key.length,
-          keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) {
+          rowKey, keyStart[keyOffset], len)) {
         return offset + i;
       }
     }
diff --git a/ql/src/test/queries/clientpositive/orc_map_key_repeating.q b/ql/src/test/queries/clientpositive/orc_map_key_repeating.q
new file mode 100644
index 0000000000..ac29159406
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_map_key_repeating.q
@@ -0,0 +1,17 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.fetch.task.conversion=none;
+
+-- test single repeating key
+create temporary table foo (id int, x map<string,int>) stored as orc;
+insert into foo values(1, map('ABC', 9)), (2, map('ABC', 7)), (3, map('ABC', 8)), (4, map('ABC', 9));
+select id from foo where x['ABC']=9;
+
+-- test multiple repeating keys
+create temporary table bar (id int, x map<string,int>) stored as orc;
+insert into bar values(1, map('A', 9, 'B', 1)), (2, map('A', 7, 'B', 2)), (3, map('A', 8, 'B', 3)), (4, map('A', 9, 'B', 4));
+select id from bar where x['A']=9;
+
+-- test mixed keys
+create temporary table doo (id int, x map<string,int>) stored as orc;
+insert into doo values(1, map('ABC', 9, 'B', 1)), (2, map('AB', 7)), (3, map('A', 8, 'C', 3)), (4, map('D', 7, 'ABC', 9, 'E', 4));
+select id from doo where x['ABC']=9;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/orc_map_key_repeating.q.out b/ql/src/test/results/clientpositive/orc_map_key_repeating.q.out
new file mode 100644
index 0000000000..745f80a42d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_map_key_repeating.q.out
@@ -0,0 +1,84 @@
+PREHOOK: query: create temporary table foo (id int, x map<string,int>) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create temporary table foo (id int, x map<string,int>) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert into foo values(1, map('ABC', 9)), (2, map('ABC', 7)), (3, map('ABC', 8)), (4, map('ABC', 9))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values(1, map('ABC', 9)), (2, map('ABC', 7)), (3, map('ABC', 8)), (4, map('ABC', 9))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.id SCRIPT []
+POSTHOOK: Lineage: foo.x SCRIPT []
+PREHOOK: query: select id from foo where x['ABC']=9
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select id from foo where x['ABC']=9
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+1
+4
+PREHOOK: query: create temporary table bar (id int, x map<string,int>) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bar
+POSTHOOK: query: create temporary table bar (id int, x map<string,int>) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bar
+PREHOOK: query: insert into bar values(1, map('A', 9, 'B', 1)), (2, map('A', 7, 'B', 2)), (3, map('A', 8, 'B', 3)), (4, map('A', 9, 'B', 4))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@bar
+POSTHOOK: query: insert into bar values(1, map('A', 9, 'B', 1)), (2, map('A', 7, 'B', 2)), (3, map('A', 8, 'B', 3)), (4, map('A', 9, 'B', 4))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@bar
+POSTHOOK: Lineage: bar.id SCRIPT []
+POSTHOOK: Lineage: bar.x SCRIPT []
+PREHOOK: query: select id from bar where x['A']=9
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bar
+#### A masked pattern was here ####
+POSTHOOK: query: select id from bar where x['A']=9
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bar
+#### A masked pattern was here ####
+1
+4
+PREHOOK: query: create temporary table doo (id int, x map<string,int>) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doo
+POSTHOOK: query: create temporary table doo (id int, x map<string,int>) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doo
+PREHOOK: query: insert into doo values(1, map('ABC', 9, 'B', 1)), (2, map('AB', 7)), (3, map('A', 8, 'C', 3)), (4, map('D', 7, 'ABC', 9, 'E', 4))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@doo
+POSTHOOK: query: insert into doo values(1, map('ABC', 9, 'B', 1)), (2, map('AB', 7)), (3, map('A', 8, 'C', 3)), (4, map('D', 7, 'ABC', 9, 'E', 4))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@doo
+POSTHOOK: Lineage: doo.id SCRIPT []
+POSTHOOK: Lineage: doo.x SCRIPT []
+PREHOOK: query: select id from doo where x['ABC']=9
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doo
+#### A masked pattern was here ####
+POSTHOOK: query: select id from doo where x['ABC']=9
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doo
+#### A masked pattern was here ####
+1
+4