You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2021/11/23 08:03:11 UTC
[hive] branch master updated: HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new cb23045 HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
cb23045 is described below
commit cb23045f92c62bc43ef5739532b486b524d99e03
Author: dengzh <de...@gmail.com>
AuthorDate: Tue Nov 23 16:02:58 2021 +0800
HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
---
.../test/resources/testconfiguration.properties | 1 +
.../apache/hadoop/hive/ql/exec/ObjectCache.java | 1 -
.../apache/hadoop/hive/ql/exec/mr/ObjectCache.java | 23 ++++--
.../test/queries/clientpositive/offset_limit_mr.q | 12 +++
.../results/clientpositive/offset_limit_mr.q.out | 88 ++++++++++++++++++++++
5 files changed, 118 insertions(+), 7 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 638af07..6b887f5 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -239,6 +239,7 @@ mr.query.files=\
masking_5.q,\
nonmr_fetch.q,\
nonreserved_keywords_input37.q,\
+ offset_limit_mr.q,\
parenthesis_star_by.q,\
partition_vs_table_metadata.q,\
row__id.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
index cf04e1d..c9282b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
@@ -48,7 +48,6 @@ public interface ObjectCache {
*
* @param <T>
* @param key
- * function to generate the object if it's not there
* @return the last cached object with the key, null if none.
*/
public <T> T retrieve(String key) throws HiveException;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
index 5bb96e3..0acf6d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
@@ -18,7 +18,9 @@
package org.apache.hadoop.hive.ql.exec.mr;
+import java.util.Map;
import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
@@ -26,36 +28,45 @@ import java.util.concurrent.TimeoutException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.hive.ql.metadata.HiveException;
/**
- * ObjectCache. No-op implementation on MR we don't have a means to reuse
- * Objects between runs of the same task.
+ * ObjectCache. Simple implementation for MR: since we don't have a means to reuse
+ * objects between runs of the same task, this acts as a local cache.
*
*/
public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class.getName());
+ private final Map<String, Object> cache = new ConcurrentHashMap<>();
+
@Override
public void release(String key) {
- // nothing to do
LOG.debug("{} no longer needed", key);
+ cache.remove(key);
}
@Override
public <T> T retrieve(String key) throws HiveException {
- return retrieve(key, null);
+ return (T) cache.get(key);
}
@Override
public <T> T retrieve(String key, Callable<T> fn) throws HiveException {
+ T value = (T) cache.get(key);
+ if (value != null || fn == null) {
+ return value;
+ }
try {
LOG.debug("Creating {}", key);
- return fn.call();
+ value = fn.call();
} catch (Exception e) {
throw new HiveException(e);
}
+ T previous = (T) cache.putIfAbsent(key, value);
+ return previous != null ? previous : value;
}
@Override
@@ -94,6 +105,6 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
@Override
public void remove(String key) {
- // nothing to do
+ cache.remove(key);
}
}
diff --git a/ql/src/test/queries/clientpositive/offset_limit_mr.q b/ql/src/test/queries/clientpositive/offset_limit_mr.q
new file mode 100644
index 0000000..caba496
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_mr.q
@@ -0,0 +1,12 @@
+--! qt:dataset:src
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
diff --git a/ql/src/test/results/clientpositive/offset_limit_mr.q.out b/ql/src/test/results/clientpositive/offset_limit_mr.q.out
new file mode 100644
index 0000000..44f5491
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit_mr.q.out
@@ -0,0 +1,88 @@
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0