You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2021/11/23 08:03:11 UTC

[hive] branch master updated: HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new cb23045  HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
cb23045 is described below

commit cb23045f92c62bc43ef5739532b486b524d99e03
Author: dengzh <de...@gmail.com>
AuthorDate: Tue Nov 23 16:02:58 2021 +0800

    HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
---
 .../test/resources/testconfiguration.properties    |  1 +
 .../apache/hadoop/hive/ql/exec/ObjectCache.java    |  1 -
 .../apache/hadoop/hive/ql/exec/mr/ObjectCache.java | 23 ++++--
 .../test/queries/clientpositive/offset_limit_mr.q  | 12 +++
 .../results/clientpositive/offset_limit_mr.q.out   | 88 ++++++++++++++++++++++
 5 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 638af07..6b887f5 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -239,6 +239,7 @@ mr.query.files=\
   masking_5.q,\
   nonmr_fetch.q,\
   nonreserved_keywords_input37.q,\
+  offset_limit_mr.q,\
   parenthesis_star_by.q,\
   partition_vs_table_metadata.q,\
   row__id.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
index cf04e1d..c9282b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
@@ -48,7 +48,6 @@ public interface ObjectCache {
    *
    * @param <T>
    * @param key
-   *          function to generate the object if it's not there
    * @return the last cached object with the key, null if none.
    */
   public <T> T retrieve(String key) throws HiveException;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
index 5bb96e3..0acf6d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec.mr;
 
+import java.util.Map;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
@@ -26,36 +28,45 @@ import java.util.concurrent.TimeoutException;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 
 /**
- * ObjectCache. No-op implementation on MR we don't have a means to reuse
- * Objects between runs of the same task.
+ * ObjectCache. Simple implementation for MR: since we don't have a means to reuse
+ * Objects between runs of the same task, this acts as a local cache.
  *
  */
 public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
 
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class.getName());
 
+  private final Map<String, Object> cache = new ConcurrentHashMap<>();
+
   @Override
   public void release(String key) {
-    // nothing to do
     LOG.debug("{} no longer needed", key);
+    cache.remove(key);
   }
 
   @Override
   public <T> T retrieve(String key) throws HiveException {
-    return retrieve(key, null);
+    return (T) cache.get(key);
   }
 
   @Override
   public <T> T retrieve(String key, Callable<T> fn) throws HiveException {
+    T value = (T) cache.get(key);
+    if (value != null || fn == null) {
+      return value;
+    }
     try {
       LOG.debug("Creating {}", key);
-      return fn.call();
+      value = fn.call();
     } catch (Exception e) {
       throw new HiveException(e);
     }
+    T previous = (T) cache.putIfAbsent(key, value);
+    return previous != null ? previous : value;
   }
 
   @Override
@@ -94,6 +105,6 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
 
   @Override
   public void remove(String key) {
-    // nothing to do
+    cache.remove(key);
   }
 }
diff --git a/ql/src/test/queries/clientpositive/offset_limit_mr.q b/ql/src/test/queries/clientpositive/offset_limit_mr.q
new file mode 100644
index 0000000..caba496
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_mr.q
@@ -0,0 +1,12 @@
+--! qt:dataset:src
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
diff --git a/ql/src/test/results/clientpositive/offset_limit_mr.q.out b/ql/src/test/results/clientpositive/offset_limit_mr.q.out
new file mode 100644
index 0000000..44f5491
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit_mr.q.out
@@ -0,0 +1,88 @@
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116	116.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+116	116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0