You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/07/27 23:07:38 UTC

hive git commit: HIVE-11305 : LLAP: Hybrid Map-join cache returns invalid data (Sergey Shelukhin, reviewed by Gopal V)

Repository: hive
Updated Branches:
  refs/heads/llap 1e3b59d37 -> 6bdb903e4


HIVE-11305 : LLAP: Hybrid Map-join cache returns invalid data (Sergey Shelukhin, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6bdb903e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6bdb903e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6bdb903e

Branch: refs/heads/llap
Commit: 6bdb903e49bc8271ff52097dcf87ec8e12b8304d
Parents: 1e3b59d
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Jul 27 14:07:27 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Jul 27 14:07:27 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/MapJoinOperator.java    |  8 ++++++--
 .../apache/hadoop/hive/ql/exec/ObjectCacheFactory.java | 10 ++++++----
 .../hadoop/hive/ql/exec/tez/LlapObjectCache.java       | 13 ++++++-------
 .../vector/mapjoin/VectorMapJoinCommonOperator.java    |  2 --
 4 files changed, 18 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6bdb903e/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index 31c5723..ed75639 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -134,7 +134,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
     // the task in. On MR: The cache is a no-op.
     String queryId = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVEQUERYID);
     cacheKey = "HASH_MAP_" + this.getOperatorId() + "_container";
-
     cache = ObjectCacheFactory.getCache(hconf, queryId);
     loader = getHashTableLoader(hconf);
 
@@ -196,10 +195,15 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
 
       if (!loadCalled && spilled) {
         // we can't use the cached table because it has spilled.
+
         loadHashTable(getExecContext(), MapredContext.get());
       } else {
         if (LOG.isInfoEnabled()) {
-          LOG.info("Using tables from cache: " + pair.getLeft());
+          String s = "Using tables from cache: [";
+          for (MapJoinTableContainer c : pair.getLeft()) {
+            s += ((c == null) ? "null" : c.getClass().getSimpleName()) + ", ";
+          }
+          LOG.info(s + "]");
         }
         // let's use the table from the cache.
         mapJoinTables = pair.getLeft();

http://git-wip-us.apache.org/repos/asf/hive/blob/6bdb903e/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
index dcf16f9..22853bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
@@ -48,6 +48,7 @@ public class ObjectCacheFactory {
     if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       if (LlapIoProxy.isDaemon()) { // daemon
         if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OBJECT_CACHE_ENABLED)) {
+          // LLAP object cache, unlike others, does not use globals. Thus, get the existing one.
           return getLlapObjectCache(queryId);
         } else { // no cache
           return new ObjectCacheWrapper(
@@ -66,19 +67,20 @@ public class ObjectCacheFactory {
   private static ObjectCache getLlapObjectCache(String queryId) {
     // If order of events (i.e. dagstart and fragmentstart) was guaranteed, we could just
     // create the cache when dag starts, and blindly return it to execution here.
+    if (queryId == null) throw new RuntimeException("Query ID cannot be null");
     ObjectCache result = llapQueryCaches.get(queryId);
     if (result != null) return result;
     result = new LlapObjectCache();
     ObjectCache old = llapQueryCaches.putIfAbsent(queryId, result);
-    if (old == null && LOG.isDebugEnabled()) {
-      LOG.debug("Created object cache for " + queryId);
+    if (old == null && LOG.isInfoEnabled()) {
+      LOG.info("Created object cache for " + queryId);
     }
     return (old != null) ? old : result;
   }
 
   public static void removeLlapQueryCache(String queryId) {
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Removing object cache for " + queryId);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Removing object cache for " + queryId);
     }
     llapQueryCaches.remove(queryId);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/6bdb903e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
index 00f3c54..a7936a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
@@ -42,17 +42,16 @@ public class LlapObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCac
 
   private static final Log LOG = LogFactory.getLog(LlapObjectCache.class.getName());
 
-  private static final Cache<String, Object> registry
-    = CacheBuilder.newBuilder().softValues().build();
+  private static ExecutorService staticPool = Executors.newCachedThreadPool();
 
-  private static final Map<String, ReentrantLock> locks
-    = new HashMap<String, ReentrantLock>();
+  private static final boolean isLogInfoEnabled = LOG.isInfoEnabled();
 
-  private static final ReentrantLock lock = new ReentrantLock();
+  private final Cache<String, Object> registry = CacheBuilder.newBuilder().softValues().build();
 
-  private static ExecutorService staticPool = Executors.newCachedThreadPool();
+  private final Map<String, ReentrantLock> locks = new HashMap<String, ReentrantLock>();
+
+  private final ReentrantLock lock = new ReentrantLock();
 
-  private static final boolean isLogInfoEnabled = LOG.isInfoEnabled();
 
   @Override
   public void release(String key) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6bdb903e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index 63f664d..81ee41b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -530,7 +530,6 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    */
   @Override
   protected HashTableLoader getHashTableLoader(Configuration hconf) {
-
     VectorMapJoinDesc vectorDesc = conf.getVectorDesc();
     HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
     HashTableLoader hashTableLoader;
@@ -621,7 +620,6 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
 
     VectorMapJoinDesc vectorDesc = conf.getVectorDesc();
     HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
-    HashTableLoader hashTableLoader;
     switch (vectorDesc.hashTableImplementationType()) {
     case OPTIMIZED:
       {