You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/16 09:07:04 UTC

[hive] branch master updated: HIVE-23375: Introduce TezCounter to track MJ HashTable Load time

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 31f20c8  HIVE-23375: Introduce TezCounter to track MJ HashTable Load time
31f20c8 is described below

commit 31f20c8a29cd75bbf244ce9da5b5dfef37900280
Author: Panos Garefalakis <pg...@cloudera.com>
AuthorDate: Wed May 6 12:00:35 2020 +0100

    HIVE-23375: Introduce TezCounter to track MJ HashTable Load time
    
    Change-Id: I277696aa46b8a6343f53c36f773ee53575f513ac
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java  |  4 ++++
 .../org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java   | 11 +++++++++++
 .../vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java | 11 +++++++++++
 .../hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java       |  6 +++++-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
index 02cdae0..6681522 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
@@ -29,6 +29,10 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
  */
 public interface HashTableLoader {
 
+  enum HashTableLoaderCounters { // names of the counters reported by hash table loader implementations
+    HASHTABLE_LOAD_TIME_MS // loaders add the elapsed wall-clock milliseconds of each table's row-loading loop
+  };
+
   void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
       MapJoinOperator joinOp);
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index 151d1b3..9cb0000 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -26,7 +26,10 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.llap.LlapDaemonInfo;
 import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
+import org.apache.tez.common.counters.TezCounter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -66,6 +69,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
   private MapJoinDesc desc;
   private TezContext tezContext;
   private String cacheKey;
+  private TezCounter htLoadCounter;
 
   @Override
   public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
@@ -74,6 +78,10 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
     this.hconf = hconf;
     this.desc = joinOp.getConf();
     this.cacheKey = joinOp.getCacheKey();
+    String counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
+    String vertexName = hconf.get(Operator.CONTEXT_NAME_KEY, "");
+    String counterName = Utilities.getVertexCounterName(HashTableLoaderCounters.HASHTABLE_LOAD_TIME_MS.name(), vertexName);
+    this.htLoadCounter = tezContext.getTezProcessorContext().getCounters().findCounter(counterGroup, counterName);
   }
 
   @Override
@@ -238,6 +246,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
           cacheKey, tableContainer.getClass().getSimpleName(), pos);
 
         tableContainer.setSerde(keyCtx, valCtx);
+        long startTime = System.currentTimeMillis();
         while (kvReader.next()) {
           tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
           numEntries++;
@@ -258,6 +267,8 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
             }
           }
         }
+        long delta = System.currentTimeMillis() - startTime;
+        htLoadCounter.increment(delta);
         tableContainer.seal();
         mapJoinTables[pos] = tableContainer;
         if (doMemCheck) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
index 8d9c546..a011027 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
@@ -23,7 +23,10 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.llap.LlapDaemonInfo;
 import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
+import org.apache.tez.common.counters.TezCounter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -54,6 +57,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
   protected MapJoinDesc desc;
   private TezContext tezContext;
   private String cacheKey;
+  private TezCounter htLoadCounter;
 
   @Override
   public void init(ExecMapperContext context, MapredContext mrContext,
@@ -62,6 +66,10 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
     this.hconf = hconf;
     this.desc = joinOp.getConf();
     this.cacheKey = joinOp.getCacheKey();
+    String counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
+    String vertexName = hconf.get(Operator.CONTEXT_NAME_KEY, "");
+    String counterName = Utilities.getVertexCounterName(HashTableLoaderCounters.HASHTABLE_LOAD_TIME_MS.name(), vertexName);
+    this.htLoadCounter = tezContext.getTezProcessorContext().getCounters().findCounter(counterGroup, counterName);
   }
 
   @Override
@@ -126,6 +134,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
           cacheKey, vectorMapJoinFastTableContainer.getClass().getSimpleName(), pos);
 
         vectorMapJoinFastTableContainer.setSerde(null, null); // No SerDes here.
+        long startTime = System.currentTimeMillis();
         while (kvReader.next()) {
           vectorMapJoinFastTableContainer.putRow((BytesWritable)kvReader.getCurrentKey(),
               (BytesWritable)kvReader.getCurrentValue());
@@ -147,6 +156,8 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
               }
           }
         }
+        long delta = System.currentTimeMillis() - startTime;
+        htLoadCounter.increment(delta);
 
         vectorMapJoinFastTableContainer.seal();
         mapJoinTables[pos] = vectorMapJoinFastTableContainer;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
index 8b5ed93..102b2b5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
@@ -67,7 +67,11 @@ public class PostExecTezSummaryPrinter implements ExecuteWithHookContext {
           if (hiveCountersGroup.equals(group.getDisplayName())) {
             console.printInfo(tezTask.getId() + " HIVE COUNTERS:", false);
             for (TezCounter counter : group) {
-              console.printInfo("   " + counter.getDisplayName() + ": " + counter.getValue(), false);
+              // Hive counter names are picked at runtime, so we cannot rely on testSafeCounterNames as with LlapIOCounters.
+              // Instead, filter out time counters (like HASHTABLE_LOAD_TIME_MS), whose values may differ across runs.
+              if (!counter.getName().contains("TIME")) {
+                console.printInfo("   " + counter.getDisplayName() + ": " + counter.getValue(), false);
+              }
             }
           }  else if (group.getName().equals(HiveInputCounters.class.getName())) {
             console.printInfo(tezTask.getId() + " INPUT COUNTERS:", false);