You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/16 09:07:04 UTC
[hive] branch master updated: HIVE-23375: Introduce TezCounter to track MJ HashTable Load time
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 31f20c8 HIVE-23375: Introduce TezCounter to track MJ HashTable Load time
31f20c8 is described below
commit 31f20c8a29cd75bbf244ce9da5b5dfef37900280
Author: Panos Garefalakis <pg...@cloudera.com>
AuthorDate: Wed May 6 12:00:35 2020 +0100
HIVE-23375: Introduce TezCounter to track MJ HashTable Load time
Change-Id: I277696aa46b8a6343f53c36f773ee53575f513ac
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java | 4 ++++
.../org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java | 11 +++++++++++
.../vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java | 11 +++++++++++
.../hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java | 6 +++++-
4 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
index 02cdae0..6681522 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
@@ -29,6 +29,10 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
*/
public interface HashTableLoader {
+ enum HashTableLoaderCounters {
+ HASHTABLE_LOAD_TIME_MS
+ };
+
void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
MapJoinOperator joinOp);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index 151d1b3..9cb0000 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -26,7 +26,10 @@ import java.util.Map;
import org.apache.hadoop.hive.llap.LlapDaemonInfo;
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
+import org.apache.tez.common.counters.TezCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -66,6 +69,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
private MapJoinDesc desc;
private TezContext tezContext;
private String cacheKey;
+ private TezCounter htLoadCounter;
@Override
public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
@@ -74,6 +78,10 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
this.hconf = hconf;
this.desc = joinOp.getConf();
this.cacheKey = joinOp.getCacheKey();
+ String counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
+ String vertexName = hconf.get(Operator.CONTEXT_NAME_KEY, "");
+ String counterName = Utilities.getVertexCounterName(HashTableLoaderCounters.HASHTABLE_LOAD_TIME_MS.name(), vertexName);
+ this.htLoadCounter = tezContext.getTezProcessorContext().getCounters().findCounter(counterGroup, counterName);
}
@Override
@@ -238,6 +246,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
cacheKey, tableContainer.getClass().getSimpleName(), pos);
tableContainer.setSerde(keyCtx, valCtx);
+ long startTime = System.currentTimeMillis();
while (kvReader.next()) {
tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
numEntries++;
@@ -258,6 +267,8 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
}
}
}
+ long delta = System.currentTimeMillis() - startTime;
+ htLoadCounter.increment(delta);
tableContainer.seal();
mapJoinTables[pos] = tableContainer;
if (doMemCheck) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
index 8d9c546..a011027 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
@@ -23,7 +23,10 @@ import java.util.Map;
import org.apache.hadoop.hive.llap.LlapDaemonInfo;
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
+import org.apache.tez.common.counters.TezCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -54,6 +57,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
protected MapJoinDesc desc;
private TezContext tezContext;
private String cacheKey;
+ private TezCounter htLoadCounter;
@Override
public void init(ExecMapperContext context, MapredContext mrContext,
@@ -62,6 +66,10 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
this.hconf = hconf;
this.desc = joinOp.getConf();
this.cacheKey = joinOp.getCacheKey();
+ String counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
+ String vertexName = hconf.get(Operator.CONTEXT_NAME_KEY, "");
+ String counterName = Utilities.getVertexCounterName(HashTableLoaderCounters.HASHTABLE_LOAD_TIME_MS.name(), vertexName);
+ this.htLoadCounter = tezContext.getTezProcessorContext().getCounters().findCounter(counterGroup, counterName);
}
@Override
@@ -126,6 +134,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
cacheKey, vectorMapJoinFastTableContainer.getClass().getSimpleName(), pos);
vectorMapJoinFastTableContainer.setSerde(null, null); // No SerDes here.
+ long startTime = System.currentTimeMillis();
while (kvReader.next()) {
vectorMapJoinFastTableContainer.putRow((BytesWritable)kvReader.getCurrentKey(),
(BytesWritable)kvReader.getCurrentValue());
@@ -147,6 +156,8 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
}
}
}
+ long delta = System.currentTimeMillis() - startTime;
+ htLoadCounter.increment(delta);
vectorMapJoinFastTableContainer.seal();
mapJoinTables[pos] = vectorMapJoinFastTableContainer;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
index 8b5ed93..102b2b5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java
@@ -67,7 +67,11 @@ public class PostExecTezSummaryPrinter implements ExecuteWithHookContext {
if (hiveCountersGroup.equals(group.getDisplayName())) {
console.printInfo(tezTask.getId() + " HIVE COUNTERS:", false);
for (TezCounter counter : group) {
- console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
+ // HIVE Counter names are picked at runtime so cannot rely on testSafeCounterNames like in LlapIOCounters
+ // Here we just filter out time counters (like HASHTABLE_LOAD_TIME_MS) that may differ across runs
+ if (!counter.getName().contains("TIME")) {
+ console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
+ }
}
} else if (group.getName().equals(HiveInputCounters.class.getName())) {
console.printInfo(tezTask.getId() + " INPUT COUNTERS:", false);