You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by st...@apache.org on 2018/02/23 18:25:33 UTC
hive git commit: HIVE-18663: Logged Spark Job Id contains a UUID instead of the actual id (Sahil Takiar, reviewed by Vihang Karajgaonkar)
Repository: hive
Updated Branches:
refs/heads/master e3c4d51a1 -> 571ef51ac
HIVE-18663: Logged Spark Job Id contains a UUID instead of the actual id (Sahil Takiar, reviewed by Vihang Karajgaonkar)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/571ef51a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/571ef51a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/571ef51a
Branch: refs/heads/master
Commit: 571ef51ac22b9828f39acce1a61298ce9d76e966
Parents: e3c4d51
Author: Sahil Takiar <ta...@gmail.com>
Authored: Fri Feb 23 10:25:11 2018 -0800
Committer: Sahil Takiar <st...@cloudera.com>
Committed: Fri Feb 23 10:25:11 2018 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/spark/SparkTask.java | 14 ++++++++------
.../exec/spark/status/impl/RemoteSparkJobStatus.java | 2 +-
2 files changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/571ef51a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index 62daaaa..76f6ecc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -77,7 +77,7 @@ public class SparkTask extends Task<SparkWork> {
private static final LogHelper console = new LogHelper(LOG);
private PerfLogger perfLogger;
private static final long serialVersionUID = 1L;
- private transient String sparkJobID;
+ private transient int sparkJobID;
private transient SparkStatistics sparkStatistics;
private transient long submitTime;
private transient long startTime;
@@ -123,18 +123,19 @@ public class SparkTask extends Task<SparkWork> {
}
addToHistory(jobRef);
- sparkJobID = jobRef.getJobId();
this.jobID = jobRef.getSparkJobStatus().getAppID();
rc = jobRef.monitorJob();
SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
+ sparkJobID = sparkJobStatus.getJobId();
getSparkJobInfo(sparkJobStatus, rc);
if (rc == 0) {
sparkStatistics = sparkJobStatus.getSparkStatistics();
if (LOG.isInfoEnabled() && sparkStatistics != null) {
- LOG.info(String.format("=====Spark Job[%s] statistics=====", jobRef.getJobId()));
+ LOG.info(String.format("=====Spark Job[%s] statistics=====", sparkJobID));
logSparkStatistic(sparkStatistics);
}
- LOG.info("Execution completed successfully");
+ LOG.info("Successfully completed Spark Job " + sparkJobID + " with application ID " +
+ jobID + " and task ID " + getId());
} else if (rc == 2) { // Cancel job if the monitor found job submission timeout.
// TODO: If the timeout is because of lack of resources in the cluster, we should
// ideally also cancel the app request here. But w/o facilities from Spark or YARN,
@@ -192,7 +193,8 @@ public class SparkTask extends Task<SparkWork> {
console.printInfo("Starting Spark Job = " + jobRef.getJobId());
if (SessionState.get() != null) {
SessionState.get().getHiveHistory()
- .setQueryProperty(queryState.getQueryId(), Keys.SPARK_JOB_ID, jobRef.getJobId());
+ .setQueryProperty(queryState.getQueryId(), Keys.SPARK_JOB_ID,
+ Integer.toString(jobRef.getSparkJobStatus().getJobId()));
}
}
@@ -277,7 +279,7 @@ public class SparkTask extends Task<SparkWork> {
return ((ReduceWork) children.get(0)).getReducer();
}
- public String getSparkJobID() {
+ public int getSparkJobID() {
return sparkJobID;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/571ef51a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/RemoteSparkJobStatus.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/RemoteSparkJobStatus.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/RemoteSparkJobStatus.java
index e950452..ec7ca40 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/RemoteSparkJobStatus.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/RemoteSparkJobStatus.java
@@ -129,7 +129,7 @@ public class RemoteSparkJobStatus implements SparkJobStatus {
// add Hive operator level statistics.
sparkStatisticsBuilder.add(getCounter());
// add spark job metrics.
- String jobIdentifier = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics";
+ String jobIdentifier = "Spark Job[" + getJobId() + "] Metrics";
Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
metricsCollection.getAllMetrics());