You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by bi...@apache.org on 2015/08/06 22:40:50 UTC
tez git commit: TEZ-2699. Internalize strings in ATF parser (bikas)
Repository: tez
Updated Branches:
refs/heads/master 28cd991b8 -> eadbfec44
TEZ-2699. Internalize strings in ATF parser (bikas)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/eadbfec4
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/eadbfec4
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/eadbfec4
Branch: refs/heads/master
Commit: eadbfec4456b5edc318a5c94f90a2a78e172ebf4
Parents: 28cd991
Author: Bikas Saha <bi...@apache.org>
Authored: Thu Aug 6 13:40:37 2015 -0700
Committer: Bikas Saha <bi...@apache.org>
Committed: Thu Aug 6 13:40:37 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../tez/history/parser/datamodel/DagInfo.java | 7 +++---
.../parser/datamodel/TaskAttemptInfo.java | 24 +++++++++-----------
.../tez/history/parser/datamodel/TaskInfo.java | 9 +++++---
.../history/parser/datamodel/VertexInfo.java | 14 +++++++++---
tez-tools/analyzers/job-analyzer/pom.xml | 7 +++++-
6 files changed, 39 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 5d3c4f4..b37eb9e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -13,6 +13,7 @@ INCOMPATIBLE CHANGES
TEZ-2633. Allow VertexManagerPlugins to receive and report based on attempts
instead of tasks
TEZ-2650. Timing details on Vertex state changes
+ TEZ-2699. Internalize strings in ATF parser
ALL CHANGES:
TEZ-2663. SessionNotRunning exceptions are wrapped in a ServiceException from a dying AM.
http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
index fe596f0..5ea94d6 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
@@ -32,6 +32,7 @@ import org.apache.commons.collections.BidiMap;
import org.apache.commons.collections.bidimap.DualHashBidiMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.StringInterner;
import org.apache.tez.dag.api.event.VertexState;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
@@ -91,7 +92,7 @@ public class DagInfo extends BaseInfo {
Preconditions.checkArgument(jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase
(Constants.TEZ_DAG_ID));
- dagId = jsonObject.getString(Constants.ENTITY);
+ dagId = StringInterner.weakIntern(jsonObject.getString(Constants.ENTITY));
//Parse additional Info
JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
@@ -102,7 +103,7 @@ public class DagInfo extends BaseInfo {
diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
failedTasks = otherInfoNode.optInt(Constants.NUM_FAILED_TASKS);
JSONObject dagPlan = otherInfoNode.optJSONObject(Constants.DAG_PLAN);
- name = (dagPlan != null) ? (dagPlan.optString(Constants.DAG_NAME)) : null;
+ name = StringInterner.weakIntern((dagPlan != null) ? (dagPlan.optString(Constants.DAG_NAME)) : null);
if (dagPlan != null) {
JSONArray vertices = dagPlan.optJSONArray(Constants.VERTICES);
if (vertices != null) {
@@ -114,7 +115,7 @@ public class DagInfo extends BaseInfo {
} else {
numVertices = 0;
}
- status = otherInfoNode.optString(Constants.STATUS);
+ status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
//parse name id mapping
JSONObject vertexIDMappingJson = otherInfoNode.optJSONObject(Constants.VERTEX_NAME_ID_MAPPING);
http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
index 8f7ec23..916df95 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
@@ -21,6 +21,7 @@ package org.apache.tez.history.parser.datamodel;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
+import org.apache.hadoop.util.StringInterner;
import org.apache.tez.common.ATSConstants;
import org.apache.tez.common.counters.DAGCounter;
import org.apache.tez.common.counters.TaskCounter;
@@ -41,7 +42,7 @@ public class TaskAttemptInfo extends BaseInfo {
private final long startTime;
private final long endTime;
private final String diagnostics;
- private final String successfulAttemptId;
+
private final long scheduledTime;
private final String containerId;
private final String nodeId;
@@ -62,26 +63,27 @@ public class TaskAttemptInfo extends BaseInfo {
jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase
(Constants.TEZ_TASK_ATTEMPT_ID));
- taskAttemptId = jsonObject.optString(Constants.ENTITY);
+ taskAttemptId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY));
//Parse additional Info
final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
startTime = otherInfoNode.optLong(Constants.START_TIME);
endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
- successfulAttemptId = otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID);
scheduledTime = otherInfoNode.optLong(Constants.SCHEDULED_TIME);
- schedulingCausalTA = otherInfoNode.optString(Constants.SCHEDULING_CAUSAL_ATTEMPT);
+ schedulingCausalTA = StringInterner.weakIntern(
+ otherInfoNode.optString(Constants.SCHEDULING_CAUSAL_ATTEMPT));
- containerId = otherInfoNode.optString(Constants.CONTAINER_ID);
+ containerId = StringInterner.weakIntern(otherInfoNode.optString(Constants.CONTAINER_ID));
String id = otherInfoNode.optString(Constants.NODE_ID);
- nodeId = (id != null) ? (id.split(":")[0]) : "";
+ nodeId = StringInterner.weakIntern((id != null) ? (id.split(":")[0]) : "");
logUrl = otherInfoNode.optString(Constants.COMPLETED_LOGS_URL);
- status = otherInfoNode.optString(Constants.STATUS);
+ status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
container = new Container(containerId, nodeId);
lastDataEventTime = otherInfoNode.optLong(ATSConstants.LAST_DATA_EVENT_TIME);
- lastDataEventSourceTA = otherInfoNode.optString(ATSConstants.LAST_DATA_EVENT_SOURCE_TA);
+ lastDataEventSourceTA = StringInterner.weakIntern(
+ otherInfoNode.optString(ATSConstants.LAST_DATA_EVENT_SOURCE_TA));
}
void setTaskInfo(TaskInfo taskInfo) {
@@ -132,6 +134,7 @@ public class TaskAttemptInfo extends BaseInfo {
return schedulingCausalTA;
}
+
@Override
public final String getDiagnostics() {
return diagnostics;
@@ -186,10 +189,6 @@ public class TaskAttemptInfo extends BaseInfo {
return taskAttemptId;
}
- public final String getSuccessfulAttemptId() {
- return successfulAttemptId;
- }
-
public final String getNodeId() {
return nodeId;
}
@@ -261,7 +260,6 @@ public class TaskAttemptInfo extends BaseInfo {
sb.append("timeTaken=").append(getTimeTaken()).append(", ");
sb.append("events=").append(getEvents()).append(", ");
sb.append("diagnostics=").append(getDiagnostics()).append(", ");
- sb.append("successfulAttempId=").append(getSuccessfulAttemptId()).append(", ");
sb.append("container=").append(getContainer()).append(", ");
sb.append("nodeId=").append(getNodeId()).append(", ");
sb.append("logURL=").append(getLogURL()).append(", ");
http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
index 9705b73..5e63efa 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
@@ -27,6 +27,8 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Ordering;
+
+import org.apache.hadoop.util.StringInterner;
import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
@@ -63,16 +65,17 @@ public class TaskInfo extends BaseInfo {
jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase
(Constants.TEZ_TASK_ID));
- taskId = jsonObject.optString(Constants.ENTITY);
+ taskId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY));
//Parse additional Info
final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
startTime = otherInfoNode.optLong(Constants.START_TIME);
endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
- successfulAttemptId = otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID);
+ successfulAttemptId = StringInterner.weakIntern(
+ otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID));
scheduledTime = otherInfoNode.optLong(Constants.SCHEDULED_TIME);
- status = otherInfoNode.optString(Constants.STATUS);
+ status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
index 554f94b..d2dac7d 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
@@ -27,6 +27,8 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Ordering;
+
+import org.apache.hadoop.util.StringInterner;
import org.apache.tez.dag.api.oldrecords.TaskState;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
@@ -43,6 +45,7 @@ import static org.apache.hadoop.classification.InterfaceStability.Evolving;
@Evolving
public class VertexInfo extends BaseInfo {
+ private final String vertexId;
private final String vertexName;
private final long finishTime;
private final long initTime;
@@ -80,6 +83,7 @@ public class VertexInfo extends BaseInfo {
jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase
(Constants.TEZ_VERTEX_ID));
+ vertexId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY)); // id is in ENTITY; ENTITY_TYPE is only the type tag checked above
taskInfoMap = Maps.newHashMap();
inEdgeList = Lists.newLinkedList();
@@ -104,9 +108,9 @@ public class VertexInfo extends BaseInfo {
killedTasks = otherInfoNode.optInt(Constants.NUM_KILLED_TASKS);
numFailedTaskAttempts =
otherInfoNode.optInt(Constants.NUM_FAILED_TASKS_ATTEMPTS);
- vertexName = otherInfoNode.optString(Constants.VERTEX_NAME);
- processorClass = otherInfoNode.optString(Constants.PROCESSOR_CLASS_NAME);
- status = otherInfoNode.optString(Constants.STATUS);
+ vertexName = StringInterner.weakIntern(otherInfoNode.optString(Constants.VERTEX_NAME));
+ processorClass = StringInterner.weakIntern(otherInfoNode.optString(Constants.PROCESSOR_CLASS_NAME));
+ status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
}
public static VertexInfo create(JSONObject vertexInfoObject) throws
@@ -217,6 +221,10 @@ public class VertexInfo extends BaseInfo {
public final String getVertexName() {
return vertexName;
}
+
+ public final String getVertexId() {
+ return vertexId;
+ }
//Quite possible that getFinishTime is not yet recorded for failed vertices (or killed vertices)
//Start time of vertex infers that the dependencies are done and AM has inited it.
http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-tools/analyzers/job-analyzer/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tools/analyzers/job-analyzer/pom.xml b/tez-tools/analyzers/job-analyzer/pom.xml
index 21ee6a2..fe28b14 100644
--- a/tez-tools/analyzers/job-analyzer/pom.xml
+++ b/tez-tools/analyzers/job-analyzer/pom.xml
@@ -26,6 +26,10 @@
<dependencies>
<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
</dependency>
@@ -43,5 +47,6 @@
<artifactId>apache-rat-plugin</artifactId>
</plugin>
</plugins>
- </build>
+ </build>
+
</project>