You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2018/10/16 20:47:01 UTC
hadoop git commit: MAPREDUCE-7150. Optimize collections used by MR
JHS to reduce its memory. (Contributed by Misha Dmitriev)
Repository: hadoop
Updated Branches:
refs/heads/trunk c2288ac45 -> babd1449b
MAPREDUCE-7150. Optimize collections used by MR JHS to reduce its memory. (Contributed by Misha Dmitriev)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/babd1449
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/babd1449
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/babd1449
Branch: refs/heads/trunk
Commit: babd1449bf8898f44c434c852e67240721c0eb00
Parents: c2288ac
Author: Haibo Chen <ha...@apache.org>
Authored: Tue Oct 16 13:44:41 2018 -0700
Committer: Haibo Chen <ha...@apache.org>
Committed: Tue Oct 16 13:44:41 2018 -0700
----------------------------------------------------------------------
.../counters/FileSystemCounterGroup.java | 56 ++++++++++++--------
.../mapreduce/jobhistory/JobHistoryParser.java | 2 +-
.../hadoop/mapreduce/v2/hs/CompletedTask.java | 5 +-
.../mapreduce/v2/hs/CompletedTaskAttempt.java | 2 +-
4 files changed, 38 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/babd1449/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/counters/FileSystemCounterGroup.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/counters/FileSystemCounterGroup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/counters/FileSystemCounterGroup.java
index 046368e..ed7f271 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/counters/FileSystemCounterGroup.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/counters/FileSystemCounterGroup.java
@@ -61,8 +61,9 @@ public abstract class FileSystemCounterGroup<C extends Counter>
// C[] would need Array.newInstance which requires a Class<C> reference.
// Just a few local casts probably worth not having to carry it around.
- private final Map<String, Object[]> map =
- new ConcurrentSkipListMap<String, Object[]>();
+ // Initialized lazily, since in some situations millions of empty maps can
+ // waste a substantial (e.g. 4% as we observed) portion of the heap
+ private Map<String, Object[]> map;
private String displayName;
private static final Joiner NAME_JOINER = Joiner.on('_');
@@ -214,6 +215,9 @@ public abstract class FileSystemCounterGroup<C extends Counter>
@SuppressWarnings("unchecked")
public synchronized C findCounter(String scheme, FileSystemCounter key) {
final String canonicalScheme = checkScheme(scheme);
+ if (map == null) {
+ map = new ConcurrentSkipListMap<>();
+ }
Object[] counters = map.get(canonicalScheme);
int ord = key.ordinal();
if (counters == null) {
@@ -247,10 +251,12 @@ public abstract class FileSystemCounterGroup<C extends Counter>
protected abstract C newCounter(String scheme, FileSystemCounter key);
@Override
- public int size() {
+ public synchronized int size() {
int n = 0;
- for (Object[] counters : map.values()) {
- n += numSetCounters(counters);
+ if (map != null) {
+ for (Object[] counters : map.values()) {
+ n += numSetCounters(counters);
+ }
}
return n;
}
@@ -271,19 +277,23 @@ public abstract class FileSystemCounterGroup<C extends Counter>
* FileSystemGroup ::= #scheme (scheme #counter (key value)*)*
*/
@Override
- public void write(DataOutput out) throws IOException {
- WritableUtils.writeVInt(out, map.size()); // #scheme
- for (Map.Entry<String, Object[]> entry : map.entrySet()) {
- WritableUtils.writeString(out, entry.getKey()); // scheme
- // #counter for the above scheme
- WritableUtils.writeVInt(out, numSetCounters(entry.getValue()));
- for (Object counter : entry.getValue()) {
- if (counter == null) continue;
- @SuppressWarnings("unchecked")
- FSCounter c = (FSCounter) ((Counter)counter).getUnderlyingCounter();
- WritableUtils.writeVInt(out, c.key.ordinal()); // key
- WritableUtils.writeVLong(out, c.getValue()); // value
+ public synchronized void write(DataOutput out) throws IOException {
+ if (map != null) {
+ WritableUtils.writeVInt(out, map.size()); // #scheme
+ for (Map.Entry<String, Object[]> entry : map.entrySet()) {
+ WritableUtils.writeString(out, entry.getKey()); // scheme
+ // #counter for the above scheme
+ WritableUtils.writeVInt(out, numSetCounters(entry.getValue()));
+ for (Object counter : entry.getValue()) {
+ if (counter == null) continue;
+ @SuppressWarnings("unchecked")
+ FSCounter c = (FSCounter) ((Counter) counter).getUnderlyingCounter();
+ WritableUtils.writeVInt(out, c.key.ordinal()); // key
+ WritableUtils.writeVLong(out, c.getValue()); // value
+ }
}
+ } else {
+ WritableUtils.writeVInt(out, 0);
}
}
@@ -310,8 +320,8 @@ public abstract class FileSystemCounterGroup<C extends Counter>
@Override
public Iterator<C> iterator() {
return new AbstractIterator<C>() {
- Iterator<Object[]> it = map.values().iterator();
- Object[] counters = it.hasNext() ? it.next() : null;
+ Iterator<Object[]> it = map != null ? map.values().iterator() : null;
+ Object[] counters = (it != null && it.hasNext()) ? it.next() : null;
int i = 0;
@Override
protected C computeNext() {
@@ -322,7 +332,7 @@ public abstract class FileSystemCounterGroup<C extends Counter>
if (counter != null) return counter;
}
i = 0;
- counters = it.hasNext() ? it.next() : null;
+ counters = (it != null && it.hasNext()) ? it.next() : null;
}
return endOfData();
}
@@ -343,8 +353,10 @@ public abstract class FileSystemCounterGroup<C extends Counter>
public synchronized int hashCode() {
// need to be deep as counters is an array
int hash = FileSystemCounter.class.hashCode();
- for (Object[] counters : map.values()) {
- if (counters != null) hash ^= Arrays.hashCode(counters);
+ if (map != null) {
+ for (Object[] counters : map.values()) {
+ if (counters != null) hash ^= Arrays.hashCode(counters);
+ }
}
return hash;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/babd1449/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
index 6efb4f7..57c58ba 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
@@ -600,7 +600,7 @@ public class JobHistoryParser implements HistoryEventHandler {
public TaskInfo() {
startTime = finishTime = -1;
error = splitLocations = "";
- attemptsMap = new HashMap<TaskAttemptID, TaskAttemptInfo>();
+ attemptsMap = new HashMap<TaskAttemptID, TaskAttemptInfo>(2);
}
public void printAll() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/babd1449/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTask.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTask.java
index 81fddaf..63b3600 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTask.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTask.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.mapreduce.v2.hs;
import java.util.ArrayList;
import java.util.LinkedHashMap;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -48,11 +47,11 @@ public class CompletedTask implements Task {
private final TaskInfo taskInfo;
private TaskReport report;
private TaskAttemptId successfulAttempt;
- private List<String> reportDiagnostics = new LinkedList<String>();
+ private List<String> reportDiagnostics = new ArrayList<String>(2);
private Lock taskAttemptsLock = new ReentrantLock();
private AtomicBoolean taskAttemptsLoaded = new AtomicBoolean(false);
private final Map<TaskAttemptId, TaskAttempt> attempts =
- new LinkedHashMap<TaskAttemptId, TaskAttempt>();
+ new LinkedHashMap<TaskAttemptId, TaskAttempt>(2);
CompletedTask(TaskId taskId, TaskInfo taskInfo) {
//TODO JobHistoryParser.handleTaskFailedAttempt should use state from the event.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/babd1449/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java
index c87d82b..17ec017 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java
@@ -39,7 +39,7 @@ public class CompletedTaskAttempt implements TaskAttempt {
private final TaskAttemptInfo attemptInfo;
private final TaskAttemptId attemptId;
private final TaskAttemptState state;
- private final List<String> diagnostics = new ArrayList<String>();
+ private final List<String> diagnostics = new ArrayList<String>(2);
private TaskAttemptReport report;
private String localDiagMessage;
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org