Posted to common-commits@hadoop.apache.org by yl...@apache.org on 2015/09/29 10:32:19 UTC
hadoop git commit: HDFS-8859. Improve DataNode ReplicaMap memory footprint to save about 45%. (yliu)
Repository: hadoop
Updated Branches:
refs/heads/branch-2 1a5f3e93c -> 9c47ab32b
HDFS-8859. Improve DataNode ReplicaMap memory footprint to save about 45%. (yliu)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9c47ab32
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9c47ab32
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9c47ab32
Branch: refs/heads/branch-2
Commit: 9c47ab32b14fa9e589d54ad739b3721bb8d444f9
Parents: 1a5f3e9
Author: yliu <yl...@apache.org>
Authored: Tue Sep 29 16:23:50 2015 +0800
Committer: yliu <yl...@apache.org>
Committed: Tue Sep 29 16:23:50 2015 +0800
----------------------------------------------------------------------
.../main/java/org/apache/hadoop/util/GSet.java | 14 ++
.../org/apache/hadoop/util/GSetByHashMap.java | 6 +
.../org/apache/hadoop/util/LightWeightGSet.java | 82 ++++--
.../hadoop/util/LightWeightResizableGSet.java | 129 ++++++++++
.../java/org/apache/hadoop/util/TestGSet.java | 69 ++++-
.../hadoop/util/TestLightWeightCache.java | 6 +
.../util/TestLightWeightResizableGSet.java | 252 +++++++++++++++++++
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
.../hdfs/server/datanode/ReplicaInfo.java | 27 +-
.../datanode/fsdataset/impl/BlockPoolSlice.java | 7 +-
.../datanode/fsdataset/impl/ReplicaMap.java | 38 +--
11 files changed, 569 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
index 26e73cf..e4a8d0f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.util;
+import java.util.Collection;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -86,5 +88,17 @@ public interface GSet<K, E extends K> extends Iterable<E> {
*/
E remove(K key);
+ /**
+ * Clear the set.
+ */
void clear();
+
+ /**
+ * Returns a {@link Collection} view of the values contained in this set.
+ * The collection is backed by the set, so changes to the set are
+ * reflected in the collection, and vice-versa.
+ *
+ * @return the collection of values.
+ */
+ Collection<E> values();
}
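
[Editor's note, not part of the commit: the new values() method returns a live java.util.Collection view, so later puts and removes on the set show up in the view. A minimal illustrative sketch, using a hypothetical Item element type; any type implementing LightWeightGSet.LinkedElement with consistent hashCode/equals would do.]

  import java.util.Collection;
  import org.apache.hadoop.util.GSet;
  import org.apache.hadoop.util.LightWeightGSet;

  public class GSetValuesSketch {
    // Hypothetical element: it carries its own "next" pointer for the set's buckets.
    static class Item implements LightWeightGSet.LinkedElement {
      final long id;
      LightWeightGSet.LinkedElement next;
      Item(long id) { this.id = id; }
      @Override public int hashCode() { return (int) (id ^ (id >>> 32)); }
      @Override public boolean equals(Object o) {
        return o instanceof Item && ((Item) o).id == id;
      }
      @Override public void setNext(LightWeightGSet.LinkedElement n) { next = n; }
      @Override public LightWeightGSet.LinkedElement getNext() { return next; }
    }

    public static void main(String[] args) {
      GSet<Item, Item> set = new LightWeightGSet<Item, Item>(16);
      set.put(new Item(1));
      set.put(new Item(2));
      Collection<Item> view = set.values();   // backed by the set
      System.out.println(view.size());        // 2
      set.remove(new Item(1));
      System.out.println(view.size());        // 1, the view reflects the removal
    }
  }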
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
index 87488db..e341c74 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.util;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
@@ -70,4 +71,9 @@ public class GSetByHashMap<K, E extends K> implements GSet<K, E> {
public void clear() {
m.clear();
}
+
+ @Override
+ public Collection<E> values() {
+ return m.values();
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
index 1767d85..7c7878a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
@@ -18,12 +18,14 @@
package org.apache.hadoop.util;
import java.io.PrintStream;
+import java.util.AbstractCollection;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.util.StringUtils;
import com.google.common.annotations.VisibleForTesting;
@@ -49,12 +51,12 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
/**
* Elements of {@link LightWeightGSet}.
*/
- public static interface LinkedElement {
+ public interface LinkedElement {
/** Set the next element. */
- public void setNext(LinkedElement next);
+ void setNext(LinkedElement next);
/** Get the next element. */
- public LinkedElement getNext();
+ LinkedElement getNext();
}
static final int MAX_ARRAY_LENGTH = 1 << 30; //prevent int overflow problem
@@ -64,15 +66,20 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
* An internal array of entries, which are the rows of the hash table.
* The size must be a power of two.
*/
- private final LinkedElement[] entries;
+ protected LinkedElement[] entries;
/** A mask for computing the array index from the hash value of an element. */
- private final int hash_mask;
+ protected int hash_mask;
/** The size of the set (not the entry array). */
- private int size = 0;
+ protected int size = 0;
/** Modification version for fail-fast.
* @see ConcurrentModificationException
*/
- private int modification = 0;
+ protected int modification = 0;
+
+ private Collection<E> values;
+
+ protected LightWeightGSet() {
+ }
/**
* @param recommended_length Recommended size of the internal array.
@@ -87,7 +94,7 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
}
//compute actual length
- private static int actualArrayLength(int recommended) {
+ protected static int actualArrayLength(int recommended) {
if (recommended > MAX_ARRAY_LENGTH) {
return MAX_ARRAY_LENGTH;
} else if (recommended < MIN_ARRAY_LENGTH) {
@@ -103,11 +110,11 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
return size;
}
- private int getIndex(final K key) {
+ protected int getIndex(final K key) {
return key.hashCode() & hash_mask;
}
- private E convert(final LinkedElement e){
+ protected E convert(final LinkedElement e){
@SuppressWarnings("unchecked")
final E r = (E)e;
return r;
@@ -138,24 +145,26 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
@Override
public E put(final E element) {
- //validate element
+ // validate element
if (element == null) {
throw new NullPointerException("Null element is not supported.");
}
- if (!(element instanceof LinkedElement)) {
+ LinkedElement e = null;
+ try {
+ e = (LinkedElement)element;
+ } catch (ClassCastException ex) {
throw new HadoopIllegalArgumentException(
"!(element instanceof LinkedElement), element.getClass()="
+ element.getClass());
}
- final LinkedElement e = (LinkedElement)element;
- //find index
+ // find index
final int index = getIndex(element);
- //remove if it already exists
+ // remove if it already exists
final E existing = remove(index, element);
- //insert the element to the head of the linked list
+ // insert the element to the head of the linked list
modification++;
size++;
e.setNext(entries[index]);
@@ -171,7 +180,7 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
* @return If such element exists, return it.
* Otherwise, return null.
*/
- private E remove(final int index, final K key) {
+ protected E remove(final int index, final K key) {
if (entries[index] == null) {
return null;
} else if (entries[index].equals(key)) {
@@ -214,6 +223,38 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
}
@Override
+ public Collection<E> values() {
+ if (values == null) {
+ values = new Values();
+ }
+ return values;
+ }
+
+ private final class Values extends AbstractCollection<E> {
+
+ @Override
+ public Iterator<E> iterator() {
+ return LightWeightGSet.this.iterator();
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public boolean contains(Object o) {
+ return LightWeightGSet.this.contains((K)o);
+ }
+
+ @Override
+ public void clear() {
+ LightWeightGSet.this.clear();
+ }
+ }
+
+ @Override
public Iterator<E> iterator() {
return new SetIterator();
}
@@ -363,9 +404,8 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
}
public void clear() {
- for (int i = 0; i < entries.length; i++) {
- entries[i] = null;
- }
+ modification++;
+ Arrays.fill(entries, null);
size = 0;
}
}
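
[Editor's note, not part of the commit: one consequence of the clear() change above is that clear() now bumps the modification counter, so an iterator obtained before the clear should fail fast on its next use, matching the ConcurrentModificationException contract already documented for the class. Illustrative fragment, reusing the hypothetical Item type from the sketch above.]

  GSet<Item, Item> set = new LightWeightGSet<Item, Item>(16);
  set.put(new Item(1));
  java.util.Iterator<Item> it = set.iterator();
  set.clear();   // now increments "modification" before nulling the buckets
  it.next();     // expected to throw ConcurrentModificationException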
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java
new file mode 100644
index 0000000..0abcf98
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A low memory footprint {@link GSet} implementation,
+ * which uses an array for storing the elements
+ * and linked lists for collision resolution.
+ *
+ * If the size of elements exceeds the threshold,
+ * the internal array will be resized to double length.
+ *
+ * This class does not support null element.
+ *
+ * This class is not thread safe.
+ *
+ * @param <K> Key type for looking up the elements
+ * @param <E> Element type, which must be
+ * (1) a subclass of K, and
+ * (2) implementing {@link LinkedElement} interface.
+ */
+@InterfaceAudience.Private
+public class LightWeightResizableGSet<K, E extends K>
+ extends LightWeightGSet<K, E> {
+
+ /**
+ * The default initial capacity - MUST be a power of two.
+ */
+ static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;
+
+ /**
+ * The load factor used when none specified in constructor.
+ */
+ static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+ /** Size of the entry table. */
+ private int capacity;
+
+ /**
+ * The load factor for the hash set.
+ */
+ private final float loadFactor;
+ private int threshold;
+
+ public LightWeightResizableGSet(int initCapacity, float loadFactor) {
+ if (initCapacity < 0) {
+ throw new HadoopIllegalArgumentException("Illegal initial capacity: " +
+ initCapacity);
+ }
+ if (loadFactor <= 0 || loadFactor > 1.0f) {
+ throw new HadoopIllegalArgumentException("Illegal load factor: " +
+ loadFactor);
+ }
+ this.capacity = actualArrayLength(initCapacity);
+ this.hash_mask = capacity - 1;
+ this.loadFactor = loadFactor;
+ this.threshold = (int) (capacity * loadFactor);
+
+ entries = new LinkedElement[capacity];
+ }
+
+ public LightWeightResizableGSet() {
+ this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
+ }
+
+ public LightWeightResizableGSet(int initCapacity) {
+ this(initCapacity, DEFAULT_LOAD_FACTOR);
+ }
+
+ @Override
+ public E put(final E element) {
+ E existing = super.put(element);
+ expandIfNecessary();
+ return existing;
+ }
+
+ /**
+ * Resize the internal table to given capacity.
+ */
+ @SuppressWarnings("unchecked")
+ protected void resize(int cap) {
+ int newCapacity = actualArrayLength(cap);
+ if (newCapacity == this.capacity) {
+ return;
+ }
+ this.capacity = newCapacity;
+ this.threshold = (int) (capacity * loadFactor);
+ this.hash_mask = capacity - 1;
+ LinkedElement[] oldEntries = entries;
+ entries = new LinkedElement[capacity];
+ for (int i = 0; i < oldEntries.length; i++) {
+ LinkedElement e = oldEntries[i];
+ while (e != null) {
+ LinkedElement next = e.getNext();
+ int index = getIndex((E)e);
+ e.setNext(entries[index]);
+ entries[index] = e;
+ e = next;
+ }
+ }
+ }
+
+ /**
+ * Checks if we need to expand, and expands if necessary.
+ */
+ protected void expandIfNecessary() {
+ if (size > this.threshold && capacity < MAX_ARRAY_LENGTH) {
+ resize(capacity * 2);
+ }
+ }
+}
\ No newline at end of file
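
[Editor's note, not part of the commit: a brief orientation on the new class. With the defaults above (initial capacity 16, load factor 0.75) the threshold is 12, so the table doubles when the 13th element goes in and keeps doubling as the set grows; resize() only relinks each element's own next pointer into the new bucket array, it does not copy elements. Illustrative sketch, reusing the hypothetical Item type from the first sketch.]

  LightWeightResizableGSet<Item, Item> set =
      new LightWeightResizableGSet<Item, Item>();
  for (long i = 0; i < 13; i++) {
    set.put(new Item(i));     // the 13th put exceeds threshold 12
  }
  // Internally: capacity grew from 16 to 32, threshold from 12 to 24,
  // and every existing element was rehashed by relinking its next pointer.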
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
index af880ee..2d39f3d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.util;
+import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Random;
@@ -41,10 +42,15 @@ public class TestGSet {
@Test
public void testExceptionCases() {
+ testExceptionCases(false);
+ testExceptionCases(true);
+ }
+
+ private void testExceptionCases(boolean resizable) {
{
//test contains
final LightWeightGSet<Integer, Integer> gset
- = new LightWeightGSet<Integer, Integer>(16);
+ = createGSet(16, resizable);
try {
//test contains with a null element
gset.contains(null);
@@ -57,7 +63,7 @@ public class TestGSet {
{
//test get
final LightWeightGSet<Integer, Integer> gset
- = new LightWeightGSet<Integer, Integer>(16);
+ = createGSet(16, resizable);
try {
//test get with a null element
gset.get(null);
@@ -70,7 +76,7 @@ public class TestGSet {
{
//test put
final LightWeightGSet<Integer, Integer> gset
- = new LightWeightGSet<Integer, Integer>(16);
+ = createGSet(16, resizable);
try {
//test put with a null element
gset.put(null);
@@ -97,7 +103,7 @@ public class TestGSet {
for(int v = 1; v < data.length-1; v++) {
{
//test remove while iterating
- final GSet<IntElement, IntElement> gset = createGSet(data);
+ final GSet<IntElement, IntElement> gset = createGSet(data, resizable);
for(IntElement i : gset) {
if (i.value == v) {
//okay because data[0] is not in gset
@@ -120,7 +126,7 @@ public class TestGSet {
{
//test put new element while iterating
- final GSet<IntElement, IntElement> gset = createGSet(data);
+ final GSet<IntElement, IntElement> gset = createGSet(data, resizable);
try {
for(IntElement i : gset) {
if (i.value == v) {
@@ -135,7 +141,7 @@ public class TestGSet {
{
//test put existing element while iterating
- final GSet<IntElement, IntElement> gset = createGSet(data);
+ final GSet<IntElement, IntElement> gset = createGSet(data, resizable);
try {
for(IntElement i : gset) {
if (i.value == v) {
@@ -151,9 +157,17 @@ public class TestGSet {
}
}
- private static GSet<IntElement, IntElement> createGSet(final IntElement[] data) {
+ private static LightWeightGSet<Integer, Integer> createGSet(
+ int size, boolean resizable) {
+ return resizable ? new LightWeightResizableGSet<Integer, Integer>(size) :
+ new LightWeightGSet<Integer, Integer>(size);
+ }
+
+ private static GSet<IntElement, IntElement> createGSet(
+ final IntElement[] data, boolean resizable) {
final GSet<IntElement, IntElement> gset
- = new LightWeightGSet<IntElement, IntElement>(8);
+ = resizable ? new LightWeightResizableGSet<IntElement, IntElement>(8) :
+ new LightWeightGSet<IntElement, IntElement>(8);
for(int i = 1; i < data.length; i++) {
gset.put(data[i]);
}
@@ -168,6 +182,14 @@ public class TestGSet {
check(new GSetTestCase(255, 1 << 10, 65537));
}
+ @Test
+ public void testResizableGSet() {
+ //The parameters are: table length, data size, modulus, resizable.
+ check(new GSetTestCase(1, 1 << 4, 65537, true));
+ check(new GSetTestCase(17, 1 << 16, 17, true));
+ check(new GSetTestCase(255, 1 << 10, 65537, true));
+ }
+
/**
* A long running test with various data sets and parameters.
* It may take ~5 hours,
@@ -177,14 +199,25 @@ public class TestGSet {
//@Test
public void runMultipleTestGSet() {
for(int offset = -2; offset <= 2; offset++) {
- runTestGSet(1, offset);
+ runTestGSet(1, offset, false);
+ for(int i = 1; i < Integer.SIZE - 1; i++) {
+ runTestGSet((1 << i) + 1, offset, false);
+ }
+ }
+ }
+
+ //@Test
+ public void runMultipleTestResizableGSet() {
+ for(int offset = -2; offset <= 2; offset++) {
+ runTestGSet(1, offset, true);
for(int i = 1; i < Integer.SIZE - 1; i++) {
- runTestGSet((1 << i) + 1, offset);
+ runTestGSet((1 << i) + 1, offset, true);
}
}
}
- private static void runTestGSet(final int modulus, final int offset) {
+ private static void runTestGSet(final int modulus, final int offset,
+ boolean resizable) {
println("\n\nmodulus=" + modulus + ", offset=" + offset);
for(int i = 0; i <= 16; i += 4) {
final int tablelength = (1 << i) + offset;
@@ -194,7 +227,7 @@ public class TestGSet {
for(int j = 0; j <= upper; j += steps) {
final int datasize = 1 << j;
- check(new GSetTestCase(tablelength, datasize, modulus));
+ check(new GSetTestCase(tablelength, datasize, modulus, resizable));
}
}
}
@@ -265,6 +298,10 @@ public class TestGSet {
int contain_count = 0;
GSetTestCase(int tablelength, int datasize, int modulus) {
+ this(tablelength, datasize, modulus, false);
+ }
+
+ GSetTestCase(int tablelength, int datasize, int modulus, boolean resizable) {
denominator = Math.min((datasize >> 7) + 1, 1 << 16);
info = getClass().getSimpleName()
+ ": tablelength=" + tablelength
@@ -274,7 +311,8 @@ public class TestGSet {
println(info);
data = new IntData(datasize, modulus);
- gset = new LightWeightGSet<IntElement, IntElement>(tablelength);
+ gset = resizable ? new LightWeightResizableGSet<IntElement, IntElement>() :
+ new LightWeightGSet<IntElement, IntElement>(tablelength);
Assert.assertEquals(0, gset.size());
}
@@ -392,6 +430,11 @@ public class TestGSet {
gset.clear();
Assert.assertEquals(0, size());
}
+
+ @Override
+ public Collection<IntElement> values() {
+ throw new UnsupportedOperationException();
+ }
}
/** Test data set */
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
index 68d484f..dff6937 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.util;
+import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.Random;
@@ -379,6 +380,11 @@ public class TestLightWeightCache {
cache.clear();
Assert.assertEquals(0, size());
}
+
+ @Override
+ public Collection<IntEntry> values() {
+ throw new UnsupportedOperationException();
+ }
}
private static class IntData {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java
new file mode 100644
index 0000000..3250092
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/** Testing {@link LightWeightResizableGSet} */
+public class TestLightWeightResizableGSet {
+ public static final Log LOG = LogFactory.getLog(TestLightWeightResizableGSet.class);
+ private Random random = new Random();
+
+ private TestElement[] generateElements(int length) {
+ TestElement[] elements = new TestElement[length];
+ Set<Long> keys = new HashSet<>();
+ long k = 0;
+ for (int i = 0; i < length; i++) {
+ while (keys.contains(k = random.nextLong()));
+ elements[i] = new TestElement(k, random.nextLong());
+ keys.add(k);
+ }
+ return elements;
+ }
+
+ private TestKey[] getKeys(TestElement[] elements) {
+ TestKey[] keys = new TestKey[elements.length];
+ for (int i = 0; i < elements.length; i++) {
+ keys[i] = new TestKey(elements[i].getKey());
+ }
+ return keys;
+ }
+
+ private TestElement[] generateElements(TestKey[] keys) {
+ TestElement[] elements = new TestElement[keys.length];
+ for (int i = 0; i < keys.length; i++) {
+ elements[i] = new TestElement(keys[i], random.nextLong());
+ }
+ return elements;
+ }
+
+ private static class TestKey {
+ private final long key;
+
+ TestKey(long key) {
+ this.key = key;
+ }
+
+ TestKey(TestKey other) {
+ this.key = other.key;
+ }
+
+ long getKey() {
+ return key;
+ }
+
+ @Override
+ public int hashCode() {
+ return (int)(key^(key>>>32));
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof TestKey)) {
+ return false;
+ }
+ TestKey other = (TestKey)o;
+ return key == other.key;
+ }
+ }
+
+ private static class TestElement extends TestKey
+ implements LightWeightResizableGSet.LinkedElement {
+ private final long data;
+ private LightWeightResizableGSet.LinkedElement next;
+
+ TestElement(long key, long data) {
+ super(key);
+ this.data = data;
+ }
+
+ TestElement(TestKey key, long data) {
+ super(key);
+ this.data = data;
+ }
+
+ long getData() {
+ return data;
+ }
+
+ @Override
+ public void setNext(LightWeightResizableGSet.LinkedElement next) {
+ this.next = next;
+ }
+
+ @Override
+ public LightWeightResizableGSet.LinkedElement getNext() {
+ return next;
+ }
+ }
+
+ @Test(timeout = 60000)
+ public void testBasicOperations() {
+ TestElement[] elements = generateElements(1 << 16);
+ final LightWeightResizableGSet<TestKey, TestElement> set =
+ new LightWeightResizableGSet<TestKey, TestElement>();
+
+ assertEquals(set.size(), 0);
+
+ // put all elements
+ for (int i = 0; i < elements.length; i++) {
+ TestElement element = set.put(elements[i]);
+ assertTrue(element == null);
+ }
+
+ // check the set size
+ assertEquals(set.size(), elements.length);
+
+ // check all elements exist in the set and the data is correct
+ for (int i = 0; i < elements.length; i++) {
+ assertTrue(set.contains(elements[i]));
+
+ TestElement element = set.get(elements[i]);
+ assertEquals(elements[i].getData(), element.getData());
+ }
+
+ TestKey[] keys = getKeys(elements);
+ // generate new elements with same key, but new data
+ TestElement[] newElements = generateElements(keys);
+ // update the set
+ for (int i = 0; i < newElements.length; i++) {
+ TestElement element = set.put(newElements[i]);
+ assertTrue(element != null);
+ }
+
+ // check the set size
+ assertEquals(set.size(), elements.length);
+
+ // check all elements exist in the set and the data is updated to new value
+ for (int i = 0; i < keys.length; i++) {
+ assertTrue(set.contains(keys[i]));
+
+ TestElement element = set.get(keys[i]);
+ assertEquals(newElements[i].getData(), element.getData());
+ }
+
+ // test LightWeightHashGSet#values
+ Collection<TestElement> cElements = set.values();
+ assertEquals(cElements.size(), elements.length);
+ for (TestElement element : cElements) {
+ assertTrue(set.contains(element));
+ }
+
+ // remove elements
+ for (int i = 0; i < keys.length; i++) {
+ TestElement element = set.remove(keys[i]);
+
+ assertTrue(element != null);
+
+ // the element should not exist after remove
+ assertFalse(set.contains(keys[i]));
+ }
+
+ // check the set size
+ assertEquals(set.size(), 0);
+ }
+
+ @Test(timeout = 60000)
+ public void testRemoveAll() {
+ TestElement[] elements = generateElements(1 << 16);
+ final LightWeightResizableGSet<TestKey, TestElement> set =
+ new LightWeightResizableGSet<TestKey, TestElement>();
+
+ assertEquals(set.size(), 0);
+
+ // put all elements
+ for (int i = 0; i < elements.length; i++) {
+ TestElement element = set.put(elements[i]);
+ assertTrue(element == null);
+ }
+
+ // check the set size
+ assertEquals(set.size(), elements.length);
+
+ // remove all through clear
+ {
+ set.clear();
+ assertEquals(set.size(), 0);
+
+ // check all elements removed
+ for (int i = 0; i < elements.length; i++) {
+ assertFalse(set.contains(elements[i]));
+ }
+ assertFalse(set.iterator().hasNext());
+ }
+
+ // put all elements back
+ for (int i = 0; i < elements.length; i++) {
+ TestElement element = set.put(elements[i]);
+ assertTrue(element == null);
+ }
+
+ // remove all through iterator
+ {
+ for (Iterator<TestElement> iter = set.iterator(); iter.hasNext(); ) {
+ TestElement element = iter.next();
+ // element should be there before removing
+ assertTrue(set.contains(element));
+ iter.remove();
+ // element should not be there now
+ assertFalse(set.contains(element));
+ }
+
+ // the deleted elements should not be there
+ for (int i = 0; i < elements.length; i++) {
+ assertFalse(set.contains(elements[i]));
+ }
+
+ // iterator should not have next
+ assertFalse(set.iterator().hasNext());
+
+ // check the set size
+ assertEquals(set.size(), 0);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 2042869..0461766 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -643,6 +643,9 @@ Release 2.8.0 - UNRELEASED
HDFS-9148. Incorrect assert message in TestWriteToReplica#testWriteToTemporary
(Tony Wu via lei)
+ HDFS-8859. Improve DataNode ReplicaMap memory footprint to save about 45%.
+ (yliu)
+
OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
index 31b14fa..d19e656 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
@@ -18,20 +18,13 @@
package org.apache.hadoop.hdfs.server.datanode;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
-import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.LightWeightResizableGSet;
import com.google.common.annotations.VisibleForTesting;
@@ -40,8 +33,12 @@ import com.google.common.annotations.VisibleForTesting;
* It provides a general interface for meta information of a replica.
*/
@InterfaceAudience.Private
-abstract public class ReplicaInfo extends Block implements Replica {
-
+abstract public class ReplicaInfo extends Block
+ implements Replica, LightWeightResizableGSet.LinkedElement {
+
+ /** For implementing {@link LightWeightResizableGSet.LinkedElement} interface */
+ private LightWeightResizableGSet.LinkedElement next;
+
/** volume where the replica belongs */
private FsVolumeSpi volume;
@@ -229,4 +226,14 @@ abstract public class ReplicaInfo extends Block implements Replica {
public boolean isOnTransientStorage() {
return volume.isTransientStorage();
}
+
+ @Override
+ public LightWeightResizableGSet.LinkedElement getNext() {
+ return next;
+ }
+
+ @Override
+ public void setNext(LightWeightResizableGSet.LinkedElement next) {
+ this.next = next;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
index e12d69a..33a88df 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
@@ -749,7 +749,12 @@ class BlockPoolSlice {
// Now it is safe to add the replica into volumeMap
// In case of any exception during parsing this cache file, fall back
// to scan all the files on disk.
- for (ReplicaInfo info: tmpReplicaMap.replicas(bpid)) {
+ for (Iterator<ReplicaInfo> iter =
+ tmpReplicaMap.replicas(bpid).iterator(); iter.hasNext(); ) {
+ ReplicaInfo info = iter.next();
+ // We use a lightweight GSet to store replicaInfo, we need to remove
+ // it from one GSet before adding to another.
+ iter.remove();
volumeMap.add(bpid, info);
}
LOG.info("Successfully read replica from cache file : "
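
[Editor's note, not part of the commit: the iterator-based removal above exists because of the intrusive design, not for performance. A ReplicaInfo now carries a single next pointer (see the ReplicaInfo change below), so it can live in at most one GSet at a time; adding it to volumeMap without first removing it from the temporary map would reuse that pointer and corrupt the first set's bucket chains. Hedged fragment illustrating the hazard, where replica stands for any concrete ReplicaInfo instance such as a FinalizedReplica.]

  // Unsafe: the element's single next pointer cannot chain two sets at once.
  tmpReplicaMap.add(bpid, replica);
  volumeMap.add(bpid, replica);   // overwrites replica's next pointer;
                                  // tmpReplicaMap's bucket chain is now broken
  // Safe pattern (what the loop above does): remove from the first map,
  // then add to the second.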
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
index 617e0fd..6f0b8a7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.util.LightWeightResizableGSet;
/**
* Maintains the replica map.
@@ -33,9 +34,9 @@ class ReplicaMap {
private final Object mutex;
// Map of block pool Id to another map of block Id to ReplicaInfo.
- private final Map<String, Map<Long, ReplicaInfo>> map =
- new HashMap<String, Map<Long, ReplicaInfo>>();
-
+ private final Map<String, LightWeightResizableGSet<Block, ReplicaInfo>> map =
+ new HashMap<String, LightWeightResizableGSet<Block, ReplicaInfo>>();
+
ReplicaMap(Object mutex) {
if (mutex == null) {
throw new HadoopIllegalArgumentException(
@@ -91,8 +92,8 @@ class ReplicaMap {
ReplicaInfo get(String bpid, long blockId) {
checkBlockPool(bpid);
synchronized(mutex) {
- Map<Long, ReplicaInfo> m = map.get(bpid);
- return m != null ? m.get(blockId) : null;
+ LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
+ return m != null ? m.get(new Block(blockId)) : null;
}
}
@@ -108,13 +109,13 @@ class ReplicaMap {
checkBlockPool(bpid);
checkBlock(replicaInfo);
synchronized(mutex) {
- Map<Long, ReplicaInfo> m = map.get(bpid);
+ LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
if (m == null) {
// Add an entry for block pool if it does not exist already
- m = new HashMap<Long, ReplicaInfo>();
+ m = new LightWeightResizableGSet<Block, ReplicaInfo>();
map.put(bpid, m);
}
- return m.put(replicaInfo.getBlockId(), replicaInfo);
+ return m.put(replicaInfo);
}
}
@@ -137,14 +138,13 @@ class ReplicaMap {
checkBlockPool(bpid);
checkBlock(block);
synchronized(mutex) {
- Map<Long, ReplicaInfo> m = map.get(bpid);
+ LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
if (m != null) {
- Long key = Long.valueOf(block.getBlockId());
- ReplicaInfo replicaInfo = m.get(key);
+ ReplicaInfo replicaInfo = m.get(block);
if (replicaInfo != null &&
block.getGenerationStamp() == replicaInfo.getGenerationStamp()) {
- return m.remove(key);
- }
+ return m.remove(block);
+ }
}
}
@@ -160,9 +160,9 @@ class ReplicaMap {
ReplicaInfo remove(String bpid, long blockId) {
checkBlockPool(bpid);
synchronized(mutex) {
- Map<Long, ReplicaInfo> m = map.get(bpid);
+ LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
if (m != null) {
- return m.remove(blockId);
+ return m.remove(new Block(blockId));
}
}
return null;
@@ -174,7 +174,7 @@ class ReplicaMap {
* @return the number of replicas in the map
*/
int size(String bpid) {
- Map<Long, ReplicaInfo> m = null;
+ LightWeightResizableGSet<Block, ReplicaInfo> m = null;
synchronized(mutex) {
m = map.get(bpid);
return m != null ? m.size() : 0;
@@ -192,7 +192,7 @@ class ReplicaMap {
* @return a collection of the replicas belonging to the block pool
*/
Collection<ReplicaInfo> replicas(String bpid) {
- Map<Long, ReplicaInfo> m = null;
+ LightWeightResizableGSet<Block, ReplicaInfo> m = null;
m = map.get(bpid);
return m != null ? m.values() : null;
}
@@ -200,10 +200,10 @@ class ReplicaMap {
void initBlockPool(String bpid) {
checkBlockPool(bpid);
synchronized(mutex) {
- Map<Long, ReplicaInfo> m = map.get(bpid);
+ LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
if (m == null) {
// Add an entry for block pool if it does not exist already
- m = new HashMap<Long, ReplicaInfo>();
+ m = new LightWeightResizableGSet<Block, ReplicaInfo>();
map.put(bpid, m);
}
}
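
[Editor's note, not part of the commit: a rough, illustrative per-replica accounting of where the saving in the summary comes from. The byte counts below are approximate figures for a 64-bit JVM with compressed oops, not measurements from the patch.]

  Old per-replica map overhead (HashMap<Long, ReplicaInfo>):
    HashMap.Entry   ~32 bytes  (header + key/value/next references + cached hash, padded)
    boxed Long key  ~24 bytes  (header + 8-byte long, padded)
    table slot       ~4 bytes
    total           ~60 bytes
  New per-replica overhead (LightWeightResizableGSet<Block, ReplicaInfo>):
    next field inside ReplicaInfo  ~4 bytes
    table slot                     ~4 bytes
    total                          ~8 bytes

Eliminating the per-entry wrapper objects in this way is the kind of per-replica overhead the "about 45%" figure in the commit summary refers to; the exact percentage depends on the size of the ReplicaInfo objects themselves.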