You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by yl...@apache.org on 2015/09/29 10:32:19 UTC

hadoop git commit: HDFS-8859. Improve DataNode ReplicaMap memory footprint to save about 45%. (yliu)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2 1a5f3e93c -> 9c47ab32b


HDFS-8859. Improve DataNode ReplicaMap memory footprint to save about 45%. (yliu)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9c47ab32
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9c47ab32
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9c47ab32

Branch: refs/heads/branch-2
Commit: 9c47ab32b14fa9e589d54ad739b3721bb8d444f9
Parents: 1a5f3e9
Author: yliu <yl...@apache.org>
Authored: Tue Sep 29 16:23:50 2015 +0800
Committer: yliu <yl...@apache.org>
Committed: Tue Sep 29 16:23:50 2015 +0800

----------------------------------------------------------------------
 .../main/java/org/apache/hadoop/util/GSet.java  |  14 ++
 .../org/apache/hadoop/util/GSetByHashMap.java   |   6 +
 .../org/apache/hadoop/util/LightWeightGSet.java |  82 ++++--
 .../hadoop/util/LightWeightResizableGSet.java   | 129 ++++++++++
 .../java/org/apache/hadoop/util/TestGSet.java   |  69 ++++-
 .../hadoop/util/TestLightWeightCache.java       |   6 +
 .../util/TestLightWeightResizableGSet.java      | 252 +++++++++++++++++++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |   3 +
 .../hdfs/server/datanode/ReplicaInfo.java       |  27 +-
 .../datanode/fsdataset/impl/BlockPoolSlice.java |   7 +-
 .../datanode/fsdataset/impl/ReplicaMap.java     |  38 +--
 11 files changed, 569 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
index 26e73cf..e4a8d0f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.util;
 
+import java.util.Collection;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -86,5 +88,17 @@ public interface GSet<K, E extends K> extends Iterable<E> {
   */
   E remove(K key);
 
+  /**
+   * Clear the set.
+   */
   void clear();
+
+  /**
+   * Returns a {@link Collection} view of the values contained in this set.
+   * The collection is backed by the set, so changes to the set are
+   * reflected in the collection, and vice-versa.
+   *
+   * @return the collection of values.
+   */
+  Collection<E> values();
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
index 87488db..e341c74 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.util;
 
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 
@@ -70,4 +71,9 @@ public class GSetByHashMap<K, E extends K> implements GSet<K, E> {
   public void clear() {
     m.clear();
   }
+
+  @Override
+  public Collection<E> values() {
+    return m.values();
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
index 1767d85..7c7878a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
@@ -18,12 +18,14 @@
 package org.apache.hadoop.util;
 
 import java.io.PrintStream;
+import java.util.AbstractCollection;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.util.StringUtils;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -49,12 +51,12 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
   /**
    * Elements of {@link LightWeightGSet}.
    */
-  public static interface LinkedElement {
+  public interface LinkedElement {
     /** Set the next element. */
-    public void setNext(LinkedElement next);
+    void setNext(LinkedElement next);
 
     /** Get the next element. */
-    public LinkedElement getNext();
+    LinkedElement getNext();
   }
 
   static final int MAX_ARRAY_LENGTH = 1 << 30; //prevent int overflow problem
@@ -64,15 +66,20 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
    * An internal array of entries, which are the rows of the hash table.
    * The size must be a power of two.
    */
-  private final LinkedElement[] entries;
+  protected LinkedElement[] entries;
   /** A mask for computing the array index from the hash value of an element. */
-  private final int hash_mask;
+  protected int hash_mask;
   /** The size of the set (not the entry array). */
-  private int size = 0;
+  protected int size = 0;
   /** Modification version for fail-fast.
    * @see ConcurrentModificationException
    */
-  private int modification = 0;
+  protected int modification = 0;
+
+  private Collection<E> values;
+
+  protected LightWeightGSet() {
+  }
 
   /**
    * @param recommended_length Recommended size of the internal array.
@@ -87,7 +94,7 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
   }
 
   //compute actual length
-  private static int actualArrayLength(int recommended) {
+  protected static int actualArrayLength(int recommended) {
     if (recommended > MAX_ARRAY_LENGTH) {
       return MAX_ARRAY_LENGTH;
     } else if (recommended < MIN_ARRAY_LENGTH) {
@@ -103,11 +110,11 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
     return size;
   }
 
-  private int getIndex(final K key) {
+  protected int getIndex(final K key) {
     return key.hashCode() & hash_mask;
   }
 
-  private E convert(final LinkedElement e){
+  protected E convert(final LinkedElement e){
     @SuppressWarnings("unchecked")
     final E r = (E)e;
     return r;
@@ -138,24 +145,26 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
 
   @Override
   public E put(final E element) {
-    //validate element
+    // validate element
     if (element == null) {
       throw new NullPointerException("Null element is not supported.");
     }
-    if (!(element instanceof LinkedElement)) {
+    LinkedElement e = null;
+    try {
+      e = (LinkedElement)element;
+    } catch (ClassCastException ex) {
       throw new HadoopIllegalArgumentException(
           "!(element instanceof LinkedElement), element.getClass()="
           + element.getClass());
     }
-    final LinkedElement e = (LinkedElement)element;
 
-    //find index
+    // find index
     final int index = getIndex(element);
 
-    //remove if it already exists
+    // remove if it already exists
     final E existing = remove(index, element);
 
-    //insert the element to the head of the linked list
+    // insert the element to the head of the linked list
     modification++;
     size++;
     e.setNext(entries[index]);
@@ -171,7 +180,7 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
    * @return If such element exists, return it.
    *         Otherwise, return null.
    */
-  private E remove(final int index, final K key) {
+  protected E remove(final int index, final K key) {
     if (entries[index] == null) {
       return null;
     } else if (entries[index].equals(key)) {
@@ -214,6 +223,38 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
   }
 
   @Override
+  public Collection<E> values() {
+    if (values == null) {
+      values = new Values();
+    }
+    return values;
+  }
+
+  private final class Values extends AbstractCollection<E> {
+
+    @Override
+    public Iterator<E> iterator() {
+      return LightWeightGSet.this.iterator();
+    }
+
+    @Override
+    public int size() {
+      return size;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public boolean contains(Object o) {
+      return LightWeightGSet.this.contains((K)o);
+    }
+
+    @Override
+    public void clear() {
+      LightWeightGSet.this.clear();
+    }
+  }
+
+  @Override
   public Iterator<E> iterator() {
     return new SetIterator();
   }
@@ -363,9 +404,8 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
   }
   
   public void clear() {
-    for (int i = 0; i < entries.length; i++) {
-      entries[i] = null;
-    }
+    modification++;
+    Arrays.fill(entries, null);
     size = 0;
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java
new file mode 100644
index 0000000..0abcf98
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A low memory footprint {@link GSet} implementation,
+ * which uses an array for storing the elements
+ * and linked lists for collision resolution.
+ *
+ * If the size of elements exceeds the threshold,
+ * the internal array will be resized to double length.
+ *
+ * This class does not support null element.
+ *
+ * This class is not thread safe.
+ *
+ * @param <K> Key type for looking up the elements
+ * @param <E> Element type, which must be
+ *       (1) a subclass of K, and
+ *       (2) implementing {@link LinkedElement} interface.
+ */
+@InterfaceAudience.Private
+public class LightWeightResizableGSet<K, E extends K>
+    extends LightWeightGSet<K, E> {
+
+  /**
+   * The default initial capacity - MUST be a power of two.
+   */
+  static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;
+
+  /**
+   * The load factor used when none specified in constructor.
+   */
+  static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+  /** Size of the entry table. */
+  private int capacity;
+
+  /**
+   * The load factor for the hash set.
+   */
+  private final float loadFactor;
+  private int threshold;
+
+  public LightWeightResizableGSet(int initCapacity, float loadFactor) {
+    if (initCapacity < 0) {
+      throw new HadoopIllegalArgumentException("Illegal initial capacity: " +
+          initCapacity);
+    }
+    if (loadFactor <= 0 || loadFactor > 1.0f) {
+      throw new HadoopIllegalArgumentException("Illegal load factor: " +
+          loadFactor);
+    }
+    this.capacity = actualArrayLength(initCapacity);
+    this.hash_mask = capacity - 1;
+    this.loadFactor = loadFactor;
+    this.threshold = (int) (capacity * loadFactor);
+
+    entries = new LinkedElement[capacity];
+  }
+
+  public LightWeightResizableGSet() {
+    this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
+  }
+
+  public LightWeightResizableGSet(int initCapacity) {
+    this(initCapacity, DEFAULT_LOAD_FACTOR);
+  }
+
+  @Override
+  public E put(final E element) {
+    E existing = super.put(element);
+    expandIfNecessary();
+    return existing;
+  }
+
+  /**
+   * Resize the internal table to given capacity.
+   */
+  @SuppressWarnings("unchecked")
+  protected void resize(int cap) {
+    int newCapacity = actualArrayLength(cap);
+    if (newCapacity == this.capacity) {
+      return;
+    }
+    this.capacity = newCapacity;
+    this.threshold = (int) (capacity * loadFactor);
+    this.hash_mask = capacity - 1;
+    LinkedElement[] oldEntries = entries;
+    entries = new LinkedElement[capacity];
+    for (int i = 0; i < oldEntries.length; i++) {
+      LinkedElement e = oldEntries[i];
+      while (e != null) {
+        LinkedElement next = e.getNext();
+        int index = getIndex((E)e);
+        e.setNext(entries[index]);
+        entries[index] = e;
+        e = next;
+      }
+    }
+  }
+
+  /**
+   * Checks if we need to expand, and expands if necessary.
+   */
+  protected void expandIfNecessary() {
+    if (size > this.threshold && capacity < MAX_ARRAY_LENGTH) {
+      resize(capacity * 2);
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
index af880ee..2d39f3d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.util;
 
+import java.util.Collection;
 import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 import java.util.Random;
@@ -41,10 +42,15 @@ public class TestGSet {
 
   @Test
   public void testExceptionCases() {
+    testExceptionCases(false);
+    testExceptionCases(true);
+  }
+
+  private void testExceptionCases(boolean resizable) {
     {
       //test contains
       final LightWeightGSet<Integer, Integer> gset
-        = new LightWeightGSet<Integer, Integer>(16);
+        = createGSet(16, resizable);
       try {
         //test contains with a null element
         gset.contains(null);
@@ -57,7 +63,7 @@ public class TestGSet {
     {
       //test get
       final LightWeightGSet<Integer, Integer> gset
-        = new LightWeightGSet<Integer, Integer>(16);
+        = createGSet(16, resizable);
       try {
         //test get with a null element
         gset.get(null);
@@ -70,7 +76,7 @@ public class TestGSet {
     {
       //test put
       final LightWeightGSet<Integer, Integer> gset
-        = new LightWeightGSet<Integer, Integer>(16);
+        = createGSet(16, resizable);
       try {
         //test put with a null element
         gset.put(null);
@@ -97,7 +103,7 @@ public class TestGSet {
       for(int v = 1; v < data.length-1; v++) {
         {
           //test remove while iterating
-          final GSet<IntElement, IntElement> gset = createGSet(data);
+          final GSet<IntElement, IntElement> gset = createGSet(data, resizable);
           for(IntElement i : gset) {
             if (i.value == v) {
               //okay because data[0] is not in gset
@@ -120,7 +126,7 @@ public class TestGSet {
 
         {
           //test put new element while iterating
-          final GSet<IntElement, IntElement> gset = createGSet(data);
+          final GSet<IntElement, IntElement> gset = createGSet(data, resizable);
           try {
             for(IntElement i : gset) {
               if (i.value == v) {
@@ -135,7 +141,7 @@ public class TestGSet {
 
         {
           //test put existing element while iterating
-          final GSet<IntElement, IntElement> gset = createGSet(data);
+          final GSet<IntElement, IntElement> gset = createGSet(data, resizable);
           try {
             for(IntElement i : gset) {
               if (i.value == v) {
@@ -151,9 +157,17 @@ public class TestGSet {
     }
   }
 
-  private static GSet<IntElement, IntElement> createGSet(final IntElement[] data) {
+  private static LightWeightGSet<Integer, Integer> createGSet(
+      int size, boolean resizable) {
+    return resizable ? new LightWeightResizableGSet<Integer, Integer>(size) :
+      new LightWeightGSet<Integer, Integer>(size);
+  }
+
+  private static GSet<IntElement, IntElement> createGSet(
+      final IntElement[] data, boolean resizable) {
     final GSet<IntElement, IntElement> gset
-      = new LightWeightGSet<IntElement, IntElement>(8);
+      = resizable ? new LightWeightResizableGSet<IntElement, IntElement>(8) :
+        new LightWeightGSet<IntElement, IntElement>(8);
     for(int i = 1; i < data.length; i++) {
       gset.put(data[i]);
     }
@@ -168,6 +182,14 @@ public class TestGSet {
     check(new GSetTestCase(255, 1 << 10, 65537));
   }
 
+  @Test
+  public void testResizableGSet() {
+    //The parameters are: table length, data size, modulus, resizable.
+    check(new GSetTestCase(1, 1 << 4, 65537, true));
+    check(new GSetTestCase(17, 1 << 16, 17, true));
+    check(new GSetTestCase(255, 1 << 10, 65537, true));
+  }
+
   /**
    * A long running test with various data sets and parameters.
    * It may take ~5 hours, 
@@ -177,14 +199,25 @@ public class TestGSet {
   //@Test
   public void runMultipleTestGSet() {
     for(int offset = -2; offset <= 2; offset++) {
-      runTestGSet(1, offset);
+      runTestGSet(1, offset, false);
+      for(int i = 1; i < Integer.SIZE - 1; i++) {
+        runTestGSet((1 << i) + 1, offset, false);
+      }
+    }
+  }
+
+  //@Test
+  public void runMultipleTestResizableGSet() {
+    for(int offset = -2; offset <= 2; offset++) {
+      runTestGSet(1, offset, true);
       for(int i = 1; i < Integer.SIZE - 1; i++) {
-        runTestGSet((1 << i) + 1, offset);
+        runTestGSet((1 << i) + 1, offset, true);
       }
     }
   }
 
-  private static void runTestGSet(final int modulus, final int offset) {
+  private static void runTestGSet(final int modulus, final int offset,
+      boolean resizable) {
     println("\n\nmodulus=" + modulus + ", offset=" + offset);
     for(int i = 0; i <= 16; i += 4) {
       final int tablelength = (1 << i) + offset;
@@ -194,7 +227,7 @@ public class TestGSet {
 
       for(int j = 0; j <= upper; j += steps) {
         final int datasize = 1 << j;
-        check(new GSetTestCase(tablelength, datasize, modulus));
+        check(new GSetTestCase(tablelength, datasize, modulus, resizable));
       }
     }
   }
@@ -265,6 +298,10 @@ public class TestGSet {
     int contain_count = 0;
 
     GSetTestCase(int tablelength, int datasize, int modulus) {
+      this(tablelength, datasize, modulus, false);
+    }
+
+    GSetTestCase(int tablelength, int datasize, int modulus, boolean resizable) {
       denominator = Math.min((datasize >> 7) + 1, 1 << 16);
       info = getClass().getSimpleName()
           + ": tablelength=" + tablelength
@@ -274,7 +311,8 @@ public class TestGSet {
       println(info);
 
       data  = new IntData(datasize, modulus);
-      gset = new LightWeightGSet<IntElement, IntElement>(tablelength);
+      gset = resizable ? new LightWeightResizableGSet<IntElement, IntElement>() :
+        new LightWeightGSet<IntElement, IntElement>(tablelength);
 
       Assert.assertEquals(0, gset.size());
     }
@@ -392,6 +430,11 @@ public class TestGSet {
       gset.clear();
       Assert.assertEquals(0, size());
     }
+
+    @Override
+    public Collection<IntElement> values() {
+      throw new UnsupportedOperationException();
+    }
   }
 
   /** Test data set */

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
index 68d484f..dff6937 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightCache.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.util;
 
+import java.util.Collection;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.Random;
@@ -379,6 +380,11 @@ public class TestLightWeightCache {
       cache.clear();
       Assert.assertEquals(0, size());
     }
+
+    @Override
+    public Collection<IntEntry> values() {
+      throw new UnsupportedOperationException();
+    }
   }
 
   private static class IntData {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java
new file mode 100644
index 0000000..3250092
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLightWeightResizableGSet.java
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/** Testing {@link LightWeightResizableGSet} */
+public class TestLightWeightResizableGSet {
+  public static final Log LOG = LogFactory.getLog(TestLightWeightResizableGSet.class);
+  private Random random = new Random();
+
+  private TestElement[] generateElements(int length) {
+    TestElement[] elements = new TestElement[length];
+    Set<Long> keys = new HashSet<>();
+    long k = 0;
+    for (int i = 0; i < length; i++) {
+      while (keys.contains(k = random.nextLong()));
+      elements[i] = new TestElement(k, random.nextLong());
+      keys.add(k);
+    }
+    return elements;
+  }
+
+  private TestKey[] getKeys(TestElement[] elements) {
+    TestKey[] keys = new TestKey[elements.length];
+    for (int i = 0; i < elements.length; i++) {
+      keys[i] = new TestKey(elements[i].getKey());
+    }
+    return keys;
+  }
+
+  private TestElement[] generateElements(TestKey[] keys) {
+    TestElement[] elements = new TestElement[keys.length];
+    for (int i = 0; i < keys.length; i++) {
+      elements[i] = new TestElement(keys[i], random.nextLong());
+    }
+    return elements;
+  }
+
+  private static class TestKey {
+    private final long key;
+
+    TestKey(long key) {
+      this.key = key;
+    }
+
+    TestKey(TestKey other) {
+      this.key = other.key;
+    }
+
+    long getKey() {
+      return key;
+    }
+
+    @Override
+    public int hashCode() {
+      return (int)(key^(key>>>32));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof TestKey)) {
+        return false;
+      }
+      TestKey other = (TestKey)o;
+      return key == other.key;
+    }
+  }
+
+  private static class TestElement extends TestKey
+      implements LightWeightResizableGSet.LinkedElement {
+    private final long data;
+    private LightWeightResizableGSet.LinkedElement next;
+
+    TestElement(long key, long data) {
+      super(key);
+      this.data = data;
+    }
+
+    TestElement(TestKey key, long data) {
+      super(key);
+      this.data = data;
+    }
+
+    long getData() {
+      return data;
+    }
+
+    @Override
+    public void setNext(LightWeightResizableGSet.LinkedElement next) {
+      this.next = next;
+    }
+
+    @Override
+    public LightWeightResizableGSet.LinkedElement getNext() {
+      return next;
+    }
+  }
+
+  @Test(timeout = 60000)
+  public void testBasicOperations() {
+    TestElement[] elements = generateElements(1 << 16);
+    final LightWeightResizableGSet<TestKey, TestElement> set =
+        new LightWeightResizableGSet<TestKey, TestElement>();
+
+    assertEquals(set.size(), 0);
+
+    // put all elements
+    for (int i = 0; i < elements.length; i++) {
+      TestElement element = set.put(elements[i]);
+      assertTrue(element == null);
+    }
+
+    // check the set size
+    assertEquals(set.size(), elements.length);
+
+    // check all elements exist in the set and the data is correct
+    for (int i = 0; i < elements.length; i++) {
+      assertTrue(set.contains(elements[i]));
+
+      TestElement element = set.get(elements[i]);
+      assertEquals(elements[i].getData(), element.getData());
+    }
+
+    TestKey[] keys = getKeys(elements);
+    // generate new elements with same key, but new data
+    TestElement[] newElements = generateElements(keys);
+    // update the set
+    for (int i = 0; i < newElements.length; i++) {
+      TestElement element = set.put(newElements[i]);
+      assertTrue(element != null);
+    }
+
+    // check the set size
+    assertEquals(set.size(), elements.length);
+
+    // check all elements exist in the set and the data is updated to new value
+    for (int i = 0; i < keys.length; i++) {
+      assertTrue(set.contains(keys[i]));
+
+      TestElement element = set.get(keys[i]);
+      assertEquals(newElements[i].getData(), element.getData());
+    }
+
+    // test LightWeightHashGSet#values
+    Collection<TestElement> cElements = set.values();
+    assertEquals(cElements.size(), elements.length);
+    for (TestElement element : cElements) {
+      assertTrue(set.contains(element));
+    }
+
+    // remove elements
+    for (int i = 0; i < keys.length; i++) {
+      TestElement element = set.remove(keys[i]);
+
+      assertTrue(element != null);
+
+      // the element should not exist after remove
+      assertFalse(set.contains(keys[i]));
+    }
+
+    // check the set size
+    assertEquals(set.size(), 0);
+  }
+
+  @Test(timeout = 60000)
+  public void testRemoveAll() {
+    TestElement[] elements = generateElements(1 << 16);
+    final LightWeightResizableGSet<TestKey, TestElement> set =
+        new LightWeightResizableGSet<TestKey, TestElement>();
+
+    assertEquals(set.size(), 0);
+
+    // put all elements
+    for (int i = 0; i < elements.length; i++) {
+      TestElement element = set.put(elements[i]);
+      assertTrue(element == null);
+    }
+
+    // check the set size
+    assertEquals(set.size(), elements.length);
+
+    // remove all through clear
+    {
+      set.clear();
+      assertEquals(set.size(), 0);
+
+      // check all elements removed
+      for (int i = 0; i < elements.length; i++) {
+        assertFalse(set.contains(elements[i]));
+      }
+      assertFalse(set.iterator().hasNext());
+    }
+
+    // put all elements back
+    for (int i = 0; i < elements.length; i++) {
+      TestElement element = set.put(elements[i]);
+      assertTrue(element == null);
+    }
+
+    // remove all through iterator
+    {
+      for (Iterator<TestElement> iter = set.iterator(); iter.hasNext(); ) {
+        TestElement element = iter.next();
+        // element should be there before removing
+        assertTrue(set.contains(element));
+        iter.remove();
+        // element should not be there now
+        assertFalse(set.contains(element));
+      }
+
+      // the deleted elements should not be there
+      for (int i = 0; i < elements.length; i++) {
+        assertFalse(set.contains(elements[i]));
+      }
+
+      // iterator should not have next
+      assertFalse(set.iterator().hasNext());
+
+      // check the set size
+      assertEquals(set.size(), 0);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 2042869..0461766 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -643,6 +643,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9148. Incorrect assert message in TestWriteToReplica#testWriteToTemporary
     (Tony Wu via lei)
 
+    HDFS-8859. Improve DataNode ReplicaMap memory footprint to save about 45%.
+    (yliu)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
index 31b14fa..d19e656 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
@@ -18,20 +18,13 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.HardLink;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
-import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.LightWeightResizableGSet;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -40,8 +33,12 @@ import com.google.common.annotations.VisibleForTesting;
  * It provides a general interface for meta information of a replica.
  */
 @InterfaceAudience.Private
-abstract public class ReplicaInfo extends Block implements Replica {
-  
+abstract public class ReplicaInfo extends Block
+    implements Replica, LightWeightResizableGSet.LinkedElement {
+
+  /** For implementing {@link LightWeightResizableGSet.LinkedElement} interface */
+  private LightWeightResizableGSet.LinkedElement next;
+
   /** volume where the replica belongs */
   private FsVolumeSpi volume;
   
@@ -229,4 +226,14 @@ abstract public class ReplicaInfo extends Block implements Replica {
   public boolean isOnTransientStorage() {
     return volume.isTransientStorage();
   }
+
+  @Override
+  public LightWeightResizableGSet.LinkedElement getNext() {
+    return next;
+  }
+
+  @Override
+  public void setNext(LightWeightResizableGSet.LinkedElement next) {
+    this.next = next;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
index e12d69a..33a88df 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
@@ -749,7 +749,12 @@ class BlockPoolSlice {
       // Now it is safe to add the replica into volumeMap
       // In case of any exception during parsing this cache file, fall back
       // to scan all the files on disk.
-      for (ReplicaInfo info: tmpReplicaMap.replicas(bpid)) {
+      for (Iterator<ReplicaInfo> iter =
+          tmpReplicaMap.replicas(bpid).iterator(); iter.hasNext(); ) {
+        ReplicaInfo info = iter.next();
+        // We use a lightweight GSet to store replicaInfo, we need to remove
+        // it from one GSet before adding to another.
+        iter.remove();
         volumeMap.add(bpid, info);
       }
       LOG.info("Successfully read replica from cache file : " 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9c47ab32/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
index 617e0fd..6f0b8a7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
@@ -24,6 +24,7 @@ import java.util.Map;
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.util.LightWeightResizableGSet;
 
 /**
  * Maintains the replica map. 
@@ -33,9 +34,9 @@ class ReplicaMap {
   private final Object mutex;
   
   // Map of block pool Id to another map of block Id to ReplicaInfo.
-  private final Map<String, Map<Long, ReplicaInfo>> map =
-    new HashMap<String, Map<Long, ReplicaInfo>>();
-  
+  private final Map<String, LightWeightResizableGSet<Block, ReplicaInfo>> map =
+    new HashMap<String, LightWeightResizableGSet<Block, ReplicaInfo>>();
+
   ReplicaMap(Object mutex) {
     if (mutex == null) {
       throw new HadoopIllegalArgumentException(
@@ -91,8 +92,8 @@ class ReplicaMap {
   ReplicaInfo get(String bpid, long blockId) {
     checkBlockPool(bpid);
     synchronized(mutex) {
-      Map<Long, ReplicaInfo> m = map.get(bpid);
-      return m != null ? m.get(blockId) : null;
+      LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
+      return m != null ? m.get(new Block(blockId)) : null;
     }
   }
   
@@ -108,13 +109,13 @@ class ReplicaMap {
     checkBlockPool(bpid);
     checkBlock(replicaInfo);
     synchronized(mutex) {
-      Map<Long, ReplicaInfo> m = map.get(bpid);
+      LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
       if (m == null) {
         // Add an entry for block pool if it does not exist already
-        m = new HashMap<Long, ReplicaInfo>();
+        m = new LightWeightResizableGSet<Block, ReplicaInfo>();
         map.put(bpid, m);
       }
-      return  m.put(replicaInfo.getBlockId(), replicaInfo);
+      return  m.put(replicaInfo);
     }
   }
 
@@ -137,14 +138,13 @@ class ReplicaMap {
     checkBlockPool(bpid);
     checkBlock(block);
     synchronized(mutex) {
-      Map<Long, ReplicaInfo> m = map.get(bpid);
+      LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
       if (m != null) {
-        Long key = Long.valueOf(block.getBlockId());
-        ReplicaInfo replicaInfo = m.get(key);
+        ReplicaInfo replicaInfo = m.get(block);
         if (replicaInfo != null &&
             block.getGenerationStamp() == replicaInfo.getGenerationStamp()) {
-          return m.remove(key);
-        } 
+          return m.remove(block);
+        }
       }
     }
     
@@ -160,9 +160,9 @@ class ReplicaMap {
   ReplicaInfo remove(String bpid, long blockId) {
     checkBlockPool(bpid);
     synchronized(mutex) {
-      Map<Long, ReplicaInfo> m = map.get(bpid);
+      LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
       if (m != null) {
-        return m.remove(blockId);
+        return m.remove(new Block(blockId));
       }
     }
     return null;
@@ -174,7 +174,7 @@ class ReplicaMap {
    * @return the number of replicas in the map
    */
   int size(String bpid) {
-    Map<Long, ReplicaInfo> m = null;
+    LightWeightResizableGSet<Block, ReplicaInfo> m = null;
     synchronized(mutex) {
       m = map.get(bpid);
       return m != null ? m.size() : 0;
@@ -192,7 +192,7 @@ class ReplicaMap {
    * @return a collection of the replicas belonging to the block pool
    */
   Collection<ReplicaInfo> replicas(String bpid) {
-    Map<Long, ReplicaInfo> m = null;
+    LightWeightResizableGSet<Block, ReplicaInfo> m = null;
     m = map.get(bpid);
     return m != null ? m.values() : null;
   }
@@ -200,10 +200,10 @@ class ReplicaMap {
   void initBlockPool(String bpid) {
     checkBlockPool(bpid);
     synchronized(mutex) {
-      Map<Long, ReplicaInfo> m = map.get(bpid);
+      LightWeightResizableGSet<Block, ReplicaInfo> m = map.get(bpid);
       if (m == null) {
         // Add an entry for block pool if it does not exist already
-        m = new HashMap<Long, ReplicaInfo>();
+        m = new LightWeightResizableGSet<Block, ReplicaInfo>();
         map.put(bpid, m);
       }
     }