You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2013/07/23 03:40:59 UTC
svn commit: r1505875 - in
/hadoop/common/trunk/hadoop-common-project/hadoop-common: ./
src/main/java/org/apache/hadoop/util/ src/test/java/org/apache/hadoop/util/
Author: suresh
Date: Tue Jul 23 01:40:58 2013
New Revision: 1505875
URL: http://svn.apache.org/r1505875
Log:
HADOOP-9760. Move GSet and related classes to common from HDFS. Contributed by Suresh Srinivas.
Added:
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1505875&r1=1505874&r2=1505875&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Tue Jul 23 01:40:58 2013
@@ -484,6 +484,9 @@ Release 2.1.0-beta - 2013-07-02
HADOOP-9754. Remove unnecessary "throws IOException/InterruptedException",
and fix generic and other javac warnings. (szetszwo)
+ HADOOP-9760. Move GSet and related classes to common from HDFS.
+ (suresh)
+
OPTIMIZATIONS
HADOOP-9150. Avoid unnecessary DNS resolution attempts for logical URIs
Added: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java?rev=1505875&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java (added)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSet.java Tue Jul 23 01:40:58 2013
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A {@link GSet} is set,
+ * which supports the {@link #get(Object)} operation.
+ * The {@link #get(Object)} operation uses a key to lookup an element.
+ *
+ * Null element is not supported.
+ *
+ * @param <K> The type of the keys.
+ * @param <E> The type of the elements, which must be a subclass of the keys.
+ */
+@InterfaceAudience.Private
+public interface GSet<K, E extends K> extends Iterable<E> {
+ /**
+ * @return The size of this set.
+ */
+ int size();
+
+ /**
+ * Does this set contain an element corresponding to the given key?
+ * @param key The given key.
+ * @return true if the given key equals to a stored element.
+ * Otherwise, return false.
+ * @throws NullPointerException if key == null.
+ */
+ boolean contains(K key);
+
+ /**
+ * Return the stored element which is equal to the given key.
+ * This operation is similar to {@link java.util.Map#get(Object)}.
+ * @param key The given key.
+ * @return The stored element if it exists.
+ * Otherwise, return null.
+ * @throws NullPointerException if key == null.
+ */
+ E get(K key);
+
+ /**
+ * Add/replace an element.
+ * If the element does not exist, add it to the set.
+ * Otherwise, replace the existing element.
+ *
+ * Note that this operation
+ * is similar to {@link java.util.Map#put(Object, Object)}
+ * but is different from {@link java.util.Set#add(Object)}
+ * which does not replace the existing element if there is any.
+ *
+ * @param element The element being put.
+ * @return the previous stored element if there is any.
+ * Otherwise, return null.
+ * @throws NullPointerException if element == null.
+ */
+ E put(E element);
+
+ /**
+ * Remove the element corresponding to the given key.
+ * This operation is similar to {@link java.util.Map#remove(Object)}.
+ * @param key The key of the element being removed.
+ * @return If such element exists, return it.
+ * Otherwise, return null.
+ * @throws NullPointerException if key == null.
+ */
+ E remove(K key);
+
+ void clear();
+}
Added: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java?rev=1505875&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java (added)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GSetByHashMap.java Tue Jul 23 01:40:58 2013
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.util.HashMap;
+import java.util.Iterator;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A {@link GSet} implementation by {@link HashMap}.
+ */
+@InterfaceAudience.Private
+public class GSetByHashMap<K, E extends K> implements GSet<K, E> {
+ private final HashMap<K, E> m;
+
+ public GSetByHashMap(int initialCapacity, float loadFactor) {
+ m = new HashMap<K, E>(initialCapacity, loadFactor);
+ }
+
+ @Override
+ public int size() {
+ return m.size();
+ }
+
+ @Override
+ public boolean contains(K k) {
+ return m.containsKey(k);
+ }
+
+ @Override
+ public E get(K k) {
+ return m.get(k);
+ }
+
+ @Override
+ public E put(E element) {
+ if (element == null) {
+ throw new UnsupportedOperationException("Null element is not supported.");
+ }
+ return m.put(element, element);
+ }
+
+ @Override
+ public E remove(K k) {
+ return m.remove(k);
+ }
+
+ @Override
+ public Iterator<E> iterator() {
+ return m.values().iterator();
+ }
+
+ @Override
+ public void clear() {
+ m.clear();
+ }
+}
Added: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java?rev=1505875&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java (added)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java Tue Jul 23 01:40:58 2013
@@ -0,0 +1,345 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.io.PrintStream;
+import java.util.ConcurrentModificationException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.HadoopIllegalArgumentException;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * A low memory footprint {@link GSet} implementation,
+ * which uses an array for storing the elements
+ * and linked lists for collision resolution.
+ *
+ * No rehash will be performed.
+ * Therefore, the internal array will never be resized.
+ *
+ * This class does not support null element.
+ *
+ * This class is not thread safe.
+ *
+ * @param <K> Key type for looking up the elements
+ * @param <E> Element type, which must be
+ * (1) a subclass of K, and
+ * (2) implementing {@link LinkedElement} interface.
+ */
+@InterfaceAudience.Private
+public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
+ /**
+ * Elements of {@link LightWeightGSet}.
+ */
+ public static interface LinkedElement {
+ /** Set the next element. */
+ public void setNext(LinkedElement next);
+
+ /** Get the next element. */
+ public LinkedElement getNext();
+ }
+
+ public static final Log LOG = LogFactory.getLog(GSet.class);
+ static final int MAX_ARRAY_LENGTH = 1 << 30; //prevent int overflow problem
+ static final int MIN_ARRAY_LENGTH = 1;
+
+ /**
+ * An internal array of entries, which are the rows of the hash table.
+ * The size must be a power of two.
+ */
+ private final LinkedElement[] entries;
+ /** A mask for computing the array index from the hash value of an element. */
+ private final int hash_mask;
+ /** The size of the set (not the entry array). */
+ private int size = 0;
+ /** Modification version for fail-fast.
+ * @see ConcurrentModificationException
+ */
+ private int modification = 0;
+
+ /**
+ * @param recommended_length Recommended size of the internal array.
+ */
+ public LightWeightGSet(final int recommended_length) {
+ final int actual = actualArrayLength(recommended_length);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("recommended=" + recommended_length + ", actual=" + actual);
+ }
+ entries = new LinkedElement[actual];
+ hash_mask = entries.length - 1;
+ }
+
+ //compute actual length
+ private static int actualArrayLength(int recommended) {
+ if (recommended > MAX_ARRAY_LENGTH) {
+ return MAX_ARRAY_LENGTH;
+ } else if (recommended < MIN_ARRAY_LENGTH) {
+ return MIN_ARRAY_LENGTH;
+ } else {
+ final int a = Integer.highestOneBit(recommended);
+ return a == recommended? a: a << 1;
+ }
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ private int getIndex(final K key) {
+ return key.hashCode() & hash_mask;
+ }
+
+ private E convert(final LinkedElement e){
+ @SuppressWarnings("unchecked")
+ final E r = (E)e;
+ return r;
+ }
+
+ @Override
+ public E get(final K key) {
+ //validate key
+ if (key == null) {
+ throw new NullPointerException("key == null");
+ }
+
+ //find element
+ final int index = getIndex(key);
+ for(LinkedElement e = entries[index]; e != null; e = e.getNext()) {
+ if (e.equals(key)) {
+ return convert(e);
+ }
+ }
+ //element not found
+ return null;
+ }
+
+ @Override
+ public boolean contains(final K key) {
+ return get(key) != null;
+ }
+
+ @Override
+ public E put(final E element) {
+ //validate element
+ if (element == null) {
+ throw new NullPointerException("Null element is not supported.");
+ }
+ if (!(element instanceof LinkedElement)) {
+ throw new HadoopIllegalArgumentException(
+ "!(element instanceof LinkedElement), element.getClass()="
+ + element.getClass());
+ }
+ final LinkedElement e = (LinkedElement)element;
+
+ //find index
+ final int index = getIndex(element);
+
+ //remove if it already exists
+ final E existing = remove(index, element);
+
+ //insert the element to the head of the linked list
+ modification++;
+ size++;
+ e.setNext(entries[index]);
+ entries[index] = e;
+
+ return existing;
+ }
+
+ /**
+ * Remove the element corresponding to the key,
+ * given key.hashCode() == index.
+ *
+ * @return If such element exists, return it.
+ * Otherwise, return null.
+ */
+ private E remove(final int index, final K key) {
+ if (entries[index] == null) {
+ return null;
+ } else if (entries[index].equals(key)) {
+ //remove the head of the linked list
+ modification++;
+ size--;
+ final LinkedElement e = entries[index];
+ entries[index] = e.getNext();
+ e.setNext(null);
+ return convert(e);
+ } else {
+ //head != null and key is not equal to head
+ //search the element
+ LinkedElement prev = entries[index];
+ for(LinkedElement curr = prev.getNext(); curr != null; ) {
+ if (curr.equals(key)) {
+ //found the element, remove it
+ modification++;
+ size--;
+ prev.setNext(curr.getNext());
+ curr.setNext(null);
+ return convert(curr);
+ } else {
+ prev = curr;
+ curr = curr.getNext();
+ }
+ }
+ //element not found
+ return null;
+ }
+ }
+
+ @Override
+ public E remove(final K key) {
+ //validate key
+ if (key == null) {
+ throw new NullPointerException("key == null");
+ }
+ return remove(getIndex(key), key);
+ }
+
+ @Override
+ public Iterator<E> iterator() {
+ return new SetIterator();
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder b = new StringBuilder(getClass().getSimpleName());
+ b.append("(size=").append(size)
+ .append(String.format(", %08x", hash_mask))
+ .append(", modification=").append(modification)
+ .append(", entries.length=").append(entries.length)
+ .append(")");
+ return b.toString();
+ }
+
+ /** Print detailed information of this object. */
+ public void printDetails(final PrintStream out) {
+ out.print(this + ", entries = [");
+ for(int i = 0; i < entries.length; i++) {
+ if (entries[i] != null) {
+ LinkedElement e = entries[i];
+ out.print("\n " + i + ": " + e);
+ for(e = e.getNext(); e != null; e = e.getNext()) {
+ out.print(" -> " + e);
+ }
+ }
+ }
+ out.println("\n]");
+ }
+
+ private class SetIterator implements Iterator<E> {
+ /** The starting modification for fail-fast. */
+ private final int startModification = modification;
+ /** The current index of the entry array. */
+ private int index = -1;
+ /** The next element to return. */
+ private LinkedElement next = nextNonemptyEntry();
+
+ /** Find the next nonempty entry starting at (index + 1). */
+ private LinkedElement nextNonemptyEntry() {
+ for(index++; index < entries.length && entries[index] == null; index++);
+ return index < entries.length? entries[index]: null;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return next != null;
+ }
+
+ @Override
+ public E next() {
+ if (modification != startModification) {
+ throw new ConcurrentModificationException("modification=" + modification
+ + " != startModification = " + startModification);
+ }
+
+ final E e = convert(next);
+
+ //find the next element
+ final LinkedElement n = next.getNext();
+ next = n != null? n: nextNonemptyEntry();
+
+ return e;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Remove is not supported.");
+ }
+ }
+
+ /**
+ * Let t = percentage of max memory.
+ * Let e = round(log_2 t).
+ * Then, we choose capacity = 2^e/(size of reference),
+ * unless it is outside the close interval [1, 2^30].
+ */
+ public static int computeCapacity(double percentage, String mapName) {
+ return computeCapacity(Runtime.getRuntime().maxMemory(), percentage,
+ mapName);
+ }
+
+ @VisibleForTesting
+ static int computeCapacity(long maxMemory, double percentage,
+ String mapName) {
+ if (percentage > 100.0 || percentage < 0.0) {
+ throw new HadoopIllegalArgumentException("Percentage " + percentage
+ + " must be greater than or equal to 0 "
+ + " and less than or equal to 100");
+ }
+ if (maxMemory < 0) {
+ throw new HadoopIllegalArgumentException("Memory " + maxMemory
+ + " must be greater than or equal to 0");
+ }
+ if (percentage == 0.0 || maxMemory == 0) {
+ return 0;
+ }
+ //VM detection
+ //See http://java.sun.com/docs/hotspot/HotSpotFAQ.html#64bit_detection
+ final String vmBit = System.getProperty("sun.arch.data.model");
+
+ //Percentage of max memory
+ final double percentDivisor = 100.0/percentage;
+ final double percentMemory = maxMemory/percentDivisor;
+
+ //compute capacity
+ final int e1 = (int)(Math.log(percentMemory)/Math.log(2.0) + 0.5);
+ final int e2 = e1 - ("32".equals(vmBit)? 2: 3);
+ final int exponent = e2 < 0? 0: e2 > 30? 30: e2;
+ final int c = 1 << exponent;
+
+ LOG.info("Computing capacity for map " + mapName);
+ LOG.info("VM type = " + vmBit + "-bit");
+ LOG.info(percentage + "% max memory = "
+ + StringUtils.TraditionalBinaryPrefix.long2String(maxMemory, "B", 1));
+ LOG.info("capacity = 2^" + exponent + " = " + c + " entries");
+ return c;
+ }
+
+ public void clear() {
+ for (int i = 0; i < entries.length; i++) {
+ entries[i] = null;
+ }
+ size = 0;
+ }
+}
Added: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java?rev=1505875&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java (added)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGSet.java Tue Jul 23 01:40:58 2013
@@ -0,0 +1,538 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.util.ConcurrentModificationException;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.util.Time;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestGSet {
+ private static final Random ran = new Random();
+ private static final long starttime = Time.now();
+
+ private static void print(Object s) {
+ System.out.print(s);
+ System.out.flush();
+ }
+
+ private static void println(Object s) {
+ System.out.println(s);
+ }
+
+ @Test
+ public void testExceptionCases() {
+ {
+ //test contains
+ final LightWeightGSet<Integer, Integer> gset
+ = new LightWeightGSet<Integer, Integer>(16);
+ try {
+ //test contains with a null element
+ gset.contains(null);
+ Assert.fail();
+ } catch(NullPointerException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ }
+
+ {
+ //test get
+ final LightWeightGSet<Integer, Integer> gset
+ = new LightWeightGSet<Integer, Integer>(16);
+ try {
+ //test get with a null element
+ gset.get(null);
+ Assert.fail();
+ } catch(NullPointerException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ }
+
+ {
+ //test put
+ final LightWeightGSet<Integer, Integer> gset
+ = new LightWeightGSet<Integer, Integer>(16);
+ try {
+ //test put with a null element
+ gset.put(null);
+ Assert.fail();
+ } catch(NullPointerException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ try {
+ //test putting an element which is not implementing LinkedElement
+ gset.put(1);
+ Assert.fail();
+ } catch(IllegalArgumentException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ }
+
+ {
+ //test iterator
+ final IntElement[] data = new IntElement[5];
+ for(int i = 0; i < data.length; i++) {
+ data[i] = new IntElement(i, i);
+ }
+
+ for(int v = 1; v < data.length-1; v++) {
+ {
+ //test remove while iterating
+ final GSet<IntElement, IntElement> gset = createGSet(data);
+ for(IntElement i : gset) {
+ if (i.value == v) {
+ //okay because data[0] is not in gset
+ gset.remove(data[0]);
+ }
+ }
+
+ try {
+ //exception because data[1] is in gset
+ for(IntElement i : gset) {
+ if (i.value == v) {
+ gset.remove(data[1]);
+ }
+ }
+ Assert.fail();
+ } catch(ConcurrentModificationException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ }
+
+ {
+ //test put new element while iterating
+ final GSet<IntElement, IntElement> gset = createGSet(data);
+ try {
+ for(IntElement i : gset) {
+ if (i.value == v) {
+ gset.put(data[0]);
+ }
+ }
+ Assert.fail();
+ } catch(ConcurrentModificationException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ }
+
+ {
+ //test put existing element while iterating
+ final GSet<IntElement, IntElement> gset = createGSet(data);
+ try {
+ for(IntElement i : gset) {
+ if (i.value == v) {
+ gset.put(data[3]);
+ }
+ }
+ Assert.fail();
+ } catch(ConcurrentModificationException e) {
+ LightWeightGSet.LOG.info("GOOD: getting " + e, e);
+ }
+ }
+ }
+ }
+ }
+
+ private static GSet<IntElement, IntElement> createGSet(final IntElement[] data) {
+ final GSet<IntElement, IntElement> gset
+ = new LightWeightGSet<IntElement, IntElement>(8);
+ for(int i = 1; i < data.length; i++) {
+ gset.put(data[i]);
+ }
+ return gset;
+ }
+
+ @Test
+ public void testGSet() {
+ //The parameters are: table length, data size, modulus.
+ check(new GSetTestCase(1, 1 << 4, 65537));
+ check(new GSetTestCase(17, 1 << 16, 17));
+ check(new GSetTestCase(255, 1 << 10, 65537));
+ }
+
+ /**
+ * A long test,
+ * which may take ~5 hours,
+ * with various data sets and parameters.
+ * If you are changing the implementation,
+ * please un-comment the following line in order to run the test.
+ */
+ //@Test
+ public void runMultipleTestGSet() {
+ for(int offset = -2; offset <= 2; offset++) {
+ runTestGSet(1, offset);
+ for(int i = 1; i < Integer.SIZE - 1; i++) {
+ runTestGSet((1 << i) + 1, offset);
+ }
+ }
+ }
+
+ private static void runTestGSet(final int modulus, final int offset) {
+ println("\n\nmodulus=" + modulus + ", offset=" + offset);
+ for(int i = 0; i <= 16; i += 4) {
+ final int tablelength = (1 << i) + offset;
+
+ final int upper = i + 2;
+ final int steps = Math.max(1, upper/3);
+
+ for(int j = 0; j <= upper; j += steps) {
+ final int datasize = 1 << j;
+ check(new GSetTestCase(tablelength, datasize, modulus));
+ }
+ }
+ }
+
+ private static void check(final GSetTestCase test) {
+ //check add
+ print(" check add .................. ");
+ for(int i = 0; i < test.data.size()/2; i++) {
+ test.put(test.data.get(i));
+ }
+ for(int i = 0; i < test.data.size(); i++) {
+ test.put(test.data.get(i));
+ }
+ println("DONE " + test.stat());
+
+ //check remove and add
+ print(" check remove & add ......... ");
+ for(int j = 0; j < 10; j++) {
+ for(int i = 0; i < test.data.size()/2; i++) {
+ final int r = ran.nextInt(test.data.size());
+ test.remove(test.data.get(r));
+ }
+ for(int i = 0; i < test.data.size()/2; i++) {
+ final int r = ran.nextInt(test.data.size());
+ test.put(test.data.get(r));
+ }
+ }
+ println("DONE " + test.stat());
+
+ //check remove
+ print(" check remove ............... ");
+ for(int i = 0; i < test.data.size(); i++) {
+ test.remove(test.data.get(i));
+ }
+ Assert.assertEquals(0, test.gset.size());
+ println("DONE " + test.stat());
+
+ //check remove and add again
+ print(" check remove & add again ... ");
+ for(int j = 0; j < 10; j++) {
+ for(int i = 0; i < test.data.size()/2; i++) {
+ final int r = ran.nextInt(test.data.size());
+ test.remove(test.data.get(r));
+ }
+ for(int i = 0; i < test.data.size()/2; i++) {
+ final int r = ran.nextInt(test.data.size());
+ test.put(test.data.get(r));
+ }
+ }
+ println("DONE " + test.stat());
+
+ final long s = (Time.now() - starttime)/1000L;
+ println("total time elapsed=" + s + "s\n");
+ }
+
+ /** Test cases */
+ private static class GSetTestCase implements GSet<IntElement, IntElement> {
+ final GSet<IntElement, IntElement> expected
+ = new GSetByHashMap<IntElement, IntElement>(1024, 0.75f);
+ final GSet<IntElement, IntElement> gset;
+ final IntData data;
+
+ final String info;
+ final long starttime = Time.now();
+ /** Determine the probability in {@link #check()}. */
+ final int denominator;
+ int iterate_count = 0;
+ int contain_count = 0;
+
+ GSetTestCase(int tablelength, int datasize, int modulus) {
+ denominator = Math.min((datasize >> 7) + 1, 1 << 16);
+ info = getClass().getSimpleName()
+ + ": tablelength=" + tablelength
+ + ", datasize=" + datasize
+ + ", modulus=" + modulus
+ + ", denominator=" + denominator;
+ println(info);
+
+ data = new IntData(datasize, modulus);
+ gset = new LightWeightGSet<IntElement, IntElement>(tablelength);
+
+ Assert.assertEquals(0, gset.size());
+ }
+
+ private boolean containsTest(IntElement key) {
+ final boolean e = expected.contains(key);
+ Assert.assertEquals(e, gset.contains(key));
+ return e;
+ }
+ @Override
+ public boolean contains(IntElement key) {
+ final boolean e = containsTest(key);
+ check();
+ return e;
+ }
+
+ private IntElement getTest(IntElement key) {
+ final IntElement e = expected.get(key);
+ Assert.assertEquals(e.id, gset.get(key).id);
+ return e;
+ }
+ @Override
+ public IntElement get(IntElement key) {
+ final IntElement e = getTest(key);
+ check();
+ return e;
+ }
+
+ private IntElement putTest(IntElement element) {
+ final IntElement e = expected.put(element);
+ if (e == null) {
+ Assert.assertEquals(null, gset.put(element));
+ } else {
+ Assert.assertEquals(e.id, gset.put(element).id);
+ }
+ return e;
+ }
+ @Override
+ public IntElement put(IntElement element) {
+ final IntElement e = putTest(element);
+ check();
+ return e;
+ }
+
+ private IntElement removeTest(IntElement key) {
+ final IntElement e = expected.remove(key);
+ if (e == null) {
+ Assert.assertEquals(null, gset.remove(key));
+ } else {
+ Assert.assertEquals(e.id, gset.remove(key).id);
+ }
+
+ check();
+ return e;
+ }
+ @Override
+ public IntElement remove(IntElement key) {
+ final IntElement e = removeTest(key);
+ check();
+ return e;
+ }
+
+ private int sizeTest() {
+ final int s = expected.size();
+ Assert.assertEquals(s, gset.size());
+ return s;
+ }
+ @Override
+ public int size() {
+ final int s = sizeTest();
+ check();
+ return s;
+ }
+
+ @Override
+ public Iterator<IntElement> iterator() {
+ throw new UnsupportedOperationException();
+ }
+
+ void check() {
+ //test size
+ sizeTest();
+
+ if (ran.nextInt(denominator) == 0) {
+ //test get(..), check content and test iterator
+ iterate_count++;
+ for(IntElement i : gset) {
+ getTest(i);
+ }
+ }
+
+ if (ran.nextInt(denominator) == 0) {
+ //test contains(..)
+ contain_count++;
+ final int count = Math.min(data.size(), 1000);
+ if (count == data.size()) {
+ for(IntElement i : data.integers) {
+ containsTest(i);
+ }
+ } else {
+ for(int j = 0; j < count; j++) {
+ containsTest(data.get(ran.nextInt(data.size())));
+ }
+ }
+ }
+ }
+
+ String stat() {
+ final long t = Time.now() - starttime;
+ return String.format(" iterate=%5d, contain=%5d, time elapsed=%5d.%03ds",
+ iterate_count, contain_count, t/1000, t%1000);
+ }
+
+ @Override
+ public void clear() {
+ gset.clear();
+ }
+ }
+
+ /** Test data set */
+ private static class IntData {
+ final IntElement[] integers;
+
+ IntData(int size, int modulus) {
+ integers = new IntElement[size];
+ for(int i = 0; i < integers.length; i++) {
+ integers[i] = new IntElement(i, ran.nextInt(modulus));
+ }
+ }
+
+ IntElement get(int i) {
+ return integers[i];
+ }
+
+ int size() {
+ return integers.length;
+ }
+ }
+
+ /** Elements of {@link LightWeightGSet} in this test */
+ private static class IntElement implements LightWeightGSet.LinkedElement,
+ Comparable<IntElement> {
+ private LightWeightGSet.LinkedElement next;
+ final int id;
+ final int value;
+
+ IntElement(int id, int value) {
+ this.id = id;
+ this.value = value;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj != null && obj instanceof IntElement
+ && value == ((IntElement)obj).value;
+ }
+
+ @Override
+ public int hashCode() {
+ return value;
+ }
+
+ @Override
+ public int compareTo(IntElement that) {
+ return value - that.value;
+ }
+
+ @Override
+ public String toString() {
+ return id + "#" + value;
+ }
+
+ @Override
+ public LightWeightGSet.LinkedElement getNext() {
+ return next;
+ }
+
+ @Override
+ public void setNext(LightWeightGSet.LinkedElement e) {
+ next = e;
+ }
+ }
+
+ /**
+ * Test for {@link LightWeightGSet#computeCapacity(double, String)}
+ * with invalid percent less than 0.
+ */
+ @Test(expected=HadoopIllegalArgumentException.class)
+ public void testComputeCapacityNegativePercent() {
+ LightWeightGSet.computeCapacity(1024, -1.0, "testMap");
+ }
+
+ /**
+ * Test for {@link LightWeightGSet#computeCapacity(double, String)}
+ * with invalid percent greater than 100.
+ */
+ @Test(expected=HadoopIllegalArgumentException.class)
+ public void testComputeCapacityInvalidPercent() {
+ LightWeightGSet.computeCapacity(1024, 101.0, "testMap");
+ }
+
+ /**
+ * Test for {@link LightWeightGSet#computeCapacity(double, String)}
+ * with invalid negative max memory
+ */
+ @Test(expected=HadoopIllegalArgumentException.class)
+ public void testComputeCapacityInvalidMemory() {
+ LightWeightGSet.computeCapacity(-1, 50.0, "testMap");
+ }
+
+ private static boolean isPowerOfTwo(int num) {
+ return num == 0 || (num > 0 && Integer.bitCount(num) == 1);
+ }
+
+ /** Return capacity as percentage of total memory */
+ private static int getPercent(long total, int capacity) {
+ // Reference size in bytes
+ double referenceSize =
+ System.getProperty("sun.arch.data.model").equals("32") ? 4.0 : 8.0;
+ return (int)(((capacity * referenceSize)/total) * 100.0);
+ }
+
+ /** Return capacity as percentage of total memory */
+ private static void testCapacity(long maxMemory, double percent) {
+ int capacity = LightWeightGSet.computeCapacity(maxMemory, percent, "map");
+ LightWeightGSet.LOG.info("Validating - total memory " + maxMemory + " percent "
+ + percent + " returned capacity " + capacity);
+ // Returned capacity is zero or power of two
+ Assert.assertTrue(isPowerOfTwo(capacity));
+
+ // Ensure the capacity returned is the nearest to the asked perecentage
+ int capacityPercent = getPercent(maxMemory, capacity);
+ if (capacityPercent == percent) {
+ return;
+ } else if (capacityPercent > percent) {
+ Assert.assertTrue(getPercent(maxMemory, capacity * 2) > percent);
+ } else {
+ Assert.assertTrue(getPercent(maxMemory, capacity / 2) < percent);
+ }
+ }
+
+ /**
+ * Test for {@link LightWeightGSet#computeCapacity(double, String)}
+ */
+ @Test
+ public void testComputeCapacity() {
+ // Tests for boundary conditions where percent or memory are zero
+ testCapacity(0, 0.0);
+ testCapacity(100, 0.0);
+ testCapacity(0, 100.0);
+
+ // Compute capacity for some 100 random max memory and percentage
+ Random r = new Random();
+ for (int i = 0; i < 100; i++) {
+ long maxMemory = r.nextInt(Integer.MAX_VALUE);
+ double percent = r.nextInt(101);
+ testCapacity(maxMemory, percent);
+ }
+ }
+}