You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2015/11/02 18:15:23 UTC
svn commit: r1712088 - in
/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src:
main/java/org/apache/uima/internal/util/IntHashSet.java
test/java/org/apache/uima/internal/util/IntHashSetTest.java
Author: schor
Date: Mon Nov 2 17:15:23 2015
New Revision: 1712088
URL: http://svn.apache.org/viewvc?rev=1712088&view=rev
Log:
[UIMA-4682] reuse removed slots, add tests
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java?rev=1712088&r1=1712087&r2=1712088&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java Mon Nov 2 17:15:23 2015
@@ -22,7 +22,7 @@ package org.apache.uima.internal.util;
import java.util.Arrays;
import java.util.NoSuchElementException;
-import org.apache.uima.jcas.impl.JCasHashMap;
+import org.apache.uima.util.Misc;
/**
* A set of non-zero ints.
@@ -55,6 +55,10 @@ public class IntHashSet implements Posit
public static final int SIZE_NEEDING_4_BYTES = 256 * 256 - 2;
public static final float DEFAULT_LOAD_FACTOR = 0.66F;
+
+ private static final int REMOVED4 = Integer.MIN_VALUE;
+ private static final int REMOVED2 = Short.MIN_VALUE;
+
// set to true to collect statistics for tuning
// you have to also put a call to showHistogram() at the end of the run
private static final boolean TUNE = false;
@@ -79,6 +83,8 @@ public class IntHashSet implements Posit
private int size; // number of elements in the table
private int nbrRemoved; // number of removed elements (coded as removed)
+ private int removedPositionToUse = -1;
+
// offset only used with keys2. values stored are key - offset
// intent is to have them fit into short data type.
// If the offset is 100,000, then the keys range from 100000 - 32766 to 100000 + 32767, includes "0"
@@ -227,7 +233,7 @@ public class IntHashSet implements Posit
// switch to 4
if (TUNE) {System.out.println("Switching to 4 byte keys, Capacity increasing from " + oldCapacity + " to " + newCapacity);}
for (short adjKey : oldKeys) {
- if (adjKey != 0 && adjKey != Short.MIN_VALUE) {
+ if (adjKey != 0 && adjKey != REMOVED2) {
addInner4(getRawFromAdjKey(adjKey));
}
}
@@ -235,7 +241,7 @@ public class IntHashSet implements Posit
} else {
if (TUNE) {System.out.println("Keeping 2 byte keys, Capacity increasing from " + oldCapacity + " to " + newCapacity);}
for (short adjKey : oldKeys) {
- if (adjKey != 0 && adjKey != Short.MIN_VALUE) {
+ if (adjKey != 0 && adjKey != REMOVED2) {
addInner2(adjKey);
}
}
@@ -246,7 +252,7 @@ public class IntHashSet implements Posit
if (TUNE) {System.out.println("Capacity increasing from " + oldCapacity + " to " + newCapacity);}
newTableKeepSize(newCapacity, true);
for (int key : oldKeys) {
- if (key != 0 && key != Integer.MIN_VALUE) {
+ if (key != 0 && key != REMOVED4) {
addInner4(key);
}
}
@@ -321,25 +327,35 @@ public class IntHashSet implements Posit
* @return the probeAddr in keys array - might have a 0 value, or the key value if found
*/
private int findPosition(final int adjKey) {
+ return findPosition(adjKey, false);
+ }
+
+ private int findPosition(final int adjKey, final boolean includeRemoved) {
if (adjKey == 0) {
throw new IllegalArgumentException("0 is an invalid key");
}
- final int hash = JCasHashMap.hashInt(adjKey);
+ final int hash = Misc.hashInt(adjKey);
int nbrProbes = 1;
int probeDelta = 1;
int probeAddr;
+ removedPositionToUse = -1;
if (keys4 == null) {
final short[] localKeys2 = keys2;
final int bitMask = localKeys2.length - 1;
probeAddr = hash & bitMask;
- while (true) {
+
+ while (true) {
final int testKey = localKeys2[probeAddr];
+ if (includeRemoved && (removedPositionToUse == -1) && (testKey == REMOVED2)) {
+ nbrRemoved --; // because caller will use this as a slot for adding
+ removedPositionToUse = probeAddr;
+ }
if (testKey == 0 || testKey == adjKey) {
- break;
+ break;
}
- nbrProbes++;
+ nbrProbes++;
probeAddr = bitMask & (probeAddr + (probeDelta++));
}
} else {
@@ -348,6 +364,10 @@ public class IntHashSet implements Posit
probeAddr = hash & bitMask;
while (true) {
final int testKey = localKeys4[probeAddr];
+ if (includeRemoved && (removedPositionToUse == -1) && (testKey == REMOVED4)) {
+ nbrRemoved --; // because caller will use this as a slot for adding
+ removedPositionToUse = probeAddr;
+ }
if (testKey == 0 || testKey == adjKey) {
break;
}
@@ -430,7 +450,7 @@ public class IntHashSet implements Posit
final short[] oldKeys = keys2;
newTableKeepSize(getCapacity(), true); // make a 4 table. same size
for (short adjKey : oldKeys) {
- if (adjKey != 0 && adjKey != Short.MIN_VALUE) {
+ if (adjKey != 0 && adjKey != REMOVED2) {
addInner4(getRawFromAdjKey(adjKey));
}
}
@@ -459,34 +479,36 @@ public class IntHashSet implements Posit
}
if (keys4 != null) {
- return find4(rawKey);
+ return find4AndAddIfMissing(rawKey);
// short keys
} else {
int adjKey = getAdjKey(rawKey);
if (isAdjKeyOutOfRange(adjKey)) {
switchTo4byte();
- return find4(rawKey);
+ return find4AndAddIfMissing(rawKey);
// key in range
} else {
- final int i = findPosition(adjKey);
+ final int i = findPosition(adjKey, true); // reuse removed slots
if (keys2[i] == adjKey) {
return false;
}
- keys2[i] = (short) adjKey;
+
+ keys2[ (removedPositionToUse != -1) ? removedPositionToUse : i ] = (short) adjKey;
incrementSize();
return true;
}
}
}
- private boolean find4(int rawKey) {
- final int i = findPosition(rawKey);
+ private boolean find4AndAddIfMissing(int rawKey) {
+ removedPositionToUse = -1;
+ final int i = findPosition(rawKey, true);
if (keys4[i] == rawKey) {
return false;
}
- keys4[i] = rawKey;
+ keys4[ (removedPositionToUse == -1) ? i : removedPositionToUse] = rawKey;
incrementSize();
return true;
}
@@ -552,9 +574,9 @@ public class IntHashSet implements Posit
}
if (keys4 == null) {
- keys2[pos] = Short.MIN_VALUE;
+ keys2[pos] = REMOVED2;
} else {
- keys4[pos] = Integer.MIN_VALUE;
+ keys4[pos] = REMOVED4;
}
size--;
@@ -633,13 +655,13 @@ public class IntHashSet implements Posit
final int adjKey;
if (keys4 == null) {
adjKey = keys2[index];
- if (adjKey == 0 || adjKey == Short.MIN_VALUE) {
+ if (adjKey == 0 || adjKey == REMOVED2) {
return 0; // null, not present
}
return getRawFromAdjKey(adjKey);
} else {
adjKey = keys4[index];
- if (adjKey == 0 || adjKey == Integer.MIN_VALUE) {
+ if (adjKey == 0 || adjKey == REMOVED4) {
return 0; // null, not present
}
return adjKey;
@@ -663,7 +685,7 @@ public class IntHashSet implements Posit
return pos;
}
int v = get(pos);
- if (v != 0 && v != Short.MIN_VALUE) {
+ if (v != 0 && v != REMOVED2) {
return pos;
}
pos++;
@@ -675,7 +697,7 @@ public class IntHashSet implements Posit
return pos;
}
int v = get(pos);
- if (v != 0 && v != Integer.MIN_VALUE) {
+ if (v != 0 && v != REMOVED4) {
return pos;
}
pos ++;
@@ -699,7 +721,7 @@ public class IntHashSet implements Posit
return pos;
}
int v = get(pos);
- if (v != 0 && v != ( (keys4 == null) ? Short.MIN_VALUE : Integer.MIN_VALUE)) {
+ if (v != 0 && v != ( (keys4 == null) ? REMOVED2 : REMOVED4)) {
return pos;
}
pos --;
@@ -801,13 +823,13 @@ public class IntHashSet implements Posit
public void bulkAddTo(IntVector v) {
if (null == keys4) {
for (int k : keys2) {
- if (k != 0 && k != Short.MIN_VALUE) {
+ if (k != 0 && k != REMOVED2) {
v.add(getRawFromAdjKey(k));
}
}
} else {
for (int k : keys4) {
- if (k != 0 && k != Integer.MIN_VALUE) {
+ if (k != 0 && k != REMOVED4) {
v.add(k);
}
}
Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java?rev=1712088&r1=1712087&r2=1712088&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java Mon Nov 2 17:15:23 2015
@@ -20,6 +20,7 @@
package org.apache.uima.internal.util;
import java.util.Arrays;
+import java.util.Random;
import junit.framework.TestCase;
@@ -124,7 +125,62 @@ public class IntHashSetTest extends Test
}
}
+ public void testAddIntoRemovedSlot() {
+ for (int i = 1; i < 100; i++) {
+ ihs.add(i);
+ }
+
+ /** Test with 2 byte numbers */
+ checkRemovedReuse(true);
+
+ ihs = new IntHashSet();
+ for (int i = 1; i < 99; i++) {
+ ihs.add(i);
+ }
+ ihs.add(100000); // force 4 byte
+ checkRemovedReuse(false);
+ }
+ private void checkRemovedReuse(boolean is2) {
+ Random r = new Random();
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+ for (int i = 0; i < 100000; i++) {
+ int v = 1 + r.nextInt(100 + (i % 30000));
+ ihs.remove(v);
+ assertTrue(!(ihs.contains(v)));
+ v = 1 + r.nextInt(100 + (i % 30000));
+ ihs.add(v);
+ assertTrue(ihs.contains(v));
+ }
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 16384 : 32768) );
+
+ for (int i = ((is2) ? 16384 : 32768); i > 128; i = i / 2) {
+ ihs.clear();
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2 || i == 32768) ? i : i/2));
+ ihs.clear();
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2 || i == 32768) ? i : i/2));
+ }
+ ihs.clear();
+
+ assertTrue(ihs.getSpaceUsedInWords() == (is2 ? 128 : 64));
+ for (int i = 1; i < ((is2) ? 100 : 99); i++) {
+ ihs.add(i);
+ }
+ if (!is2) {
+ ihs.add(100000);
+ }
+
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+ for (int i = 0; i < 1000; i++) {
+ int v = 1 + r.nextInt(100);
+ ihs.remove(v);
+ assertTrue(!(ihs.contains(v)));
+ ihs.add(v);
+ assertTrue(ihs.contains(v));
+ }
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+
+ }
}