You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2019/12/18 14:04:44 UTC
[uima-uimaj] 01/01: no jira, experiment, robin hood hashing.
This is an automated email from the ASF dual-hosted git repository.
schor pushed a commit to branch robin-hood-hash
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
commit 9a426895dfaa761ff74dd1de3d43ed47c79a834d
Author: Marshall Schor <ms...@schor.com>
AuthorDate: Wed Dec 18 09:04:09 2019 -0500
no jira, experiment, robin hood hashing.
Initial results: worse performance for IntHashSet, might be OK for Obj
HashSet
---
.../uima/internal/util/Common_hash_support_rh.java | 561 +++++++++++++++
.../apache/uima/internal/util/IntHashSetRh.java | 794 +++++++++++++++++++++
.../apache/uima/internal/util/ObjHashSetRh.java | 520 ++++++++++++++
.../uima/internal/util/IntHashSetPerfTestRh.java | 219 ++++++
.../uima/internal/util/IntHashSetTestRh.java | 250 +++++++
.../uima/internal/util/ObjHashSetTestRh.java | 185 +++++
6 files changed, 2529 insertions(+)
diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
new file mode 100644
index 0000000..01c0edd
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
@@ -0,0 +1,561 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.function.IntPredicate;
+
+/**
+ * A common superclass for hash maps and hash sets
+ * Uses robin hood with backward shift for deletion
+ * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ *
+ * uses linear probing (no delta expansion)
+ *
+ * extra table (1 byte per slot) holds lower 7 bits of hash
+ * and therefore also serves to indicate distance from initial probe =
+ * probe spot % 128 - lower 7 bits of hash (if negative, add 128, unless wrap around?)
+ *
+ * 8th bit , if 1 , is flag showing empty; no tombstones, because using backward-shift technique for removes
+ *
+ * find: stop early, if # probe-spot > distance from initial probe. (mod wrap-around)
+ *
+ */
+public abstract class Common_hash_support_rh {
+
+ // set to true to collect statistics for tuning
+ // you have to also put a call to showHistogram() at the end of the run
+ protected static final boolean TUNE = false;
+
+ protected static final int MIN_SIZE = 10; // 10 / .66 =15.15
+ protected static final int MIN_CAPACITY = 16;
+ protected static final int MIN_CAPACITY_SHRINK = 64; // don't shrink below this - thrashing
+
+ private static final byte LHB_EMPTY = (byte) 0x80;
+ private static final int LHB_HASH_MASK = 0x7f;
+
+ protected final float loadFactor;
+
+ protected final int initialCapacity;
+
+ protected int histogram [];
+ protected int maxProbe = 0;
+
+ protected int sizeWhichTriggersExpansion;
+ private int size = 0; // number of elements in the table
+
+ protected boolean secondTimeShrinkable = false;
+
+ protected abstract boolean is_valid_key(int pos);
+ protected abstract int keys_length();
+ protected abstract void newKeysAndValues(int capacity);
+ protected abstract void clearKeysAndValues();
+
+ protected int bitMask; // = key size -1
+ /**
+ * 7 bits of lower hash map, serves as distance from initial bucket probe
+ * initialize to "empty" = 0x80
+ * If not "empty", the value is
+ * - often the same as the lower 7 bits of the index
+ * - if not that, it's because there's a collision, and some slot with a higher index will have the lower 7 bits of the index
+ * - so, the distance from the initial probe is the probe addr with the matching value - initial probe addr.
+ * - which, is the same as the
+ * ((probe addr with the matching value) & LBH_HASH_MASK)
+ * - matching value
+ */
+ private byte[] lower_hash_bits;
+ protected int initial_probe;
+ protected byte initial_probe_lhb;
+ private int smaller_of_mask;
+
+ private static Common_hash_support_rh tune_instance;
+
+ protected abstract void shift(int prev, int pos);
+ protected abstract void setEmpty(int pos);
+
+ protected abstract void expand_table(); // double table size
+
+ /**
+ * @param initialSizeBeforeExpanding the number of elements the table should hold before expanding
+ */
+ public Common_hash_support_rh(int initialSizeBeforeExpanding) {
+ this(initialSizeBeforeExpanding, 0.66F);
+ }
+
+ public Common_hash_support_rh(int initialSizeBeforeExpanding, float factor) {
+ this.loadFactor = factor;
+ this.initialCapacity = tableSpace(initialSizeBeforeExpanding, factor);
+ lower_hash_bits = init_lhb(this.initialCapacity);
+ bitMask = this.initialCapacity - 1;
+ setSmallerOfMask();
+ if (TUNE) {
+ tune_instance = this;
+ histogram = new int[200];
+ Arrays.fill(histogram, 0);
+ maxProbe = 0;
+ }
+ }
+
+ public Common_hash_support_rh(Common_hash_support_rh orig) {
+ this(orig.initialCapacity);
+ this.sizeWhichTriggersExpansion = orig.sizeWhichTriggersExpansion;
+ this.size = orig.size;
+ this.secondTimeShrinkable = orig.secondTimeShrinkable;
+
+ // copy doesn't do tuning measurements
+ this.histogram = null;
+ }
+
+ public void clear() {
+ // see if size is less than the 1/2 size that triggers expansion
+ if (size < (sizeWhichTriggersExpansion >>> 1)) {
+ // if 2nd time then shrink by 50%
+ // this is done to avoid thrashing around the threshold
+ if (secondTimeShrinkable) {
+ secondTimeShrinkable = false;
+ final int newCapacity = Math.max(initialCapacity, keys_length() >>> 1);
+ if (newCapacity < keys_length()) {
+ newTable(newCapacity); // shrink table by 50%
+ size = 0;
+ resetHistogram();
+ if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
+ System.out.println("TRAcE_MODE Common_hash clear 2nd time shrinkable, newCapacity=" + newCapacity + ", keys_length: " + keys_length());
+ }
+ return;
+ } else { // don't shrink below minimum
+ clearExisting();
+ if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
+ System.out.println("TRAcE_MODE Common_hash clear 2nd time shrinkable but nothing done, below minimum: newCapacity=" + newCapacity + ", keys_length: " + keys_length());
+ }
+ return;
+ }
+ } else {
+ if (PositiveIntSet.IS_TRACE_MODE_SWITCH) System.out.println("TRACE_MODE Common_hash clear setting 2nd time shrinkable");
+ secondTimeShrinkable = true;
+ }
+ } else {
+ secondTimeShrinkable = false; // reset this to require 2 triggers in a row
+ }
+ clearExisting();
+ }
+
+ private void clearExisting() {
+ clearKeysAndValues();
+ Arrays.fill(lower_hash_bits, LHB_EMPTY);
+ size = 0;
+ resetHistogram();
+ }
+
+ /** It gets a ref to the current value of table, and then searches that array.
+ *
+ * speedups: avoid looking in lower_hash_bits array for 90% of the time the hit is in the first 2 positions?
+ *
+ * @param hash the hash code of the key
+ * @param is_eq true if the key at the int position is == to the key, or is 0
+ * @return the probeAddr in keys array.
+ * If not found, the probe value is to empty cell or to some other value
+ * Use isEmpty(probeAddr) to distinguish.
+ */
+ protected int findPosition(
+ int hash,
+ IntPredicate is_eq_or_not_present
+ ) {
+
+ int probeAddr = initial_probe = hash & bitMask;
+ final byte initialProbeLhb = initial_probe_lhb = (byte) (hash & LHB_HASH_MASK);
+
+ if (is_eq_or_not_present.test(probeAddr) ) {
+ if (TUNE) {
+ updateHistogram(0);
+ }
+ return probeAddr;
+ }
+ int extraProbes = 0;
+
+ for (;;) {
+
+ byte plhb = lower_hash_bits[probeAddr];
+ // not found if hit an empty slot
+ if (plhb == LHB_EMPTY ||
+ // or the distance to original probe for this slot is < number of probes
+ // (because otherwise, the other item would have replaced if present)
+ extraProbesForEntry(plhb, probeAddr) < extraProbes
+ ) {
+ // not found
+ if (TUNE) {
+ updateHistogram(extraProbes);
+ }
+ return probeAddr;
+ }
+
+ if (plhb == initialProbeLhb) {
+ if (is_eq_or_not_present.test(probeAddr)) {
+ // found
+ if (TUNE) {
+ updateHistogram(extraProbes);
+ }
+ return probeAddr;
+ }
+ // else was collision, not equal
+ }
+
+ extraProbes++;
+
+ probeAddr = incrPos(probeAddr);
+ }
+
+ }
+
+ /**
+ * Find the position for a new (guaranteed not in the table) item
+ *
+ * @param hash the hash code of the key
+ * @return the probeAddr in keys array.
+ * If not found, the probe value is to empty cell or to some other value
+ * Use isEmpty(probeAddr) to distinguish.
+ */
+ protected int findPosition_new(int hash) {
+
+ int extraProbes = 0;
+ int probeAddr = initial_probe = hash & bitMask;
+ initial_probe_lhb = (byte) (hash & LHB_HASH_MASK);
+
+
+ for (;;) {
+
+ byte plhb = lower_hash_bits[probeAddr];
+ // not found if hit an empty slot
+ if (plhb == LHB_EMPTY ||
+ // or the distance to original probe for this slot is < number of probes
+ // (because otherwise, the other item would have replaced if present)
+ extraProbesForEntry(plhb, probeAddr) < extraProbes
+ ) {
+ // not found
+ if (TUNE) {
+ updateHistogram(extraProbes);
+ }
+ return probeAddr;
+ }
+
+ extraProbes++;
+
+ probeAddr = incrPos(probeAddr);
+ }
+ }
+
+ protected boolean isEmpty(int probeAddr) {
+ return lower_hash_bits[probeAddr] == LHB_EMPTY;
+ }
+
+ protected byte setLhb(int pos, byte lbh) {
+ byte r = lower_hash_bits[pos];
+ assert lbh >= 0;
+ if (extraProbesForEntry(lbh, pos) >= 20) {
+ System.out.println("debug");
+ }
+ assert extraProbesForEntry(lbh, pos) < 20;
+ lower_hash_bits[pos] = lbh;
+ return r;
+ }
+
+ protected void remove_common(int position) {
+ int prev = position;
+ int i = incrPos(position);
+ size --;
+ for (;;) {
+ byte lhb = lower_hash_bits[i];
+ if (lhb == LHB_EMPTY || extraProbesForEntry(lhb, i) == 0 ) {
+ lower_hash_bits[prev] = LHB_EMPTY;
+ setEmpty(prev);
+ return;
+ }
+ //debug
+ if (extraProbesForEntry(lhb, prev) > 20) {
+ System.out.println("debug");
+ }
+ lower_hash_bits[prev] = lhb;
+ shift(prev, i);
+
+ prev = i;
+ i = incrPos(i);
+ }
+ }
+
+// /**
+// * This method calls the subclass's copy_to_new_table method,
+// * passing an inner lambda containing common code for copying old to new.
+// *
+// * That inner lambda, when invoked by the copy_to_new_table method,
+// * is passed another lambda of one argument (the old index)
+// * which is called to copy each element.
+// * @param new_capacity
+// * @param old_capacity
+// */
+// private void copyOld2New(int new_capacity, int old_capacity) {
+// copy_to_new_table(
+// new_capacity,
+// old_capacity,
+//
+// // this code assigned to "commonCopy" arg
+// (IntConsumer copyToNew, IntPredicate is_valid_old_key) -> {
+// newTable(new_capacity);
+// for (int i = 0; i < old_capacity; i++) {
+// if (is_valid_old_key.test(i)) {
+// copyToNew.accept(i);
+// }
+// }
+// });
+// }
+
+ /**
+ * advance pos until it points to a non 0 or is 1 past end
+ * If pos is negative, start at 0.
+ * Don't move if pos already has valid key
+ * @param pos -
+ * @return updated pos
+ */
+ protected int moveToNextFilled(int pos) {
+ final int max = keys_length();
+ if (pos < 0) {
+ pos = 0;
+ }
+ while (true) {
+ if (pos >= max) {
+ return pos;
+ }
+ if (is_valid_key(pos)) {
+ return pos;
+ }
+ pos ++;
+ }
+ }
+
+ /**
+ * decrement pos until it points to a non 0 or is -1
+ * If pos is beyond end start at end.
+ * Don't move if pos already has valid key
+ * @param pos -
+ * @return updated pos
+ */
+ protected int moveToPreviousFilled(int pos) {
+ final int max = keys_length();
+ if (pos > max) {
+ pos = max - 1;
+ }
+
+ while (true) {
+ if (pos < 0) {
+ return pos;
+ }
+ if (is_valid_key(pos)) {
+ return pos;
+ }
+ pos --;
+ }
+ }
+
+ // called by clear, increase table size, decrease table size
+ protected void newTable(int capacity) {
+ capacity = Math.max(MIN_SIZE, Misc.nextHigherPowerOf2(capacity));
+ newKeysAndValues(capacity);
+ lower_hash_bits = init_lhb(capacity);
+ setSmallerOfMask();
+ bitMask = capacity - 1;
+ sizeWhichTriggersExpansion = (int)(capacity * loadFactor);
+ }
+
+ protected void incrementSize() {
+ size++;
+ if (size >= sizeWhichTriggersExpansion) {
+ maybeIncreaseTableCapacity();
+ }
+ }
+
+ private void maybeIncreaseTableCapacity() {
+
+ if (TUNE) {
+ int old_capacity = keys_length();
+ int new_capacity = 2 * old_capacity;
+ System.out.println("Capacity increasing from " + old_capacity + " to " + new_capacity + ", size=" + size);
+ }
+ expand_table();
+ }
+
+// /**
+// * only called if actually found and removed an entry
+// */
+// protected void commonRemove() {
+// size --;
+//// debugValidate();
+// }
+
+ public int size() {
+ return size;
+ }
+
+ protected void resetHistogram() {
+// if (TUNE) {
+// histogram = new int[200];
+// Arrays.fill(histogram, 0);
+// maxProbe = 0;
+// }
+ }
+
+ private void updateHistogram(int extraProbes) {
+ histogram[extraProbes] = 1 + histogram[extraProbes];
+ if (maxProbe < extraProbes) {
+ maxProbe = extraProbes;
+ }
+ }
+
+ public void showHistogram() {
+ if (TUNE) {
+ int sumI = 0;
+ for (int i : histogram) {
+ sumI += i;
+ }
+
+ System.out.format(
+ "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
+ loadFactor, maxProbe, sumI);
+ for (int i = 0; i <= maxProbe; i++) {
+ if (i == maxProbe && histogram[i] == 0) {
+ System.out.println("huh?");
+ }
+ System.out.println(i + ": " + histogram[i]);
+ }
+
+ System.out.println("bytes / entry = " + (float) (keys_length()) * 8 / size());
+ System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+ size(),
+ (int) ((keys_length() >>> 1) * loadFactor),
+ (int) (keys_length() * loadFactor));
+ }
+ }
+
+ static {
+ if (TUNE) {
+ Runtime.getRuntime().addShutdownHook(new Thread(null, () -> {
+ tune_instance.showHistogram();
+ }));
+ }
+ }
+
+ // test case use
+ int getCapacity() {
+ return keys_length();
+ }
+
+ protected abstract class CommonKeyIterator implements IntListIterator {
+
+ protected int curPosition;
+
+ protected final int firstPosition;
+
+ protected CommonKeyIterator() {
+ this.curPosition = moveToNextFilled(0);
+ firstPosition = curPosition;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return curPosition < keys_length() && curPosition >= 0;
+ }
+
+ @Override
+ public boolean hasPrevious() {
+ if (curPosition > keys_length() || curPosition <= 0) {
+ return false;
+ }
+
+ int test = moveToPreviousFilled(curPosition - 1);
+ return test >= 0;
+ }
+
+ @Override
+ /**
+ * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+ */
+ public void moveToStart() {
+ curPosition = moveToNextFilled(0);
+ }
+
+ @Override
+ /**
+ * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+ */
+ public void moveToEnd() {
+ curPosition = moveToPreviousFilled(bitMask);
+ }
+
+ }
+
+ /**
+ *
+ * @param numberOfElements -
+ * @param factor -
+ * @return capacity of the main table (either 2 byte or 4 byte entries)
+ */
+ public static int tableSpace(int numberOfElements, Float factor) {
+ if (numberOfElements < 0) {
+ throw new IllegalArgumentException("must be > 0");
+ }
+ final int capacity = Math.round(numberOfElements / factor);
+ return Math.max(16, Misc.nextHigherPowerOf2(capacity));
+ }
+
+ protected void debugValidate() {
+ // count non-0, non-removed, compare to size
+ int sum = 0;
+ for (int i = 0; i < keys_length(); i ++) {
+ if (is_valid_key(i)) {
+ sum ++;
+ if (sum > size) {
+ System.out.println("debug");
+ }
+ }
+ }
+ }
+
+ protected int incrPos(int pos) {
+ return bitMask & (pos + 1);
+ }
+
+ private byte[] init_lhb(int size) {
+ byte[] lhb = new byte[size];
+ Arrays.fill(lhb, LHB_EMPTY);
+ return lhb;
+ }
+
+ /**
+ * Compute extra probes for an entry.
+ * Handle cases where the table size is bigger or smaller than the
+ * size of the lower-hash-bits
+ * @param entry the lower-hash-bits, represents the original probe's last 7 bits
+ * @param pos the current position
+ * @return the extra distance of this entry from the original probe
+ */
+ private int extraProbesForEntry(byte entry, int pos) {
+ return smaller_of_mask & (pos - entry);
+ }
+
+ private void setSmallerOfMask () {
+ smaller_of_mask = Math.min(lower_hash_bits.length, 128) - 1;
+ }
+}
diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java
new file mode 100644
index 0000000..571be51
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java
@@ -0,0 +1,794 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+
+import org.apache.uima.util.impl.Constants;
+
+/**
+ * A set of non-zero ints.
+ * Can be negative.
+ *
+ * 0 reserved internally to indicate "not in the map";
+ * you will get an exception if you try to store 0 as a value.
+ *
+ * 0 will be returned if the value is missing from the map.
+ *
+ * allowed range is Integer.MIN_VALUE to Integer.MAX_VALUE
+ * 0 is the value for an empty cell
+ * Integer.MIN_VALUE is the value for a deleted (removed) value
+ *
+ * based on Int2IntHashMap
+ * This impl is for use in a single thread case only
+ *
+ * Supports shrinking (reallocating the big table)
+ *
+ * Supports representing ints as "short" 2byte values if possible,
+ * together with an offset amount.
+ * Because of the offset, the adjusted key could be == to the offset,
+ * so we subtract 1 from it to preserve 0 value as being the null / empty.
+ * For short values, the range is:
+ * Short.MIN_VALUE+2 to Short.MAX_VALUE after Offset,
+ * with the "0" value moved down by 1 and
+ * the Short.MIN_VALUE used for deleted (removed) items
+ *
+ * Automatically switches to full int representation if needed
+ */
+public class IntHashSetRh extends Common_hash_support_rh implements PositiveIntSet {
+
+ public static final int SIZE_NEEDING_4_BYTES = 256 * 256 - 2;
+
+ // this load factor gives, for array doubling strategy:
+ // between 1.5 * 4 bytes (1 word for key) = 6 and
+ // 3 * 4 12 bytes per entry
+ // This compares with 160 bytes/entry for the IntArrayRBT impl
+
+ // offset only used with keys2. values stored are key - offset
+ // intent is to have them fit into short data type.
+ // If the offset is 100,000, then the keys range from 100000 - 32766 to 100000 + 32767, includes "0"
+ // -32767 == Short.MIN_VALUE + 1,
+ // Numbers 0 and below are stored as n - 1, so "0" itself isn't actually stored
+ // because it's used to represent an empty slot
+ // - value after key adjustment of 0 reserved; if value is 0 - means no entry
+ // store values at or below the offset are stored as 1 less to avoid mapping to "0".
+
+ private static final short EMPTY2 = 0;
+ private int offset;
+
+ private int [] keys4;
+ private short[] keys2;
+
+ private boolean isMake4;
+
+ // these are true values (before any offset adjustment)
+ private int mostPositive = Integer.MIN_VALUE;
+ private int mostNegative = Integer.MAX_VALUE;
+
+ public IntHashSetRh() {
+ this(10, 0);
+ }
+
+ public IntHashSetRh(int initialCapacity) {
+ this(initialCapacity, 0);
+ }
+
+ /**
+ *
+ * @param initialSizeBeforeExpanding - you can add this many before expansion
+ * @param offset - for values in the short range, the amount to subtract
+ * before storing.
+ * If == MIN_VALUE, then force 4 byte ints
+ */
+ public IntHashSetRh(int initialSizeBeforeExpanding, int offset) {
+ super(initialSizeBeforeExpanding);
+ isMake4 = offset == Integer.MIN_VALUE;
+ this.offset = isMake4 ? 0 : offset;
+ newTable(this.initialCapacity);
+ resetTable();
+ if (IS_TRACE_MODE_SWITCH) {
+ System.out.println("TRACE_MODE new IntHashSet, sizeBeforeExpanding = " + initialSizeBeforeExpanding + ", offset= " + offset);
+ }
+ }
+
+// /**
+// * The number of 32 bit words that are reserved when
+// * creating a table to hold the specified number of elements
+// *
+// * The number is a power of 2.
+// *
+// * The number is at least 16.
+// *
+// * The number is such that you could add this many elements without
+// * triggering the capacity expansion.
+// *
+// * @param numberOfElements -
+// * @return -
+// */
+// public int tableSpace(int numberOfElements) {
+// return tableSpace(numberOfElements, loadFactor);
+// }
+//
+
+ public static int tableSpace(int numberOfElements) {
+ return Common_hash_support.tableSpace(numberOfElements, 0.66f);
+ }
+
+
+ /**
+ * Method called by handleHashSet in PositiveIntSet
+ * to indicate if adding this many items would cause an expansion
+ * @return true if would not expand
+ */
+ public boolean wontExpand() {
+ return wontExpand(1);
+ }
+
+ /**
+ * Method called by handleHashSet in PositiveIntSet
+ * to indicate if adding this many items would cause an expansion
+ * @param n the number of items added
+ * @return true if would not expand
+ */
+ public boolean wontExpand(int n) {
+ return (size() + n) < sizeWhichTriggersExpansion;
+ }
+
+ public int getSpaceUsedInWords() {
+ return (keys4 != null) ? keys4.length : (keys2.length >> 1);
+ }
+
+ public static int getSpaceOverheadInWords() {
+ return 11;
+ }
+
+ /**
+ * Only call this if using short values with offset
+ * @param adjKey
+ * @return raw key
+ */
+ private int getRawFromAdjKey(int adjKey) {
+ assert (adjKey != Short.MIN_VALUE);
+ return adjKey + offset + ((adjKey < 0) ? 1 : 0);
+ }
+
+
+ private void resetTable() {
+ mostPositive = Integer.MIN_VALUE;
+ mostNegative = Integer.MAX_VALUE;
+// resetHistogram();
+// size = 0;
+// nbrRemoved = 0;
+ }
+
+ @Override
+ public void clear() {
+ super.clear();
+ resetTable();
+ }
+
+
+ // only called when keys are shorts
+ private boolean isAdjKeyOutOfRange(int adjKey) {
+ return (adjKey > Short.MAX_VALUE ||
+ // the minimum adjKey value stored in a short is
+ // Short.MIN_VALUE + 1
+ adjKey <= Short.MIN_VALUE);
+ }
+
+ @Override
+ public boolean contains(int rawKey) {
+ int pos = findPosition(rawKey);
+ if (isEmpty(pos)) {
+ return false;
+ }
+ return get(pos) == rawKey;
+ }
+
+ /**
+ * This method is part of the PositiveSet API, and
+ * is defined to return an int that could be used with
+ * iterators to position them.
+ *
+ * For this case, it is not used, because
+ * the iterators don't support positioning this way
+ * because they are not sorted.
+ */
+ @Override
+ public int find(int rawKey) {
+ throw new UnsupportedOperationException();
+ }
+
+
+ /**
+ * @param rawKey the key value to find
+ * @return the position in the table if present, otherwise the position of the slot where the
+ * key value would be added, unless the new value is at a position which would require
+ * the key2 form to be switched to the key4 form, in which case,
+ * -1 is returned (means not found, and requires conversion to 4 byte keys)
+ */
+ private int findPosition(int rawKey) {
+
+ if (rawKey == 0) {
+ throw new IllegalArgumentException("0 is an invalid key");
+ }
+
+ if (keys4 == null) {
+ // special handling for 2 byte keys with offsets
+ // check for keys never stored in short table
+ // adjKey of Short.MIN_VALUE which is the removed flag
+ final int adjKey = getAdjKey(rawKey);
+ if (isAdjKeyOutOfRange(adjKey)) {
+ return -1;
+ }
+
+
+ return findPositionAdjKey(adjKey);
+
+ } else {
+
+
+ return findPosition4(rawKey);
+ }
+
+ }
+
+ private int findPosition4(int rawKey) {
+
+ return super.findPosition(
+
+ // key hash
+ Misc.hashInt(rawKey),
+
+ // is_eq || not present
+ i -> keys4[i] == rawKey || keys4[i] == 0
+
+ );
+ }
+
+ private int findPositionAdjKey(int adjKey) {
+ // special handling for 2 byte keys with offsets
+ // check for keys never stored in short table
+ // adjKey of Short.MIN_VALUE which is the removed flag
+
+ return super.findPosition(
+
+ // key hash
+ Misc.hashInt(adjKey),
+
+ // is_eq
+ i -> keys2[i] == 0 || keys2[i] == adjKey
+ );
+ }
+
+
+ /**
+ * return the adjusted key.
+ * never called for 4 byte form
+ * for 2 byte key mode, subtract the offset, and adjust by -1 if 0 or less
+ * Note: returned value can be less than Short.MIN_VALUE
+ * @param rawKey
+ * @return adjusted key, a range from negative to positive, but never 0
+ */
+ private int getAdjKey(int rawKey) {
+// if (rawKey == 0 || (rawKey == Integer.MIN_VALUE)) {
+// throw new ArrayIndexOutOfBoundsException(rawKey);
+// }
+// if (keys4 != null) {
+// return rawKey;
+// }
+ int adjKey = rawKey - offset;
+ return adjKey - ( (adjKey <= 0) ? 1 : 0);
+ }
+
+ private void switchTo4byte(int capacity) {
+ // convert to 4 byte because values can't be offset and fit in a short
+ final short[] oldKeys = keys2;
+ isMake4 = true;
+ newTable(capacity); // make a 4 table. same size
+ for (short adjKey : oldKeys) {
+ if (adjKey != EMPTY2 ) {
+ find4AndAddIfMissing(getRawFromAdjKey(adjKey));
+ }
+ }
+ }
+
+ /**
+ *
+ * @param rawKey -
+ * @return true if this set did not already contain the specified element
+ */
+ @Override
+ public boolean add(int rawKey) {
+ if (rawKey == 0) {
+ throw new IllegalArgumentException("argument must be non-zero");
+ }
+
+ if (size() == 0) {
+ mostPositive = mostNegative = rawKey;
+ } else {
+ if (rawKey > mostPositive) {
+ mostPositive = rawKey;
+ }
+ if (rawKey < mostNegative) {
+ mostNegative = rawKey;
+ }
+ }
+
+ if (keys4 != null) {
+ return maybeAdd4(rawKey);
+ // short keys
+ } else {
+ int adjKey = getAdjKey(rawKey);
+ if (isAdjKeyOutOfRange(adjKey)) {
+ switchTo4byte(getCapacity());
+ return maybeAdd4(rawKey);
+
+ // key in range
+ } else {
+ return maybeAdd2(adjKey);
+ }
+ }
+ }
+
+ private boolean maybeAdd2(int adjKey) {
+ boolean wasAdded = find2AndAddIfMissing(adjKey);
+ if (wasAdded) {
+ incrementSize();
+ }
+ return wasAdded;
+ }
+
+ private boolean maybeAdd4(int rawKey) {
+ boolean wasAdded = find4AndAddIfMissing(rawKey);
+ if (wasAdded) {
+ incrementSize();
+ }
+ return wasAdded;
+ }
+
+ private boolean find4AndAddIfMissing(int rawKey) {
+
+ int i = findPosition4(rawKey);
+ if (keys4[i] == rawKey) {
+ return false; // already in table
+ }
+
+ int c = keys4[i];
+ int saved = c;
+
+ byte prev_lhb = setLhb(i, initial_probe_lhb);
+ keys4[i] = rawKey;
+
+ if (saved == 0) {
+ return true;
+ }
+ // robin hood - if stole slot, add that slot back
+ rh_add4(incrPos(i), saved, prev_lhb);
+ return true;
+ }
+
+ private void rh_add4(int pos, int v, byte lhb) {
+ while (true) {
+ int c = keys4[pos];
+ int saved = c; // if empty, is 0
+
+ byte prev_lhb = setLhb(pos, lhb);
+ keys4[pos] = v;
+
+ if (saved == 0) {
+ break;
+ }
+ // robin hood - if stole slot, add that slot back
+ v = saved;
+ lhb = prev_lhb;
+ pos = incrPos(pos);
+ }
+ }
+
+ private boolean find2AndAddIfMissing(int adjKey) {
+ int i = findPositionAdjKey(adjKey);
+
+ short c = keys2[i];
+ if (c == (short)adjKey) {
+ return false; // already in table
+ }
+
+ short saved = c; // if empty, is 0
+
+ byte prev_lhb = setLhb(i, initial_probe_lhb);
+ keys2[i] = (short) adjKey;
+
+ if (saved == 0) {
+ return true;
+ }
+ // robin hood - if stole slot, add that slot back
+ rh_add2(incrPos(i), saved, prev_lhb);
+ return true;
+ }
+
+ private void rh_add2(int pos, short v, byte lhb) {
+ while (true) {
+ short c = keys2[pos];
+ short saved = c; // if empty, is 0
+
+ byte prev_lhb = setLhb(pos, lhb);
+ keys2[pos] = v;
+
+ if (saved == 0) {
+ break;
+ }
+ // robin hood - if stole slot, add that slot back
+ v = saved;
+ lhb = prev_lhb;
+ pos = incrPos(pos);
+ }
+
+ }
+
+ @Override
+ protected void shift(int prev, int next) {
+ if (keys4 == null) {
+ keys2[prev] = keys2[next];
+ } else {
+ keys4[prev] = keys4[next];
+ }
+ }
+
+ @Override
+ protected void setEmpty(int pos) {
+ if (keys4 == null) {
+ keys2[pos] = 0;
+ } else {
+ keys4[pos] = 0;
+ }
+
+ }
+ /**
+ * mostPositive and mostNegative are not updated
+ * for removes. So these values may be inaccurate,
+ * but mostPositive is always >= actual most positive,
+ * and mostNegative is always <= actual most negative.
+ * No conversion from int to short
+ *
+ * Does slot shifting on remove
+ *
+ * @param rawKey the value to remove
+ * @return true if the key was present
+ */
+ @Override
+ public boolean remove(int rawKey) {
+// debugValidate();
+ final int pos = findPosition(rawKey);
+ if (isEmpty(pos)) {
+ return false;
+ }
+
+ if (get(pos) != rawKey) {
+ return false;
+ }
+ remove_common(pos);
+
+// //debug
+// if (size() <= 0)
+// System.out.println("debug");
+// assert size() > 0;
+
+ if (rawKey == mostPositive) {
+ mostPositive --; // a weak adjustment
+ }
+ if (rawKey == mostNegative) {
+ mostNegative ++; // a weak adjustment
+ }
+
+ return true;
+ }
+
+ /**
+ *
+ * @return a value that is >= the actual most positive value in the table.
+ * it will be == unless a remove operation has removed a most positive value
+ */
+ public int getMostPositive() {
+ return mostPositive;
+ }
+
+ /**
+ *
+ * @return a value that is <= the actual least positive value in the table.
+ * It will be == unless remove operations has removed a least positive value.
+ */
+ public int getMostNegative() {
+ return mostNegative;
+ }
+
+ public void showHistogram() {
+ if (TUNE) {
+ int sumI = 0;
+ for (int i : histogram) {
+ sumI += i;
+ }
+
+ System.out.format(
+ "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
+ loadFactor, maxProbe, sumI);
+ for (int i = 0; i <= maxProbe; i++) {
+ if (i == maxProbe && histogram[i] == 0) {
+ System.out.println("huh?");
+ }
+ System.out.println(i + ": " + histogram[i]);
+ }
+
+ if (keys4 == null) {
+ System.out.println("bytes / entry = " + (float) (keys2.length) * 2 / size());
+ System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+ size(),
+ (int) ((keys2.length >>> 1) * loadFactor),
+ (int) (keys2.length * loadFactor));
+ } else {
+ System.out.println("bytes / entry = " + (float) (keys4.length) * 4 / size());
+ System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+ size(),
+ (int) ((keys4.length >>> 1) * loadFactor),
+ (int) (keys4.length * loadFactor));
+ }
+ }
+ }
+
+ /**
+ * For iterator use, position is a magic number returned by the internal find
+ * For short keys, the value stored for adjKey == 0 is -1, adjKey == -1 is -2, etc.
+ */
+ @Override
+ public int get(int pos) {
+ final int adjKey;
+ if (keys4 == null) {
+ adjKey = keys2[pos];
+ if (adjKey == 0 ) {
+ return 0; // null, not present
+ }
+ return getRawFromAdjKey(adjKey);
+ } else {
+ adjKey = keys4[pos];
+ if (adjKey == 0 ) {
+ return 0; // null, not present
+ }
+ return adjKey;
+ }
+ }
+
+
+ private class IntHashSetIterator implements IntListIterator {
+
+ private int curPosition;
+
+ private IntHashSetIterator() {
+ this.curPosition = 0;
+ }
+
+ public final boolean hasNext() {
+ curPosition = moveToNextFilled(curPosition);
+ return curPosition < getCapacity();
+ }
+
+ public final int nextNvc() {
+ curPosition = moveToNextFilled(curPosition);
+ int r = get(curPosition);
+ curPosition = moveToNextFilled(curPosition + 1);
+ return r;
+ }
+
+ /**
+ * @see org.apache.uima.internal.util.IntListIterator#hasPrevious()
+ */
+ public boolean hasPrevious() {
+ int prev = moveToPreviousFilled(curPosition - 1);
+ return (prev >= 0);
+ }
+
+ @Override
+ public int previous() {
+ curPosition = moveToPreviousFilled(curPosition - 1);
+ if (curPosition < 0) {
+ throw new NoSuchElementException();
+ }
+ return get(curPosition);
+ }
+
+ @Override
+ public int previousNvc() {
+ curPosition = moveToPreviousFilled(curPosition - 1);
+ return get(curPosition);
+ }
+
+ /**
+ * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+ */
+ public void moveToEnd() {
+ curPosition = getCapacity() - 1;
+ }
+
+ /**
+ * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+ */
+ public void moveToStart() {
+ curPosition = 0;
+ }
+ }
+
+
+ @Override
+ public IntListIterator iterator() {
+ return new IntHashSetIterator();
+ }
+
+ @Override
+ public int moveToFirst() {
+ return (size() == 0) ? -1 : moveToNextFilled(0);
+ }
+
+ @Override
+ public int moveToLast() {
+ return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+ }
+
+ @Override
+ public int moveToNext(int position) {
+ if (position < 0) {
+ return position;
+ }
+ final int n = moveToNextFilled(position + 1);
+ return (n >= getCapacity()) ? -1 : n;
+ }
+
+ @Override
+ public int moveToPrevious(int position) {
+ if (position >= getCapacity()) {
+ return -1;
+ }
+ return moveToPreviousFilled(position - 1);
+ }
+
+ @Override
+ public boolean isValid(int position) {
+ return (position >= 0) && (position < getCapacity());
+ }
+
+ @Override
+ public void bulkAddTo(IntVector v) {
+ if (null == keys4) {
+ for (int k : keys2) {
+ if (k != 0) {
+ v.add(getRawFromAdjKey(k));
+ }
+ }
+ } else {
+ for (int k : keys4) {
+ if (k != 0) {
+ v.add(k);
+ }
+ }
+ }
+ }
+
+ @Override
+ public int[] toIntArray() {
+ final int s = size();
+ if (s == 0) {
+ return Constants.EMPTY_INT_ARRAY;
+ }
+ final int[] r = new int[size()];
+ int pos = moveToFirst();
+ for (int i = 0; i < r.length; i ++) {
+ r[i] = get(pos);
+ pos = moveToNextFilled(pos + 1);
+ }
+ return r;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ return String
+ .format(
+ "IntHashSet [loadFactor=%s, initialCapacity=%s, sizeWhichTriggersExpansion=%s, size=%s, offset=%s%n keys4=%s%n keys2=%s%n secondTimeShrinkable=%s, mostPositive=%s, mostNegative=%s]",
+ loadFactor, initialCapacity, sizeWhichTriggersExpansion, size(), offset,
+ Arrays.toString(keys4), Arrays.toString(keys2), secondTimeShrinkable, mostPositive,
+ mostNegative);
+ }
+
+ // for testing
+ boolean isShortHashSet() {
+ return keys2 != null;
+ }
+
+ int getOffset() {
+ return offset;
+ }
+
+ @Override
+ protected boolean is_valid_key(int pos) {
+ if (keys4 == null) {
+ return keys2[pos] != 0;
+ }
+ return keys4[pos] != 0;
+ }
+
+ @Override
+ protected int keys_length() {
+ return (keys4 == null) ? keys2.length : keys4.length;
+ }
+
+ @Override
+ protected void newKeysAndValues(int capacity) {
+ if (isMake4) {
+ keys4 = new int[capacity];
+ keys2 = null;
+ } else {
+ keys2 = new short[capacity];
+ keys4 = null;
+ }
+ }
+
+ @Override
+ protected void clearKeysAndValues() {
+ if (keys4 == null) {
+ Arrays.fill(keys2, (short)0);
+ } else {
+ Arrays.fill(keys4, 0);
+ }
+ resetTable();
+ }
+
+ @Override
+ protected void expand_table() {
+ int old_capacity = keys_length();
+ int new_capacity = old_capacity * 2;
+
+ if (keys4 == null) {
+ if (new_capacity >= 256 * 256) {
+ // switch to 4
+ if (TUNE) {System.out.println("Switching to 4 byte keys");}
+ switchTo4byte(new_capacity);
+ } else {
+ final short[] oldKeys = keys2;
+ newTable(new_capacity);
+ for (int v :oldKeys) {
+ if (v != 0) {
+ find2AndAddIfMissing(v);
+ }
+ }
+ }
+ } else {
+ // keys4
+ final int[] oldKeys = keys4;
+ newTable(new_capacity);
+ for (int v :oldKeys) {
+ if (v != 0) {
+ find4AndAddIfMissing(v);
+ }
+ }
+ }
+ }
+
+}
diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java
new file mode 100644
index 0000000..d70c7d3
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.uima.cas.FeatureStructure;
+
+/**
+ * A set of Objects of type T
+
+ * This impl is for use in a single thread case only, when table is being updated.
+ * Multiple reader threads are OK if there's no writing.
+ *
+ * Supports shrinking (reallocating the big table)
+ *
+ * Uses robin hood method, with
+ * incrementing collision resolution, and shift backwards on remove, and 1 byte lower-hash-bits aux table
+ * for locality of ref.
+ */
+public class ObjHashSetRh<T> extends Common_hash_support_rh implements Set<T> {
+
+// public static final float DEFAULT_LOAD_FACTOR = 0.66F;
+
+ private final Class<T> clazz; // for reifying the T type, resizing table, toArray operations
+
+// private final float loadFactor = DEFAULT_LOAD_FACTOR;
+
+// private final int initialCapacity;
+//
+// private int histogram [];
+// private int maxProbe = 0;
+//
+// private int sizeWhichTriggersExpansion;
+// private int size; // number of elements in the table
+// private int nbrRemoved; // number of removed elements (coded as removed)
+
+ // the actual Object table, operated as a hashtable
+ private T [] keys;
+// final private T [] emptyKeyArray;
+
+// private boolean secondTimeShrinkable = false;
+
+// private int modificationCount = 0; // not currently used
+
+ public ObjHashSetRh(Class<T> clazz) {
+ this(12, clazz); // default initial size
+ }
+
+ /**
+ * @param initialCapacity - you can add this many before expansion
+ * @param clazz - a superclass of the stored items
+ * @param removedMarker - a unique value never stored in the table, used to mark removed items
+ */
+ public ObjHashSetRh(int initialCapacity, Class<T> clazz) {
+ super(initialCapacity);
+ this.clazz = clazz;
+ newTable(initialCapacity);
+ }
+
+ /**
+ * Copy constructor
+ * @param ohs -
+ */
+ public ObjHashSetRh(ObjHashSetRh<T> ohs) {
+ super(ohs);
+ this.clazz = ohs.clazz;
+ this.keys = Arrays.copyOf(ohs.keys, ohs.keys.length);
+ }
+
+
+ public ObjHashSetRh(ObjHashSetRh<T> ohs, boolean readOnly) {
+ this(ohs);
+ if (!readOnly) Misc.internalError();
+ }
+
+
+ /**
+ * returns a position in the key/value table
+ * if the key is not found, then the position will be to the
+ * place where the key would be put upon adding (without reusing REMOVED, and the
+ * current internal value of keys[position] would be 0.
+ *
+ * if the key is found, then keys[position] == key
+ * @param obj the object to find
+ * @return the probeAddr in keys array - might reference a slot holding null, or the key value if found
+ */
+ private int findPosition(final T key) {
+ if (key == null) {
+ throw new IllegalArgumentException("null is an invalid key");
+ }
+
+ return findPosition(
+
+ // key hash
+ Misc.hashInt(key.hashCode()),
+
+ // is_eq
+ i -> keys[i].equals(key));
+ }
+
+
+ @Override
+ public boolean contains(Object obj) { // arg must be Object to fit Collection API
+ return (clazz.isAssignableFrom(obj.getClass())) ? (find((T) obj) != -1) : false;
+ }
+
+ /**
+ * @param obj the object to find in the table (if it is there)
+ * @return the position of obj in the table, or -1 if not in the table
+ */
+ public int find(T obj) {
+ if (obj == null || size() == 0) {
+ return -1;
+ }
+
+ final int pos = findPosition(obj);
+ return obj.equals(keys[pos]) ? pos : -1; // keys[pos] can be null
+ }
+
+ /**
+ *
+ * @param obj - the object to add
+ * @return true if this set did not already contain the specified element
+ */
+ @Override
+ public boolean add(T obj) {
+ final int i = findPosition(obj);
+ T c = keys[i];
+ if (obj.equals(c)) { // keys[i] may be null
+ return false; // false if already present
+ }
+
+ // i is ref to empty or non-empty (but not equal) slot which should be stolen
+ add_new(obj, c, i);
+ incrementSize();
+ return true;
+ }
+
+ private void add_new(T obj, T saved, int i) {
+ keys[i] = obj;
+ byte prev_lhb = setLhb(i, initial_probe_lhb);
+
+ if (saved != null) {
+ rh_add(incrPos(i), saved, prev_lhb);
+ }
+ }
+
+ private void rh_add(int pos,T v, byte lhb) {
+ while (true) {
+ T c = keys[pos];
+ T saved = c; // if empty, is 0
+
+ byte prev_lhb = setLhb(pos, lhb);
+ keys[pos] = v;
+
+ if (saved == null) {
+ break;
+ }
+ // robin hood - if stole slot, add that slot back
+ v = saved;
+ lhb = prev_lhb;
+ pos = incrPos(pos);
+ }
+ }
+
+ @Override
+ protected void shift(int prev, int next) {
+ keys[prev] = keys[next];
+ }
+
+ @Override
+ protected void setEmpty(int pos) {
+ keys[pos] = null;
+ }
+
+
+ /**
+ * Using robin hood shifting for removed item
+ * @param rawKey the value to remove
+ * @return true if the key was present
+ */
+ @Override
+ public boolean remove(Object rawKey) {
+ if (rawKey == null) {
+ return false;
+ }
+
+ final int pos = findPosition((T) rawKey); // null or equal or collision obj
+ T c = keys[pos];
+ if (c == null || ! c.equals(rawKey)) {
+ return false;
+ }
+
+ remove_common(pos);
+ return true;
+ }
+
+ private void removeAtPosition(int pos) {
+ remove_common(pos);
+ }
+
+
+// /**
+// * @see Set#size()
+// */
+// @Override
+// public int size() {
+// return size;
+// }
+
+// public void showHistogram() {
+// if (TUNE) {
+// int sumI = 0;
+// for (int i : histogram) {
+// sumI += i;
+// }
+//
+// System.out.format(
+// "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
+// loadFactor, maxProbe, sumI);
+// for (int i = 0; i <= maxProbe; i++) {
+// if (i == maxProbe && histogram[i] == 0) {
+// System.out.println("huh?");
+// }
+// System.out.println(i + ": " + histogram[i]);
+// }
+//
+// System.out.println("bytes / entry = " + (float) (keys.length) * 4 / size());
+// System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+// size(),
+// (int) ((keys.length >>> 1) * loadFactor),
+// (int) (keys.length * loadFactor));
+// }
+// }
+
+ /**
+ */
+ /**
+ * For iterator use
+ * @param index - a magic number returned by the internal find
+ * @return the T at that spot, or null if nothing there
+ */
+ public T get(int index) {
+ T obj = keys[index];
+ if (obj == null) {
+ return null; // null, not present
+ }
+ return obj;
+ }
+
+//// /**
+//// * advance pos until it points to a non 0 or is 1 past end
+//// * @param pos -
+//// * @return updated pos
+//// */
+//// public int moveToNextFilled(int pos) {
+////// if (pos < 0) {
+////// pos = 0;
+////// }
+////
+//// final int max = getCapacity();
+//// while (true) {
+//// if (pos >= max) {
+//// return pos;
+//// }
+//// T v = keys[pos];
+//// if (v != null && v != removedMarker) {
+//// return pos;
+//// }
+//// pos ++;
+//// }
+//// }
+//
+// /**
+// * decrement pos until it points to a non 0 or is -1
+// * @param pos -
+// * @return updated pos
+// */
+// public int moveToPreviousFilled(int pos) {
+// final int max = getCapacity();
+// if (pos > max) {
+// pos = max - 1;
+// }
+//
+// while (true) {
+// if (pos < 0) {
+// return pos;
+// }
+// T v = keys[pos];
+// if (v != null && v != removedMarker) {
+// return pos;
+// }
+// pos --;
+// }
+// }
+
+ private class ObjHashSetIterator implements Iterator<T> {
+
+ /**
+ * Keep this always pointing to a non-0 entry, or
+ * if not valid, outside the range
+ */
+ protected int curPosition;
+
+ private ObjHashSetIterator() {
+ this.curPosition = moveToNextFilled(0);
+ }
+
+ @Override
+ public final boolean hasNext() {
+ return curPosition < getCapacity();
+ }
+
+ @Override
+ public final T next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+ T r = get(curPosition);
+ curPosition = moveToNextFilled(curPosition + 1);
+ return r;
+ }
+
+ @Override
+ public void remove() {
+ int pos = moveToPreviousFilled(curPosition - 1);
+ if (pos >= 0) {
+ removeAtPosition(pos);
+ }
+ }
+ }
+
+
+ @Override
+ public Iterator<T> iterator() {
+ return new ObjHashSetIterator();
+ }
+
+ private int moveToFirst() {
+ return (size() == 0) ? -1 : moveToNextFilled(0);
+ }
+
+// private int moveToLast() {
+// return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+// }
+
+// private int moveToNext(int position) {
+// if (position < 0) {
+// return position;
+// }
+// final int n = moveToNextFilled(position + 1);
+// return (n >= getCapacity()) ? -1 : n;
+// }
+//
+// private int moveToPrevious(int position) {
+// if (position >= getCapacity()) {
+// return -1;
+// }
+// return moveToPreviousFilled(position - 1);
+// }
+
+// public boolean isValid(int position) {
+// return (position >= 0) && (position < getCapacity());
+// }
+
+ /**
+ * if the fs is in the set, the iterator should return it.
+ * if not, return -1 (makes iterator invalid)
+ * @param fs position to this fs
+ * @return the index if present, otherwise -1;
+ */
+ public int moveTo(FeatureStructure fs) {
+ if (clazz.isAssignableFrom(fs.getClass())) {
+ int pos = find((T)fs);
+ if (pos >= 0) {
+ return pos;
+ }
+ }
+ return -1;
+ }
+
+ @Override
+ public <T2> T2[] toArray(T2[] a) {
+ final int s = size();
+ if (s == 0) {
+ if (a.length >= 1) {
+ a[0] = null; // part of the contract of toArray, where the array a size is >
+ }
+ return a;
+ }
+
+ final T2[] r = (a.length >= s)? a : (T2[]) Array.newInstance(a.getClass(), s);
+ int pos = moveToFirst();
+ for (int i = 0; i < s; i ++) {
+ r[i] = (T2) get(pos);
+ pos = moveToNextFilled(pos + 1);
+ }
+ if (a.length > s) {
+ r[s] = null; // part of the contract of toArray, where the array a size is >
+ }
+ return r;
+ }
+
+ @Override
+ public T[] toArray() {
+ return toArray((T[]) Array.newInstance(clazz, size()));
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ return String
+ .format(
+ "%s [loadFactor=%s, initialCapacity=%s, sizeWhichTriggersExpansion=%s, size=%s, secondTimeShrinkable=%s%n keys=%s]",
+ this.getClass().getName(), loadFactor, initialCapacity, sizeWhichTriggersExpansion, size(), secondTimeShrinkable,
+ Arrays.toString(keys));
+ }
+
+ // Collection methods
+ @Override
+ public boolean isEmpty() {
+ return size() == 0;
+ }
+
+ @Override
+ public boolean containsAll(Collection<?> c) {
+ c.stream().allMatch(item -> contains(item));
+ return false;
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends T> c) {
+ boolean[] anyChanged = {false};
+ c.stream().forEach(item -> anyChanged[0] |= add(item));
+ return anyChanged[0];
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> c) {
+ boolean[] anyChanged = {false};
+ c.stream().forEach(item -> anyChanged[0] |= remove(item));
+ return anyChanged[0];
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> c) {
+ boolean anyChanged = false;
+ Iterator<T> it = iterator();
+ while (it.hasNext()) {
+ T v = it.next();
+ if (!c.contains(v)) {
+ anyChanged = true;
+ it.remove();
+ }
+ }
+ return anyChanged;
+ }
+
+ @Override
+ protected boolean is_valid_key(int pos) {
+ return keys[pos] != null;
+ }
+
+ @Override
+ protected int keys_length() {
+ return keys.length;
+ }
+
+ @Override
+ protected void newKeysAndValues(int capacity) {
+ keys = (T[]) Array.newInstance(clazz, capacity);
+ }
+
+ @Override
+ protected void clearKeysAndValues() {
+ Arrays.fill(keys, null);
+ }
+
+ @Override
+ protected void expand_table() {
+ int old_capacity = keys_length();
+ int new_capacity = old_capacity * 2;
+
+ final T[] oldKeys = keys;
+ newTable(new_capacity);
+ for (T v : oldKeys) {
+ if (v != null) {
+ int pos = findPosition_new( Misc.hashInt(v.hashCode()));
+ add_new(v, keys[pos], pos);
+ }
+ }
+ }
+
+
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
new file mode 100644
index 0000000..5bb5286
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Random;
+
+import org.apache.uima.internal.util.rb_trees.IntArrayRBT;
+
+import junit.framework.TestCase;
+
+public class IntHashSetPerfTestRh extends TestCase {
+ /**
+ * Set to false to run the performance test
+ *
+ * Tests both IntHashSet and IntBitSet
+ */
+ final boolean SKIP = false;
+
+ static int cacheLoadSize;
+
+ static long seed = 3737463135938899369L;
+// new Random().nextLong();
+ static {
+ System.out.println("Random seed for IntHashSetPerfTest: " + seed);
+ }
+ Random r = new Random(seed);
+
+// Set<Integer> keys = new HashSet<Integer>(1000);
+
+ int dmv = 0;
+
+ IntHashSetRh m2;
+ IntArrayRBT m1;
+ IntBitSet m3;
+
+ final int[] keys10000 = new int[511111];
+ int k10ki = 0;
+
+
+ public void testPerf() {
+ if (SKIP) return;
+ m1 = new IntArrayRBT(16);
+ m2 = new IntHashSetRh(16);
+ m3 = new IntBitSet(16);
+
+ for (int i = 0; i < keys10000.length; i++) {
+ int k = r.nextInt(511110);
+
+ keys10000[i] = k + 1;
+ }
+
+ System.out.format("%n%n W A R M U P %n%n");
+ cacheLoadSize = 0;
+ warmup(m1);
+ warmup(m2);
+
+ System.out.format("%n%n Time 100 %n%n");
+ timelp(100);
+ System.out.format("%n%n Time 1000 %n%n");
+ timelp(1000);
+ System.out.format("%n%n Time 10000 %n%n");
+ timelp(10000);
+ System.out.format("%n%n Time 100000 %n%n");
+ timelp(100000);
+ cacheLoadSize = 0; // 1 * 256 * 1;
+ System.out.format("%n%n Time 100000 %n%n");
+ timelp(100000);
+
+ System.out.format("%n%n Time 500000 %n%n");
+ timelp(500000);
+
+ System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+ timelp(500000);
+ System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+ timelp(500000);
+ System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+ timelp(500000);
+
+
+ System.out.println(dmv);
+ }
+ private void time2(int n) {
+// float f1 = time(m1, n);
+ float f2 = time(m2, n);
+ float f3 = time(m3, n);
+ System.out.format(" ratio "
+// + "RBT/hash = %.3f RTB/bitset = %.3f "
+ + "hash/bitset = %.3f%n",
+// f1/f2, f1/f3,
+ f2/f3);
+ }
+
+ private void timelp(int n) {
+ time2(n);
+ time2(n);
+ time2(n);
+ }
+
+ private void warmup(Object m) {
+ for (int i = 0; i < 500; i++) {
+ inner(m,true, 1000) ; // warm up
+ }
+ }
+
+ private float time(Object m, int ss) {
+ long start = System.nanoTime();
+ for (int i = 0; i < 500; i++) {
+ inner(m,false, ss);
+ }
+ float t = (System.nanoTime() - start) / 1000000.0F;
+ System.out.format("time for %,d: %s is %.3f milliseconds %n", ss,
+ m.getClass().getSimpleName(),
+ t);
+ return t;
+ }
+
+ private int nextKey() {
+ int r = keys10000[k10ki++];
+ if (k10ki >= keys10000.length) {
+ k10ki = 0;
+ }
+ return r;
+ }
+
+ private void inner(Object m, boolean check, int ss) {
+ CS cs = new CS(m);
+
+ for (int i = 0; i < ss; i++) {
+
+ int k = keys10000[i];
+// System.out.print(" " + k);
+// if (i %100 == 0) System.out.println("");
+// keys.add(k);
+ cs.add(k);
+ cacheLoad(i);
+ if (check) {
+ assertTrue(cs.contains(k));
+ }
+ }
+ for (int i = 0; i < ss; i++) {
+ boolean v = cs.contains(keys10000[i]);
+ if (!v) {
+ throw new RuntimeException("never happen");
+ }
+ dmv += 1;
+ cacheLoad(i);
+ }
+ cs.clear();
+
+
+// for (int k : keys) {
+// assertEquals(10000 + k, (m instanceof IntHashSetRh) ?
+// ((IntHashSetRh)m).get(k) :
+// ((IntArrayRBT)m).getMostlyClose(k));
+// }
+
+ }
+
+ static class CS {
+ final Object set;
+
+ CS(Object set) {
+ this.set = set;
+ }
+
+ boolean contains(int i) {
+ return (set instanceof IntArrayRBT) ? ((IntArrayRBT)set).contains(i) :
+ (set instanceof IntHashSetRh) ? ((IntHashSetRh) set).contains(i) :
+ ((IntBitSet) set).contains(i);
+ }
+
+ void add(int i) {
+ if (set instanceof IntArrayRBT) {
+ ((IntArrayRBT)set).add(i);
+ } else if (set instanceof IntHashSetRh) {
+ ((IntHashSetRh)set).add(i);
+ } else {
+ ((IntBitSet)set).add(i);
+ }
+ }
+
+ void clear() {
+ if (set instanceof IntArrayRBT) {
+ ((IntArrayRBT)set).clear();
+ } else if (set instanceof IntHashSetRh) {
+ ((IntHashSetRh)set).clear();
+ } else {
+ ((IntBitSet)set).clear();
+ }
+ }
+
+ }
+
+ void cacheLoad(int i) {
+ if (cacheLoadSize > 0) {
+ int[] cl = new int[cacheLoadSize];
+ if (i != 100000) {
+ cl = null;
+ }
+ }
+ }
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java
new file mode 100644
index 0000000..09d3365
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+
+public class IntHashSetTestRh extends TestCase {
+
+ IntHashSetRh ihs;
+
+ Random random;
+
+ public void setUp() {
+ ihs = new IntHashSetRh();
+ }
+
+ public void testBasic() {
+
+ ihs.add(15);
+ ihs.add(188);
+ int[] sv = getSortedValues(ihs);
+ assertEquals(2, sv.length);
+ assertEquals(15, sv[0]);
+ assertEquals(188, sv[1]);
+ assertEquals(15, ihs.getMostNegative());
+ assertEquals(188, ihs.getMostPositive());
+
+ // test most positive / negative
+ ihs.clear();
+ ihs.add(189);
+ assertEquals(189, ihs.getMostNegative());
+ assertEquals(189, ihs.getMostPositive());
+ ihs.add(1000);
+ ihs.add(-1000);
+ assertEquals(1000, ihs.getMostPositive());
+ assertEquals(-1000, ihs.getMostNegative());
+ ihs.add(500);
+ ihs.add(-500);
+ assertEquals(1000, ihs.getMostPositive());
+ assertEquals(-1000, ihs.getMostNegative());
+ ihs.remove(1000);
+ assertEquals(999, ihs.getMostPositive());
+ assertEquals(-1000, ihs.getMostNegative());
+ ihs.add(1001);
+ assertEquals(1001, ihs.getMostPositive());
+ sv = getSortedValues(ihs);
+ assertTrue(Arrays.equals(sv, new int[]{-1000, -500, 189, 500, 1001}));
+ }
+
+ public void testSwitching224() {
+ final int OS = 100000;
+ ihs = new IntHashSetRh(16, OS);
+ ihs.add(OS - 1);
+ ihs.add(OS);
+ ihs.add(OS + 1);
+ int[] sv = getSortedValues(ihs);
+ assertTrue(Arrays.equals(sv, new int[]{99999, 100000, 100001 }));
+ ihs.add(OS - 32767);
+ sv = getSortedValues(ihs);
+ assertTrue(Arrays.equals(sv, new int[]{OS - 32767, 99999, 100000, 100001}));
+ ihs.add(OS - 32768);
+ sv = getSortedValues(ihs);
+ assertTrue(Arrays.equals(sv, new int[]{OS - 32768, OS - 32767, 99999, 100000, 100001}));
+
+ }
+
+ private int[] getSortedValues(IntHashSetRh s) {
+ IntListIterator it = s.iterator();
+ int[] r = new int[s.size()];
+ int i = 0;
+ while (it.hasNext()) {
+ r[i++] = it.nextNvc();
+ }
+ Arrays.sort(r);
+ return r;
+ }
+
+ public void testContains() {
+ ihs.add(1188);
+ ihs.add(1040);
+ assertTrue(ihs.contains(1188));
+ assertTrue(ihs.contains(1040));
+ assertFalse(ihs.contains(1));
+ assertFalse(ihs.contains(99));
+ }
+
+// public void testTableSpace() {
+// assertEquals(32, IntHashSetRh.tableSpace(19, 0.6f)); // 19 / .6 = 31.xxx, round to 32
+// assertEquals(64, IntHashSetRh.tableSpace(21, 0.6f));
+// assertEquals(32, ihs.tableSpace(21));
+// }
+
+ public void testWontExpand() {
+ ihs = new IntHashSetRh(21);
+ assertEquals(16, ihs.getSpaceUsedInWords());
+ assertTrue(ihs.wontExpand(20));
+ assertFalse(ihs.wontExpand(21));
+ }
+
+ public void testExpandNpe() {
+ ihs.add(15);
+ ihs.add(150000); // makes 4 byte table entries
+
+ for (int i = 1; i < 256; i++) { // 0 is invalid key
+ ihs.add(i); // causes resize, check no NPE etc thrown.
+ }
+ }
+
+ public void testAddIntoRemovedSlot() {
+ long seed = // -5748775656590176364L;
+ new Random().nextLong();
+ System.out.println("Random seed for testAddIntoRemovedSlot in " + this.getClass().getName() + ": " + seed);
+ random = new Random(seed);
+
+ for (int i = 1; i < 100; i++) {
+ ihs.add(i);
+ assertEquals(i, ihs.size());
+ }
+
+ assertEquals(99, ihs.size());
+
+ /** Test with 2 byte numbers */
+ checkRemovedReuse(true);
+
+ ihs = new IntHashSetRh();
+ for (int i = 1; i < 99; i++) {
+ ihs.add(i);
+ }
+ ihs.add(100000); // force 4 byte
+ checkRemovedReuse(false);
+ }
+
+ private void checkRemovedReuse(boolean is2) {
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+ for (int i = 0; i < 100000; i++) {
+// if (i % 25000 == 0) System.out.println(i);
+ int v = 1 + random.nextInt(100 + (i % 30000)); // random between 1 and 30,101
+ int sz = ihs.size();
+ boolean wasRemoved = ihs.remove(v);
+ assertEquals(sz - (wasRemoved ? 1 : 0), ihs.size());
+ // debug
+ if (ihs.contains(v)) {
+ System.out.println("debug");
+ }
+ assertTrue(!(ihs.contains(v)));
+ v = 1 + random.nextInt(100 + (i % 30000));
+ sz = ihs.size();
+ boolean wasAdded = ihs.add(v);
+ assertEquals(sz + (wasAdded ? 1 : 0), ihs.size());
+ assertTrue(ihs.contains(v));
+ }
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 16384 : 32768) );
+
+ // 32,768, 16,384, 8,192, 4096, 2048, 1024, 512, 256
+ // for 2 byte storage, is2 = true, and expected is: i / 2
+ // for 4 byte storage, is2 = false, and expected is i
+
+ ihs.clear(); // doesn't set 2nd time because size + removed > 1/2 the capacity
+
+ for (int i = 32768; i > 128; i = i / 2) {
+ ihs.clear(); // sets 2nd time shrinkable
+ assertTrue(ihs.getSpaceUsedInWords() == (is2 ? i/2 : i));
+ ihs.clear(); // shrinks
+ assertTrue(ihs.getSpaceUsedInWords() == (is2 ? i/4 : i/2));
+ }
+// ihs.clear();
+//
+ assertTrue(ihs.getSpaceUsedInWords() == (is2 ? 64: 128));
+
+ // table size should be 128, adding 100 items should cause expansion (84 == .66 * 128)
+ for (int i = 1; i < ((is2) ? 100 : 99); i++) {
+ ihs.add(i);
+ }
+ if (!is2) {
+ ihs.add(100000);
+ }
+
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+ for (int i = 0; i < 1000; i++) {
+ int v = 1 + random.nextInt(100);
+ ihs.remove(v);
+ assertTrue(!(ihs.contains(v)));
+ ihs.add(v);
+ assertTrue(ihs.contains(v));
+ }
+
+ assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+
+ }
+
+ public void testRandom() {
+ int countAdd = 0;
+ int dupsA = 0;
+ int notPres = 0;
+ int countRmv = 0;
+
+ long seed =
+ new Random().nextLong();
+ System.out.println("Random seed for testRandom in " + this.getClass().getName() + ": " + seed);
+ random = new Random(seed);
+
+ for (int i = 1; i < 1024 * 1024; i++) {
+ int k = i & (1024 * 256) - 1;
+ if (k == 0) continue;
+ if (random.nextInt(3) > 0) {
+ int sz = ihs.size();
+ if (ihs.add(k)) {
+ countAdd ++;
+ assertEquals(sz + 1, ihs.size());
+ } else {
+ dupsA ++;
+ }
+
+ } else {
+ int sz = ihs.size();
+ if (ihs.remove(k)) {
+ countRmv ++;
+ assertEquals(sz - 1, ihs.size());
+ } else {
+ notPres ++;
+ }
+
+ }
+ }
+
+ System.out.format("added: %,d dups: %,d rmvd: %,d notPres: %,d, size: %d%n", countAdd, dupsA, countRmv, notPres, ihs.size());
+ assertEquals(countAdd - countRmv, ihs.size() );
+ }
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java
new file mode 100644
index 0000000..e8aefc2
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+
+public class ObjHashSetTestRh extends TestCase {
+
+ ObjHashSetRh<Integer> ihs;
+
+ Random random;
+
+ public void setUp() {
+ ihs = new ObjHashSetRh<>(Integer.class);
+ }
+
+ public void testBasic() {
+
+ ihs.add(15);
+ ihs.add(188);
+ Integer[] sv = ihs.toArray();
+ assertEquals(2, sv.length);
+ assertEquals(15 + 188, sv[0] + sv[1]);
+
+ // test most positive / negative
+ ihs.clear();
+ ihs.add(189);
+ ihs.add(1000);
+ ihs.add(-1000);
+ ihs.add(500);
+ ihs.add(-500);
+ ihs.remove(1000);
+ ihs.add(1001);
+ }
+
+ public void testContains() {
+ ihs.add(1188);
+ ihs.add(1040);
+ assertTrue(ihs.contains(1188));
+ assertTrue(ihs.contains(1040));
+ assertFalse(ihs.contains(1));
+ assertFalse(ihs.contains(99));
+ }
+
+// public void testTableSpace() {
+// assertEquals(32, ObjHashSetRh.tableSpace(19, 0.6f)); // 19 / .6 = 31.xxx, round to 32
+// assertEquals(64, ObjHashSetRh.tableSpace(21, 0.6f));
+// assertEquals(32, ihs.tableSpace(21));
+// }
+
+ public void testExpandNpe() {
+ ihs.add(15);
+ ihs.add(150000); // makes 4 byte table entries
+
+ for (int i = 1; i < 256; i++) { // 0 is invalid key
+ ihs.add(i); // causes resize, check no NPE etc thrown.
+ }
+ }
+
+ public void testAddIntoRemovedSlot() {
+ long seed = // -6401324561844760524L;
+ new Random().nextLong();
+ System.out.println("Random seed for testAddIntoRemovedSlot in " + this.getClass().getName() + ": " + seed);
+ random = new Random(seed);
+
+ for (int i = 1; i < 100; i++) {
+ ihs.add(i);
+ assertEquals(i, ihs.size());
+ }
+
+ assertEquals(99, ihs.size());
+
+ /** Test with 2 byte numbers */
+ checkRemovedReuse(true);
+
+ ihs = new ObjHashSetRh<>(Integer.class);
+ for (int i = 1; i < 99; i++) {
+ ihs.add(i);
+ }
+ ihs.add(100000); // force 4 byte
+ checkRemovedReuse(false);
+ }
+
+ private void checkRemovedReuse(boolean is2) {
+ for (int i = 0; i < 100000; i++) {
+ int v = 1 + random.nextInt(100 + (i % 30000)); // random between 1 and 30,101
+ int sz = ihs.size();
+ boolean wasRemoved = ihs.remove(v);
+ assertEquals(sz - (wasRemoved ? 1 : 0), ihs.size());
+ assertTrue(!(ihs.contains(v)));
+ v = 1 + random.nextInt(100 + (i % 30000));
+ sz = ihs.size();
+ boolean wasAdded = ihs.add(v);
+ assertEquals(sz + (wasAdded ? 1 : 0), ihs.size());
+ assertTrue(ihs.contains(v));
+ }
+
+ ihs.clear(); // doesn't set 2nd time because size + removed > 1/2 the capacity
+
+ for (int i = 32768; i > 128; i = i / 2) {
+ ihs.clear(); // sets 2nd time shrinkable
+ assertEquals(i, ihs.getCapacity());
+ ihs.clear(); // shrinks
+ assertEquals(i/2, ihs.getCapacity());
+ }
+// ihs.clear();
+//
+
+ // table size should be 128, adding 100 items should cause expansion (84 == .66 * 128)
+ for (int i = 1; i < 100; i++) {
+ ihs.add(i);
+ }
+
+ assertEquals(256, ihs.getCapacity());
+ for (int i = 0; i < 1000; i++) {
+ int v = 1 + random.nextInt(100);
+ ihs.remove(v);
+ assertTrue(!(ihs.contains(v)));
+ ihs.add(v);
+ assertTrue(ihs.contains(v));
+ }
+
+ assertEquals(256, ihs.getCapacity());
+
+ }
+
+ public void testRandom() {
+ int countAdd = 0;
+ int dupsA = 0;
+ int notPres = 0;
+ int countRmv = 0;
+
+ long seed =
+ new Random().nextLong();
+ System.out.println("Random seed for testRandom in " + this.getClass().getName() + ": " + seed);
+ random = new Random(seed);
+
+ for (int i = 1; i < 1024 * 1024; i++) {
+ int k = i & (1024 * 256) - 1;
+ if (k == 0) continue;
+ if (random.nextInt(3) > 0) {
+ int sz = ihs.size();
+ if (ihs.add(k)) {
+ countAdd ++;
+ assertEquals(sz + 1, ihs.size());
+ } else {
+ dupsA ++;
+ }
+
+ } else {
+ int sz = ihs.size();
+ if (ihs.remove(k)) {
+ countRmv ++;
+ assertEquals(sz - 1, ihs.size());
+ } else {
+ notPres ++;
+ }
+
+ }
+ }
+
+ System.out.format("added: %,d dups: %,d rmvd: %,d notPres: %,d, size: %d%n", countAdd, dupsA, countRmv, notPres, ihs.size());
+ assertEquals(countAdd - countRmv, ihs.size() );
+ }
+}