You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2019/12/18 14:04:43 UTC

[uima-uimaj] branch robin-hood-hash created (now 9a42689)

This is an automated email from the ASF dual-hosted git repository.

schor pushed a change to branch robin-hood-hash
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git.


      at 9a42689  no jira, experiment, robin hood hashing.

This branch includes the following new commits:

     new 9a42689  no jira, experiment, robin hood hashing.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[uima-uimaj] 01/01: no jira, experiment, robin hood hashing.

Posted by sc...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

schor pushed a commit to branch robin-hood-hash
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git

commit 9a426895dfaa761ff74dd1de3d43ed47c79a834d
Author: Marshall Schor <ms...@schor.com>
AuthorDate: Wed Dec 18 09:04:09 2019 -0500

    no jira, experiment, robin hood hashing.
    
    Initial results: worse performance for IntHashSet, might be OK for Obj
    HashSet
---
 .../uima/internal/util/Common_hash_support_rh.java | 561 +++++++++++++++
 .../apache/uima/internal/util/IntHashSetRh.java    | 794 +++++++++++++++++++++
 .../apache/uima/internal/util/ObjHashSetRh.java    | 520 ++++++++++++++
 .../uima/internal/util/IntHashSetPerfTestRh.java   | 219 ++++++
 .../uima/internal/util/IntHashSetTestRh.java       | 250 +++++++
 .../uima/internal/util/ObjHashSetTestRh.java       | 185 +++++
 6 files changed, 2529 insertions(+)

diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
new file mode 100644
index 0000000..01c0edd
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
@@ -0,0 +1,561 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.function.IntPredicate;
+
+/**
+ * A common superclass for hash maps and hash sets
+ * Uses robin hood with backward shift for deletion 
+ * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ *
+ * uses linear probing (no delta expansion)
+ * 
+ * extra table (1 byte per slot) holds lower 7 bits of hash 
+ *   and therefore also serves to indicate distance from initial probe =
+ *     probe spot % 128 - lower 7 bits of hash (if negative, add 128, unless wrap around?)
+ * 
+ * 8th bit , if 1 , is flag showing empty; no tombstones, because using backward-shift technique for removes
+ * 
+ * find: stop early, if # probe-spot > distance from initial probe.  (mod wrap-around)
+ * 
+ */
+public abstract class Common_hash_support_rh {
+
+  // set to true to collect statistics for tuning
+  // you have to also put a call to showHistogram() at the end of the run
+  protected static final boolean TUNE = false;
+  
+  protected static final int MIN_SIZE = 10;   // 10 / .66 =15.15
+  protected static final int MIN_CAPACITY = 16;
+  protected static final int MIN_CAPACITY_SHRINK = 64;  // don't shrink below this - thrashing
+  
+  private static final byte LHB_EMPTY = (byte) 0x80;
+  private static final int LHB_HASH_MASK =  0x7f;
+  
+  protected final float loadFactor;  
+  
+  protected final int initialCapacity; 
+
+  protected int histogram [];
+  protected int maxProbe = 0;
+
+  protected int sizeWhichTriggersExpansion;
+  private int size = 0; // number of elements in the table  
+
+  protected boolean secondTimeShrinkable = false;
+  
+  protected abstract boolean is_valid_key(int pos);
+  protected abstract int keys_length();
+  protected abstract void newKeysAndValues(int capacity);
+  protected abstract void clearKeysAndValues();
+
+  protected int bitMask;  // = key size -1
+  /**
+   * 7 bits of lower hash map, serves as distance from initial bucket probe 
+   * initialize to "empty" = 0x80
+   * If not "empty", the value is 
+   *   - often the same as the lower 7 bits of the index
+   *   - if not that, it's because there's a collision, and some slot with a higher index will have the lower 7 bits of the index
+   *   - so, the distance from the initial probe is the probe addr with the matching value - initial probe addr.
+   *   -   which, is the same as the 
+   *         ((probe addr with the matching value) & LBH_HASH_MASK) 
+   *         - matching value
+   */
+  private byte[] lower_hash_bits; 
+  protected int initial_probe;
+  protected byte initial_probe_lhb;
+  private int smaller_of_mask;
+
+  private static Common_hash_support_rh tune_instance;
+  
+  protected abstract void shift(int prev, int pos);
+  protected abstract void setEmpty(int pos);
+  
+  protected abstract void expand_table(); // double table size
+
+  /**
+   * @param initialSizeBeforeExpanding the number of elements the table should hold before expanding
+   */
+  public Common_hash_support_rh(int initialSizeBeforeExpanding) {
+    this(initialSizeBeforeExpanding, 0.66F);
+  }
+   
+  public Common_hash_support_rh(int initialSizeBeforeExpanding, float factor) {
+    this.loadFactor = factor;
+    this.initialCapacity = tableSpace(initialSizeBeforeExpanding, factor);
+    lower_hash_bits = init_lhb(this.initialCapacity);
+    bitMask = this.initialCapacity - 1;
+    setSmallerOfMask();
+    if (TUNE) {
+      tune_instance = this;
+      histogram = new int[200];
+      Arrays.fill(histogram, 0);
+      maxProbe = 0;
+    }
+  }
+
+  public Common_hash_support_rh(Common_hash_support_rh orig) {
+    this(orig.initialCapacity);
+    this.sizeWhichTriggersExpansion = orig.sizeWhichTriggersExpansion;
+    this.size = orig.size;
+    this.secondTimeShrinkable = orig.secondTimeShrinkable;
+
+    // copy doesn't do tuning measurements
+    this.histogram = null;
+  }
+  
+  public void clear() {
+    // see if size is less than the 1/2 size that triggers expansion
+    if (size <  (sizeWhichTriggersExpansion >>> 1)) {
+      // if 2nd time then shrink by 50%
+      //   this is done to avoid thrashing around the threshold
+      if (secondTimeShrinkable) {
+        secondTimeShrinkable = false;
+        final int newCapacity = Math.max(initialCapacity, keys_length() >>> 1);
+        if (newCapacity < keys_length()) { 
+          newTable(newCapacity);  // shrink table by 50%
+          size = 0;
+          resetHistogram();
+          if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
+            System.out.println("TRAcE_MODE Common_hash clear 2nd time shrinkable, newCapacity=" + newCapacity + ", keys_length: " + keys_length());
+          }
+          return;
+        } else { // don't shrink below minimum
+          clearExisting();
+          if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
+            System.out.println("TRAcE_MODE Common_hash clear 2nd time shrinkable but nothing done, below minimum: newCapacity=" + newCapacity + ", keys_length: " + keys_length());
+          }
+          return;
+        }
+      } else {
+        if (PositiveIntSet.IS_TRACE_MODE_SWITCH) System.out.println("TRACE_MODE Common_hash clear setting 2nd time shrinkable");
+        secondTimeShrinkable = true;
+      }
+    } else {
+      secondTimeShrinkable = false; // reset this to require 2 triggers in a row
+    }
+    clearExisting();
+  }
+  
+  private void clearExisting() {
+    clearKeysAndValues();
+    Arrays.fill(lower_hash_bits, LHB_EMPTY);
+    size = 0;
+    resetHistogram();    
+  }
+  
+  /** It gets a ref to the current value of table, and then searches that array.
+   * 
+   * speedups: avoid looking in lower_hash_bits array for 90% of the time the hit is in the first 2 positions?
+   * 
+   * @param hash     the hash code of the key
+   * @param is_eq    true if the key at the int position is == to the key, or is 0
+   * @return the probeAddr in keys array.  
+   *         If not found, the probe value is to empty cell or to some other value
+   *         Use isEmpty(probeAddr) to distinguish.
+   */
+  protected int findPosition(
+                   int hash,
+                   IntPredicate is_eq_or_not_present
+                   ) {
+        
+    int probeAddr = initial_probe = hash & bitMask;
+    final byte initialProbeLhb = initial_probe_lhb = (byte) (hash & LHB_HASH_MASK);
+ 
+    if (is_eq_or_not_present.test(probeAddr) ) {
+      if (TUNE) {
+        updateHistogram(0);
+      }
+      return probeAddr;
+    }
+    int extraProbes = 0;
+    
+    for (;;) {      
+
+      byte plhb = lower_hash_bits[probeAddr];
+          // not found if hit an empty slot
+      if (plhb == LHB_EMPTY || 
+       // or the distance to original probe for this slot is < number of probes
+       //   (because otherwise, the other item would have replaced if present)
+          extraProbesForEntry(plhb, probeAddr) < extraProbes
+         ) {
+        // not found
+        if (TUNE) {
+          updateHistogram(extraProbes);
+        }
+        return probeAddr;
+      }
+      
+      if (plhb == initialProbeLhb) {
+        if (is_eq_or_not_present.test(probeAddr)) {
+          // found
+          if (TUNE) {
+            updateHistogram(extraProbes);
+          }
+          return probeAddr;
+        }
+        // else was collision, not equal
+      }
+      
+     extraProbes++;
+      
+      probeAddr = incrPos(probeAddr);
+    }
+    
+  } 
+ 
+  /** 
+   * Find the position for a new (guaranteed not in the table) item
+   * 
+   * @param hash     the hash code of the key
+   * @return the probeAddr in keys array.  
+   *         If not found, the probe value is to empty cell or to some other value
+   *         Use isEmpty(probeAddr) to distinguish.
+   */
+  protected int findPosition_new(int hash) {
+        
+    int extraProbes = 0;
+    int probeAddr = initial_probe = hash & bitMask;
+    initial_probe_lhb = (byte) (hash & LHB_HASH_MASK);
+    
+    
+    for (;;) {
+
+      byte plhb = lower_hash_bits[probeAddr];
+          // not found if hit an empty slot
+      if (plhb == LHB_EMPTY || 
+       // or the distance to original probe for this slot is < number of probes
+       //   (because otherwise, the other item would have replaced if present)
+          extraProbesForEntry(plhb, probeAddr) < extraProbes
+         ) {
+        // not found
+        if (TUNE) {
+          updateHistogram(extraProbes);
+        }
+        return probeAddr;
+      }
+            
+      extraProbes++;
+      
+      probeAddr = incrPos(probeAddr);
+    }
+  }
+  
+  protected boolean isEmpty(int probeAddr) {
+    return lower_hash_bits[probeAddr] == LHB_EMPTY;
+  }
+  
+  protected byte setLhb(int pos, byte lbh) {
+    byte r = lower_hash_bits[pos];
+    assert lbh >= 0;
+    if (extraProbesForEntry(lbh, pos) >= 20) {
+      System.out.println("debug");
+    }
+    assert extraProbesForEntry(lbh, pos) < 20;
+    lower_hash_bits[pos] = lbh;
+    return r;
+  }
+    
+  protected void remove_common(int position) {
+    int prev = position;
+    int i = incrPos(position);
+    size --;
+    for (;;) {
+      byte lhb = lower_hash_bits[i];
+      if (lhb == LHB_EMPTY || extraProbesForEntry(lhb, i) == 0 ) {
+        lower_hash_bits[prev] = LHB_EMPTY;
+        setEmpty(prev);
+        return;
+      }
+      //debug
+      if (extraProbesForEntry(lhb, prev) > 20) {
+        System.out.println("debug");
+      }
+      lower_hash_bits[prev] = lhb;
+      shift(prev, i);
+      
+      prev = i;
+      i = incrPos(i);
+    }
+  }
+  
+//  /**
+//   * This method calls the subclass's copy_to_new_table method, 
+//   *   passing an inner lambda containing common code for copying old to new.
+//   *   
+//   *   That inner lambda, when invoked by the copy_to_new_table method, 
+//   *     is passed another lambda of one argument (the old index) 
+//   *     which is called to copy each element.  
+//   * @param new_capacity
+//   * @param old_capacity
+//   */
+//  private void copyOld2New(int new_capacity, int old_capacity) {
+//    copy_to_new_table( 
+//      new_capacity,
+//      old_capacity,
+//      
+//        // this code assigned to "commonCopy" arg
+//        (IntConsumer copyToNew, IntPredicate is_valid_old_key) -> {  
+//      newTable(new_capacity);
+//      for (int i = 0; i < old_capacity; i++) {
+//        if (is_valid_old_key.test(i)) {
+//          copyToNew.accept(i);
+//        }
+//      }
+//    });    
+//  }
+
+  /**
+   * advance pos until it points to a non 0 or is 1 past end
+   * If pos is negative, start at 0.
+   * Don't move if pos already has valid key
+   * @param pos -
+   * @return updated pos
+   */
+  protected int moveToNextFilled(int pos) {
+    final int max = keys_length();
+    if (pos < 0) {
+      pos = 0;
+    }
+    while (true) {
+      if (pos >= max) {
+        return pos;
+      }
+      if (is_valid_key(pos)) {
+        return pos;
+      }
+      pos ++;
+    }
+  }
+
+  /**
+   * decrement pos until it points to a non 0 or is -1
+   * If pos is beyond end start at end.
+   * Don't move if pos already has valid key
+   * @param pos -
+   * @return updated pos
+   */
+  protected int moveToPreviousFilled(int pos) {
+    final int max = keys_length();
+    if (pos > max) {
+      pos = max - 1;
+    }
+    
+    while (true) {
+      if (pos < 0) {
+        return pos;
+      }
+      if (is_valid_key(pos)) {
+        return pos;
+      }
+      pos --;
+    }
+  }
+  
+  // called by clear, increase table size, decrease table size
+  protected void newTable(int capacity) {
+    capacity = Math.max(MIN_SIZE, Misc.nextHigherPowerOf2(capacity));
+    newKeysAndValues(capacity);
+    lower_hash_bits = init_lhb(capacity);
+    setSmallerOfMask();
+    bitMask = capacity - 1;
+    sizeWhichTriggersExpansion = (int)(capacity * loadFactor);
+  }
+  
+  protected void incrementSize() {
+    size++;
+    if (size >= sizeWhichTriggersExpansion) {
+      maybeIncreaseTableCapacity();
+    }
+  }
+
+  private void maybeIncreaseTableCapacity() {
+    
+    if (TUNE) {
+      int old_capacity = keys_length();
+      int new_capacity = 2 * old_capacity;
+      System.out.println("Capacity increasing from " + old_capacity + " to " + new_capacity + ", size=" + size);
+    }   
+    expand_table();
+  }
+    
+//  /**
+//   * only called if actually found and removed an entry
+//   */
+//  protected void commonRemove() {
+//    size --;
+////    debugValidate();
+//  }
+  
+  public int size() {
+    return size;
+  }
+  
+  protected void resetHistogram() {
+//    if (TUNE) {
+//      histogram = new int[200];
+//      Arrays.fill(histogram, 0);
+//      maxProbe = 0;
+//    }    
+  }
+
+  private void updateHistogram(int extraProbes) {
+    histogram[extraProbes] = 1 + histogram[extraProbes];
+    if (maxProbe < extraProbes) {
+      maxProbe = extraProbes;
+    }
+  }
+
+  public void showHistogram() {
+    if (TUNE) {
+      int sumI = 0;
+      for (int i : histogram) {
+        sumI += i;
+      }
+      
+      System.out.format(
+          "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
+          loadFactor, maxProbe, sumI);
+      for (int i = 0; i <= maxProbe; i++) {
+        if (i == maxProbe && histogram[i] == 0) {
+          System.out.println("huh?");
+        }
+        System.out.println(i + ": " + histogram[i]);
+      }     
+      
+      System.out.println("bytes / entry = " + (float) (keys_length()) * 8 / size());
+      System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+          size(),
+          (int) ((keys_length() >>> 1) * loadFactor),
+          (int) (keys_length() * loadFactor));
+    }
+  }
+  
+  static {
+    if (TUNE) {
+      Runtime.getRuntime().addShutdownHook(new Thread(null, () -> {
+        tune_instance.showHistogram();
+      }));     
+    }
+  }
+
+  // test case use
+  int getCapacity() {
+    return keys_length();
+  }
+  
+  protected abstract class CommonKeyIterator implements IntListIterator {
+
+    protected int curPosition;
+    
+    protected final int firstPosition;
+    
+    protected CommonKeyIterator() {
+        this.curPosition = moveToNextFilled(0);
+        firstPosition = curPosition;
+    }
+    
+    @Override
+    public boolean hasNext() {
+      return curPosition < keys_length() && curPosition >= 0;
+    }
+
+    @Override
+    public boolean hasPrevious() {
+      if (curPosition > keys_length() || curPosition <= 0) {
+        return false;
+      }
+      
+      int test = moveToPreviousFilled(curPosition - 1);
+      return test >= 0;
+    }
+
+    @Override
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+     */
+    public void moveToStart() {
+      curPosition = moveToNextFilled(0);
+    }
+
+    @Override
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+     */
+    public void moveToEnd() {
+      curPosition = moveToPreviousFilled(bitMask);
+    }
+    
+  }
+  
+  /**
+   * 
+   * @param numberOfElements -
+   * @param factor -
+   * @return capacity of the main table (either 2 byte or 4 byte entries)
+   */
+  public static int tableSpace(int numberOfElements, Float factor) {
+    if (numberOfElements < 0) {
+      throw new IllegalArgumentException("must be > 0");
+    }
+    final int capacity = Math.round(numberOfElements / factor);
+    return  Math.max(16, Misc.nextHigherPowerOf2(capacity));
+  }
+
+  protected void debugValidate() {
+    // count non-0, non-removed, compare to size
+    int sum = 0;
+    for (int i = 0; i < keys_length(); i ++) {
+      if (is_valid_key(i)) {
+        sum ++;
+        if (sum > size) {
+          System.out.println("debug");
+        }
+      }
+    }
+  }
+  
+  protected int incrPos(int pos) {
+    return bitMask & (pos + 1);
+  }
+  
+  private byte[] init_lhb(int size) {
+    byte[] lhb = new byte[size];
+    Arrays.fill(lhb,  LHB_EMPTY);
+    return lhb;
+  }
+  
+  /**
+   * Compute extra probes for an entry.
+   * Handle cases where the table size is bigger or smaller than the
+   * size of the lower-hash-bits
+   * @param entry the lower-hash-bits, represents the original probe's last 7 bits
+   * @param pos the current position
+   * @return the extra distance of this entry from the original probe
+   */
+  private int extraProbesForEntry(byte entry, int pos) {
+    return smaller_of_mask & (pos - entry);
+  }
+  
+  private void setSmallerOfMask () {
+    smaller_of_mask = Math.min(lower_hash_bits.length, 128) - 1; 
+  }
+}
diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java
new file mode 100644
index 0000000..571be51
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java
@@ -0,0 +1,794 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+
+import org.apache.uima.util.impl.Constants;
+
+/**
+ * A set of non-zero ints. 
+ *   Can be negative.
+ *   
+ *   0 reserved internally to indicate "not in the map";
+ *   you will get an exception if you try to store 0 as a value.
+ *   
+ *   0 will be returned if the value is missing from the map.
+ *    
+ *   allowed range is Integer.MIN_VALUE to Integer.MAX_VALUE 
+ *     0 is the value for an empty cell
+ *     Integer.MIN_VALUE is the value for a deleted (removed) value
+ *   
+ * based on Int2IntHashMap
+ * This impl is for use in a single thread case only
+ * 
+ * Supports shrinking (reallocating the big table)
+ *
+ * Supports representing ints as "short" 2byte values if possible,
+ *   together with an offset amount.
+ *   Because of the offset, the adjusted key could be == to the offset,
+ *   so we subtract 1 from it to preserve 0 value as being the null / empty.
+ *   For short values, the range is:
+ *        Short.MIN_VALUE+2 to Short.MAX_VALUE after Offset,
+ *        with the "0" value moved down by 1 and 
+ *        the Short.MIN_VALUE used for deleted (removed) items 
+ *   
+ *   Automatically switches to full int representation if needed  
+ */
+public class IntHashSetRh extends Common_hash_support_rh implements PositiveIntSet {
+  
+  public static final int SIZE_NEEDING_4_BYTES = 256 * 256 - 2; 
+    
+  // this load factor gives, for array doubling strategy:
+  //   between 1.5 * 4 bytes (1 word for key) = 6 and
+  //           3   * 4                          12 bytes per entry
+  // This compares with 160 bytes/entry for the IntArrayRBT impl
+      
+  // offset only used with keys2.  values stored are key - offset
+  //   intent is to have them fit into short data type.
+  //   If the offset is 100,000, then the keys range from 100000 - 32766 to 100000 + 32767, includes "0"
+  //                 -32767 == Short.MIN_VALUE + 1,   
+  //                 Numbers 0 and below are stored as n - 1, so "0" itself isn't actually stored
+  //                   because it's used to represent an empty slot
+  //   - value after key adjustment of 0 reserved; if value is 0 - means no entry
+  //   store values at or below the offset are stored as 1 less to avoid mapping to "0".
+  
+  private static final short EMPTY2 = 0;
+  private int offset;
+ 
+  private int [] keys4;
+  private short[] keys2;
+  
+  private boolean isMake4;
+  
+  // these are true values (before any offset adjustment)
+  private int mostPositive = Integer.MIN_VALUE;
+  private int mostNegative = Integer.MAX_VALUE;
+      
+  public IntHashSetRh() {
+    this(10, 0);
+  }
+  
+  public IntHashSetRh(int initialCapacity) {
+    this(initialCapacity, 0);
+  }
+  
+  /**
+   * 
+   * @param initialSizeBeforeExpanding - you can add this many before expansion
+   * @param offset - for values in the short range, the amount to subtract
+   *                 before storing.
+   *                 If == MIN_VALUE, then force 4 byte ints
+   */
+  public IntHashSetRh(int initialSizeBeforeExpanding, int offset) {
+    super(initialSizeBeforeExpanding);
+    isMake4 = offset == Integer.MIN_VALUE;
+    this.offset = isMake4 ? 0 : offset;
+    newTable(this.initialCapacity);
+    resetTable();
+    if (IS_TRACE_MODE_SWITCH) {
+      System.out.println("TRACE_MODE new IntHashSet, sizeBeforeExpanding = " + initialSizeBeforeExpanding + ", offset= " + offset);
+    }
+  }
+  
+//  /**
+//   * The number of 32 bit words that are reserved when 
+//   * creating a table to hold the specified number of elements
+//   * 
+//   * The number is a power of 2.
+//   * 
+//   * The number is at least 16.
+//   * 
+//   * The number is such that you could add this many elements without
+//   *   triggering the capacity expansion.
+//   *   
+//   * @param numberOfElements -
+//   * @return -
+//   */
+//  public int tableSpace(int numberOfElements) {
+//    return tableSpace(numberOfElements, loadFactor);
+//  }
+//  
+  
+  public static int tableSpace(int numberOfElements) {
+    return Common_hash_support.tableSpace(numberOfElements, 0.66f);
+  }
+  
+  
+  /**
+   * Method called by handleHashSet in PositiveIntSet
+   * to indicate if adding this many items would cause an expansion
+   * @return true if would not expand
+   */
+  public boolean wontExpand() {
+    return wontExpand(1);
+  }
+  
+  /**
+   * Method called by handleHashSet in PositiveIntSet
+   * to indicate if adding this many items would cause an expansion
+   * @param n the number of items added
+   * @return true if would not expand
+   */
+  public boolean wontExpand(int n) {
+    return (size() + n) < sizeWhichTriggersExpansion;  
+  }
+  
+  public int getSpaceUsedInWords() {
+    return (keys4 != null) ? keys4.length : (keys2.length >> 1);  
+  }
+  
+  public static int getSpaceOverheadInWords() {
+    return 11;
+  }
+    
+  /**
+   * Only call this if using short values with offset
+   * @param adjKey
+   * @return raw key
+   */
+  private int getRawFromAdjKey(int adjKey) {
+    assert (adjKey != Short.MIN_VALUE);
+    return adjKey + offset + ((adjKey < 0) ? 1 : 0); 
+  }
+  
+  
+  private void resetTable() {
+    mostPositive = Integer.MIN_VALUE;
+    mostNegative = Integer.MAX_VALUE;
+//    resetHistogram();
+//    size = 0;
+//    nbrRemoved = 0;    
+  }
+  
+  @Override
+  public void clear() {
+    super.clear();
+    resetTable();
+  }
+
+   
+  // only called when keys are shorts
+  private boolean isAdjKeyOutOfRange(int adjKey) {
+    return (adjKey > Short.MAX_VALUE ||
+           // the minimum adjKey value stored in a short is
+           // Short.MIN_VALUE + 1
+            adjKey <= Short.MIN_VALUE);   
+  }
+  
+  @Override
+  public boolean contains(int rawKey) {
+    int pos = findPosition(rawKey);
+    if (isEmpty(pos)) {
+      return false;
+    }
+    return get(pos) == rawKey;
+  }
+  
+  /** 
+   * This method is part of the PositiveSet API, and
+   * is defined to return an int that could be used with
+   *   iterators to position them.  
+   *   
+   * For this case, it is not used, because 
+   *   the iterators don't support positioning this way
+   *   because they are not sorted.
+   */
+  @Override
+  public int find(int rawKey) {
+    throw new UnsupportedOperationException();
+  }
+  
+
+  /**
+   * @param rawKey the key value to find
+   * @return the position in the table if present, otherwise the position of the slot where the 
+   *         key value would be added, unless the new value is at a position which would require
+   *         the key2 form to be switched to the key4 form, in which case,
+   *         -1 is returned (means not found, and requires conversion to 4 byte keys)
+   */
+  private int findPosition(int rawKey) {
+    
+    if (rawKey == 0) {  
+      throw new IllegalArgumentException("0 is an invalid key");
+    }
+    
+    if (keys4 == null) {
+      // special handling for 2 byte keys with offsets
+      // check for keys never stored in short table 
+      //   adjKey of Short.MIN_VALUE which is the removed flag
+      final int adjKey = getAdjKey(rawKey);
+      if (isAdjKeyOutOfRange(adjKey)) {
+        return -1;
+      }
+      
+      
+      return findPositionAdjKey(adjKey);
+      
+    } else {
+      
+      
+      return findPosition4(rawKey);
+    }
+    
+  }
+   
+  private int findPosition4(int rawKey) {
+    
+    return super.findPosition(
+        
+        // key hash
+        Misc.hashInt(rawKey),
+        
+        // is_eq || not present
+        i -> keys4[i] == rawKey || keys4[i] == 0
+        
+      );    
+  }
+  
+  private int findPositionAdjKey(int adjKey) {
+    // special handling for 2 byte keys with offsets
+    // check for keys never stored in short table 
+    //   adjKey of Short.MIN_VALUE which is the removed flag
+        
+    return super.findPosition(
+          
+          // key hash
+          Misc.hashInt(adjKey),
+          
+          // is_eq
+          i -> keys2[i] == 0 || keys2[i] == adjKey
+        );    
+  }
+  
+  
+  /**
+   * return the adjusted key.
+   *   never called for 4 byte form
+   *   for 2 byte key mode, subtract the offset, and adjust by -1 if 0 or less
+   *     Note: returned value can be less than Short.MIN_VALUE 
+   * @param rawKey
+   * @return adjusted key, a range from negative to positive, but never 0
+   */
+  private int getAdjKey(int rawKey) {
+//    if (rawKey == 0 || (rawKey == Integer.MIN_VALUE)) {
+//      throw new ArrayIndexOutOfBoundsException(rawKey);
+//    }
+//    if (keys4 != null) {
+//      return rawKey;
+//    }
+    int adjKey = rawKey - offset;
+    return adjKey - (  (adjKey <= 0) ? 1 : 0);
+  }
+  
+  private void switchTo4byte(int capacity) {
+    // convert to 4 byte because values can't be offset and fit in a short
+    final short[] oldKeys = keys2;
+    isMake4 = true;
+    newTable(capacity);  // make a 4 table. same size
+    for (short adjKey : oldKeys) {
+      if (adjKey != EMPTY2 ) {
+        find4AndAddIfMissing(getRawFromAdjKey(adjKey));
+      }
+    } 
+  }
+   
+  /**
+   * 
+   * @param rawKey -
+   * @return true if this set did not already contain the specified element
+   */
+  @Override
+  public boolean add(int rawKey) {
+    if (rawKey == 0) {
+      throw new IllegalArgumentException("argument must be non-zero");
+    }
+       
+    if (size() == 0) {
+      mostPositive = mostNegative = rawKey;
+    } else {
+      if (rawKey > mostPositive) {
+        mostPositive = rawKey;
+      }
+      if (rawKey < mostNegative) {
+        mostNegative = rawKey;
+      }
+    }
+    
+    if (keys4 != null) {
+      return maybeAdd4(rawKey);      
+      // short keys
+    } else {
+      int adjKey = getAdjKey(rawKey);
+      if (isAdjKeyOutOfRange(adjKey)) {
+        switchTo4byte(getCapacity());
+        return maybeAdd4(rawKey);
+        
+        // key in range
+      } else {
+        return maybeAdd2(adjKey);
+      }
+    }
+  }
+  
+  private boolean maybeAdd2(int adjKey) {
+    boolean wasAdded = find2AndAddIfMissing(adjKey);  
+    if (wasAdded) {
+      incrementSize();
+    }
+    return wasAdded;
+  }
+    
+  private boolean maybeAdd4(int rawKey) {
+    boolean wasAdded = find4AndAddIfMissing(rawKey);
+    if (wasAdded) {
+      incrementSize();
+    }
+    return wasAdded;     
+  }
+  
+  private boolean find4AndAddIfMissing(int rawKey) {
+   
+    int i = findPosition4(rawKey);
+    if (keys4[i] == rawKey) {
+      return false;  // already in table
+    }
+    
+    int c = keys4[i];
+    int saved = c;
+    
+    byte prev_lhb = setLhb(i, initial_probe_lhb);
+    keys4[i] = rawKey;
+    
+    if (saved == 0) {
+      return true;
+    }
+    // robin hood - if stole slot, add that slot back
+    rh_add4(incrPos(i), saved, prev_lhb);   
+    return true;
+  }
+  
+  private void rh_add4(int pos, int v, byte lhb) {
+    while (true) {
+      int c = keys4[pos];
+      int saved = c;  // if empty, is 0
+      
+      byte prev_lhb = setLhb(pos, lhb);
+      keys4[pos] = v;
+      
+      if (saved == 0) {
+        break;
+      }
+      // robin hood - if stole slot, add that slot back
+      v = saved;
+      lhb = prev_lhb;
+      pos = incrPos(pos);
+    }
+  }
+  
+  private boolean find2AndAddIfMissing(int adjKey) {
+    int i = findPositionAdjKey(adjKey);
+ 
+    short c = keys2[i];
+    if (c == (short)adjKey) {
+      return false;  // already in table
+    }
+    
+    short saved = c;  // if empty, is 0
+    
+    byte prev_lhb = setLhb(i, initial_probe_lhb);
+    keys2[i] = (short) adjKey;
+    
+    if (saved == 0) {
+      return true;
+    }
+    // robin hood - if stole slot, add that slot back
+    rh_add2(incrPos(i), saved, prev_lhb);
+    return true;    
+  }
+  
+  private void rh_add2(int pos, short v, byte lhb) {
+    while (true) {
+      short c = keys2[pos];
+      short saved = c;  // if empty, is 0
+      
+      byte prev_lhb = setLhb(pos, lhb);
+      keys2[pos] = v;
+      
+      if (saved == 0) {
+        break;
+      }
+      // robin hood - if stole slot, add that slot back
+      v = saved;
+      lhb = prev_lhb;
+      pos = incrPos(pos);
+    }
+    
+  }
+          
+  @Override
+  protected void shift(int prev, int next) {
+    if (keys4 == null) {
+      keys2[prev] = keys2[next];
+    } else {
+      keys4[prev] = keys4[next];
+    }
+  }
+  
+  @Override
+  protected void setEmpty(int pos) { 
+    if (keys4 == null) {
+      keys2[pos] = 0;
+    } else {
+      keys4[pos] = 0;
+    }
+    
+  }
+  /**
+   * mostPositive and mostNegative are not updated
+   *   for removes.  So these values may be inaccurate,
+   *   but mostPositive is always &gt;= actual most positive,
+   *   and mostNegative is always &lt;= actual most negative.
+   * No conversion from int to short
+   *
+   * Does slot shifting on remove
+   * 
+   * @param rawKey the value to remove
+   * @return true if the key was present
+   */
+  @Override
+  public boolean remove(int rawKey) {
+//    debugValidate();
+    final int pos = findPosition(rawKey);
+    if (isEmpty(pos)) {
+      return false;
+    }
+    
+    if (get(pos) != rawKey) {
+      return false;
+    }
+    remove_common(pos);
+    
+//    //debug
+//    if (size() <= 0) 
+//      System.out.println("debug");
+//    assert size() > 0;
+
+    if (rawKey == mostPositive) {
+      mostPositive --;  // a weak adjustment
+    }
+    if (rawKey == mostNegative) {
+      mostNegative ++;  // a weak adjustment
+    }
+   
+    return true;
+  }    
+    
+  /**
+   * 
+   * @return a value that is &gt;= the actual most positive value in the table.
+   *   it will be == unless a remove operation has removed a most positive value
+   */
+  public int getMostPositive() {
+    return mostPositive;
+  }
+  
+  /**
+   * 
+   * @return a value that is &lt;= the actual least positive value in the table.
+   *   It will be == unless remove operations has removed a least positive value.
+   */
+  public int getMostNegative() {
+    return mostNegative;
+  }
+   
+  public void showHistogram() {
+    if (TUNE) {
+      int sumI = 0;
+      for (int i : histogram) {
+        sumI += i;
+      }
+      
+      System.out.format(
+          "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
+          loadFactor, maxProbe, sumI);
+      for (int i = 0; i <= maxProbe; i++) {
+        if (i == maxProbe && histogram[i] == 0) {
+          System.out.println("huh?");
+        }
+        System.out.println(i + ": " + histogram[i]);
+      }     
+      
+      if (keys4 == null) {
+        System.out.println("bytes / entry = " + (float) (keys2.length) * 2 / size());
+        System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+          size(),
+          (int) ((keys2.length >>> 1) * loadFactor),
+          (int) (keys2.length * loadFactor));
+      } else {
+        System.out.println("bytes / entry = " + (float) (keys4.length) * 4 / size());
+        System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+          size(),
+          (int) ((keys4.length >>> 1) * loadFactor),
+          (int) (keys4.length * loadFactor));        
+      }
+    }
+  }
+  
+  /**
+   * For iterator use, position is a magic number returned by the internal find
+   * For short keys, the value stored for adjKey == 0 is -1, adjKey == -1 is -2, etc.
+   */
+  @Override
+  public int get(int pos) {
+    final int adjKey;
+    if (keys4 == null) {
+      adjKey = keys2[pos];
+      if (adjKey == 0 ) {
+        return 0;  // null, not present
+      }
+      return getRawFromAdjKey(adjKey);
+    } else {
+      adjKey = keys4[pos];
+      if (adjKey == 0 ) {
+        return 0;  // null, not present
+      }
+      return adjKey;
+    }
+  }
+  
+
+  private class IntHashSetIterator implements IntListIterator {
+
+    private int curPosition;
+
+    private IntHashSetIterator() {
+      this.curPosition = 0;
+    }
+
+    public final boolean hasNext() {
+      curPosition = moveToNextFilled(curPosition);
+      return curPosition < getCapacity();
+    }
+
+    public final int nextNvc() {
+      curPosition = moveToNextFilled(curPosition);
+      int r = get(curPosition);
+      curPosition = moveToNextFilled(curPosition + 1);
+      return r;
+    }
+    
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#hasPrevious()
+     */
+    public boolean hasPrevious() {
+      int prev = moveToPreviousFilled(curPosition - 1);
+      return (prev >= 0);
+    }
+    
+    @Override
+    public int previous() {
+      curPosition = moveToPreviousFilled(curPosition - 1);
+      if (curPosition < 0) {
+        throw new NoSuchElementException();
+      }
+      return get(curPosition);
+    }
+    
+    @Override
+    public int previousNvc() {
+      curPosition = moveToPreviousFilled(curPosition - 1);
+      return get(curPosition);
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+     */
+    public void moveToEnd() {
+      curPosition = getCapacity() - 1;
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+     */
+    public void moveToStart() {
+      curPosition = 0;
+    }
+  } 
+  
+  
+  @Override
+  public IntListIterator iterator() {
+    return new IntHashSetIterator();
+  }
+
+  @Override
+  public int moveToFirst() {
+    return (size() == 0) ? -1 : moveToNextFilled(0);
+  }
+
+  @Override
+  public int moveToLast() {
+    return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+  }
+
+  @Override
+  public int moveToNext(int position) {
+    if (position < 0) {
+      return position;
+    }
+    final int n = moveToNextFilled(position + 1); 
+    return (n >= getCapacity()) ? -1 : n;
+  }
+
+  @Override
+  public int moveToPrevious(int position) {
+    if (position >= getCapacity()) {
+      return -1;
+    }
+    return moveToPreviousFilled(position - 1);
+  }
+
+  @Override
+  public boolean isValid(int position) {
+    return (position >= 0) && (position < getCapacity());
+  }
+
+  @Override
+  public void bulkAddTo(IntVector v) {
+    if (null == keys4) {
+      for (int k : keys2) {
+        if (k != 0) {
+          v.add(getRawFromAdjKey(k));
+        }
+      }
+    } else {
+      for (int k : keys4) {
+        if (k != 0) {
+          v.add(k);
+        }
+      }
+    }
+  }
+
+  @Override
+  public int[] toIntArray() {
+    final int s = size();
+    if (s == 0) {
+      return Constants.EMPTY_INT_ARRAY;
+    }
+    final int[] r = new int[size()];
+    int pos = moveToFirst();
+    for (int i = 0; i < r.length; i ++) {
+      r[i] = get(pos);
+      pos = moveToNextFilled(pos + 1);
+    }
+    return r;
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#toString()
+   */
+  @Override
+  public String toString() {
+    return String
+        .format(
+            "IntHashSet [loadFactor=%s, initialCapacity=%s, sizeWhichTriggersExpansion=%s, size=%s, offset=%s%n keys4=%s%n keys2=%s%n secondTimeShrinkable=%s, mostPositive=%s, mostNegative=%s]",
+            loadFactor, initialCapacity, sizeWhichTriggersExpansion, size(), offset,
+            Arrays.toString(keys4), Arrays.toString(keys2), secondTimeShrinkable, mostPositive,
+            mostNegative);
+  }
+  
+  // for testing
+  boolean isShortHashSet() {
+    return keys2 != null;
+  }
+  
+  int getOffset() {
+    return offset;
+  }
+
+  @Override
+  protected boolean is_valid_key(int pos) {
+    if (keys4 == null) {
+      return keys2[pos] != 0;
+    }
+    return keys4[pos] != 0;
+  }
+
+  @Override
+  protected int keys_length() {
+    return (keys4 == null) ? keys2.length : keys4.length;
+  }
+
+  @Override
+  protected void newKeysAndValues(int capacity) {
+    if (isMake4) {
+      keys4 = new int[capacity];
+      keys2 = null;
+    } else {
+      keys2 = new short[capacity];
+      keys4 = null;
+    }
+  }
+
+  @Override
+  protected void clearKeysAndValues() {
+    if (keys4 == null) {
+      Arrays.fill(keys2,  (short)0);
+    } else {
+      Arrays.fill(keys4, 0);
+    }  
+    resetTable();
+  }
+
+  @Override 
+  protected void expand_table() {
+    int old_capacity = keys_length();
+    int new_capacity = old_capacity * 2;
+    
+    if (keys4 == null) {
+      if (new_capacity >= 256 * 256) { 
+        // switch to 4
+        if (TUNE) {System.out.println("Switching to 4 byte keys");}
+        switchTo4byte(new_capacity);
+      } else {
+        final short[] oldKeys = keys2;
+        newTable(new_capacity);
+        for (int v :oldKeys) {
+          if (v != 0) {
+            find2AndAddIfMissing(v);
+          }
+        }
+      }      
+    } else {
+      // keys4
+      final int[] oldKeys = keys4;
+      newTable(new_capacity);
+      for (int v :oldKeys) {
+        if (v != 0) {
+          find4AndAddIfMissing(v);
+        }
+      }      
+    }
+  } 
+  
+}
diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java
new file mode 100644
index 0000000..d70c7d3
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.uima.cas.FeatureStructure;
+
+/**
+ * A set of Objects of type T 
+
+ * This impl is for use in a single thread case only, when table is being updated.
+ * Multiple reader threads are OK if there's no writing.
+ * 
+ * Supports shrinking (reallocating the big table)
+ * 
+ * Uses robin hood method, with 
+ *   incrementing collision resolution, and shift backwards on remove, and 1 byte lower-hash-bits aux table 
+ *   for locality of ref.
+ */
+public class ObjHashSetRh<T> extends Common_hash_support_rh implements Set<T> {
+  
+//  public static final float DEFAULT_LOAD_FACTOR = 0.66F;
+
+  private final Class<T> clazz;  // for reifying the T type, resizing table, toArray operations
+   
+//  private final float loadFactor = DEFAULT_LOAD_FACTOR;  
+  
+//  private final int initialCapacity; 
+//
+//  private int histogram [];
+//  private int maxProbe = 0;
+//
+//  private int sizeWhichTriggersExpansion;
+//  private int size; // number of elements in the table
+//  private int nbrRemoved; // number of removed elements (coded as removed)
+  
+  // the actual Object table, operated as a hashtable
+  private T [] keys;
+//  final private T [] emptyKeyArray;  
+
+//  private boolean secondTimeShrinkable = false;
+  
+//  private int modificationCount = 0; // not currently used
+  
+  public ObjHashSetRh(Class<T> clazz) {
+    this(12, clazz);  // default initial size
+  }
+    
+  /**
+   * @param initialCapacity - you can add this many before expansion
+   * @param clazz - a superclass of the stored items
+   * @param removedMarker - a unique value never stored in the table, used to mark removed items
+   */
+  public ObjHashSetRh(int initialCapacity, Class<T> clazz) {
+    super(initialCapacity);
+    this.clazz = clazz;
+    newTable(initialCapacity);
+  }
+  
+  /**
+   * Copy constructor
+   * @param ohs -
+   */
+  public ObjHashSetRh(ObjHashSetRh<T> ohs) {
+    super(ohs);
+    this.clazz = ohs.clazz;
+    this.keys = Arrays.copyOf(ohs.keys, ohs.keys.length);
+  }
+  
+
+  public ObjHashSetRh(ObjHashSetRh<T> ohs, boolean readOnly) {
+    this(ohs);
+    if (!readOnly) Misc.internalError();
+  }
+
+
+  /** 
+  * returns a position in the key/value table
+  *   if the key is not found, then the position will be to the
+  *   place where the key would be put upon adding (without reusing REMOVED, and the 
+  *   current internal value of keys[position] would be 0.
+  *   
+  *   if the key is found, then keys[position] == key
+  * @param obj the object to find
+  * @return the probeAddr in keys array - might reference a slot holding null, or the key value if found
+  */
+  private int findPosition(final T key) {
+    if (key == null) {
+      throw new IllegalArgumentException("null is an invalid key");
+    }
+    
+    return findPosition(
+        
+        // key hash
+        Misc.hashInt(key.hashCode()),
+        
+        // is_eq
+        i -> keys[i].equals(key));
+  }
+  
+
+  @Override
+  public boolean contains(Object obj) {  // arg must be Object to fit Collection API
+    return (clazz.isAssignableFrom(obj.getClass())) ? (find((T) obj) != -1) : false;
+  }
+  
+  /**
+   * @param obj the object to find in the table (if it is there)
+   * @return the position of obj in the table, or -1 if not in the table
+   */
+  public int find(T obj) {
+    if (obj == null || size() == 0) {
+      return -1;
+    }
+    
+    final int pos = findPosition(obj);
+    return obj.equals(keys[pos]) ? pos : -1;  // keys[pos] can be null
+  }
+      
+  /**
+   * 
+   * @param obj - the object to add
+   * @return true if this set did not already contain the specified element
+   */
+  @Override
+  public boolean add(T obj) {           
+    final int i = findPosition(obj);
+    T c = keys[i];
+    if (obj.equals(c)) { // keys[i] may be null
+      return false;  // false if already present
+    }
+    
+    // i is ref to empty or non-empty (but not equal) slot which should be stolen
+    add_new(obj, c, i);
+    incrementSize();
+    return true;
+  }
+  
+  private void add_new(T obj, T saved, int i) {
+    keys[i] = obj;
+    byte prev_lhb = setLhb(i, initial_probe_lhb);
+    
+    if (saved != null) {
+      rh_add(incrPos(i), saved, prev_lhb);    
+    }    
+  }
+
+  private void rh_add(int pos,T v, byte lhb) {
+    while (true) {
+      T c = keys[pos];
+      T saved = c;  // if empty, is 0
+      
+      byte prev_lhb = setLhb(pos, lhb);
+      keys[pos] = v;
+      
+      if (saved == null) {
+        break;
+      }
+      // robin hood - if stole slot, add that slot back
+      v = saved;
+      lhb = prev_lhb;
+      pos = incrPos(pos);
+    } 
+  }
+  
+  @Override
+  protected void shift(int prev, int next) {
+      keys[prev] = keys[next];
+  }
+  
+  @Override
+  protected void setEmpty(int pos) { 
+    keys[pos] = null;
+  }
+    
+  
+  /**
+   * Using robin hood shifting for removed item
+   * @param rawKey the value to remove
+   * @return true if the key was present
+   */
+  @Override
+  public boolean remove(Object rawKey) {
+    if (rawKey == null) {
+      return false;
+    }
+    
+    final int pos = findPosition((T) rawKey);  // null or equal or collision obj
+    T c = keys[pos];
+    if (c == null || ! c.equals(rawKey)) {
+      return false;
+    }
+    
+    remove_common(pos);
+    return true;
+  } 
+  
+  private void removeAtPosition(int pos) {
+    remove_common(pos);
+  }
+  
+
+//  /**
+//   * @see Set#size()
+//   */
+//  @Override
+//  public int size() {
+//    return size;
+//  }
+  
+//  public void showHistogram() {
+//    if (TUNE) {
+//      int sumI = 0;
+//      for (int i : histogram) {
+//        sumI += i;
+//      }
+//      
+//      System.out.format(
+//          "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
+//          loadFactor, maxProbe, sumI);
+//      for (int i = 0; i <= maxProbe; i++) {
+//        if (i == maxProbe && histogram[i] == 0) {
+//          System.out.println("huh?");
+//        }
+//        System.out.println(i + ": " + histogram[i]);
+//      }     
+//      
+//      System.out.println("bytes / entry = " + (float) (keys.length) * 4 / size());
+//      System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+//        size(),
+//        (int) ((keys.length >>> 1) * loadFactor),
+//        (int) (keys.length * loadFactor));        
+//    }
+//  }
+  
+  /**
+   */
+  /**
+   * For iterator use
+   * @param index - a magic number returned by the internal find
+   * @return the T at that spot, or null if nothing there 
+   */
+  public T get(int index) {
+    T obj = keys[index];
+    if (obj == null) {
+      return null;  // null, not present
+    }
+    return obj;
+  }
+  
+////  /**
+////   * advance pos until it points to a non 0 or is 1 past end
+////   * @param pos -
+////   * @return updated pos
+////   */
+////  public int moveToNextFilled(int pos) {
+//////    if (pos < 0) {
+//////      pos = 0;
+//////    }
+////    
+////    final int max = getCapacity();
+////    while (true) {
+////      if (pos >= max) {
+////        return pos;
+////      }
+////      T v = keys[pos];
+////      if (v != null && v != removedMarker) {
+////        return pos;
+////      }
+////      pos ++;
+////    }
+////  }
+//   
+//  /**
+//   * decrement pos until it points to a non 0 or is -1
+//   * @param pos -
+//   * @return updated pos
+//   */
+//  public int moveToPreviousFilled(int pos) {
+//    final int max = getCapacity();
+//    if (pos > max) {
+//      pos = max - 1;
+//    }
+//    
+//    while (true) {
+//      if (pos < 0) {
+//        return pos;
+//      }
+//      T v = keys[pos];
+//      if (v != null && v != removedMarker) {
+//        return pos;
+//      }
+//      pos --;
+//    }
+//  }
+
+  private class ObjHashSetIterator implements Iterator<T> {
+
+    /**
+     * Keep this always pointing to a non-0 entry, or
+     * if not valid, outside the range
+     */
+    protected int curPosition;
+
+    private ObjHashSetIterator() {
+      this.curPosition = moveToNextFilled(0);
+    }
+
+    @Override
+    public final boolean hasNext() {
+      return curPosition < getCapacity();
+    }
+
+    @Override
+    public final T next() {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      T r = get(curPosition);
+      curPosition = moveToNextFilled(curPosition + 1);
+      return r;
+    }
+
+    @Override
+    public void remove() {
+      int pos = moveToPreviousFilled(curPosition - 1);
+      if (pos >= 0) {
+        removeAtPosition(pos);
+      }
+    }   
+  }
+  
+    
+  @Override
+  public Iterator<T> iterator() {
+    return new ObjHashSetIterator();
+  }
+  
+  private int moveToFirst() {
+    return (size() == 0) ? -1 : moveToNextFilled(0);
+  }
+
+//  private int moveToLast() {
+//    return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+//  }
+
+//  private int moveToNext(int position) {
+//    if (position < 0) {
+//      return position;
+//    }
+//    final int n = moveToNextFilled(position + 1); 
+//    return (n >= getCapacity()) ? -1 : n;
+//  }
+//
+//  private int moveToPrevious(int position) {
+//    if (position >= getCapacity()) {
+//      return -1;
+//    }
+//    return moveToPreviousFilled(position - 1);
+//  }
+
+//  public boolean isValid(int position) {
+//    return (position >= 0) && (position < getCapacity());
+//  }
+  
+  /**
+   * if the fs is in the set, the iterator should return it.
+   * if not, return -1 (makes iterator invalid)
+   * @param fs position to this fs
+   * @return the index if present, otherwise -1;
+   */
+  public int moveTo(FeatureStructure fs) {
+    if (clazz.isAssignableFrom(fs.getClass())) {
+      int pos = find((T)fs);
+      if (pos >= 0) {
+        return pos;
+      }
+    }
+    return -1; 
+  }
+
+  @Override
+  public <T2> T2[] toArray(T2[] a) {
+    final int s = size();
+    if (s == 0) {
+      if (a.length >= 1) { 
+        a[0] = null;  // part of the contract of toArray, where the array a size is > 
+      }
+      return a;
+    }
+    
+    final T2[] r = (a.length >= s)? a : (T2[]) Array.newInstance(a.getClass(), s);
+    int pos = moveToFirst();
+    for (int i = 0; i < s; i ++) {
+      r[i] = (T2) get(pos);
+      pos = moveToNextFilled(pos + 1);
+    }
+    if (a.length > s) {
+      r[s] = null; // part of the contract of toArray, where the array a size is > 
+    }
+    return r;
+  }
+
+  @Override
+  public T[] toArray() {
+    return toArray((T[]) Array.newInstance(clazz, size()));
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#toString()
+   */
+  @Override
+  public String toString() {
+    return String
+        .format(
+            "%s [loadFactor=%s, initialCapacity=%s, sizeWhichTriggersExpansion=%s, size=%s, secondTimeShrinkable=%s%n keys=%s]",
+            this.getClass().getName(), loadFactor, initialCapacity, sizeWhichTriggersExpansion, size(), secondTimeShrinkable, 
+            Arrays.toString(keys));
+  }
+
+  // Collection methods
+  @Override
+  public boolean isEmpty() {
+    return size() == 0;
+  }
+
+  @Override
+  public boolean containsAll(Collection<?> c) {
+    c.stream().allMatch(item -> contains(item));
+    return false;
+  }
+
+  @Override
+  public boolean addAll(Collection<? extends T> c) {
+    boolean[] anyChanged = {false};
+    c.stream().forEach(item -> anyChanged[0] |= add(item));
+    return anyChanged[0];
+  }
+
+  @Override
+  public boolean removeAll(Collection<?> c) {
+    boolean[] anyChanged = {false};
+    c.stream().forEach(item -> anyChanged[0] |= remove(item));
+    return anyChanged[0];
+  }
+
+  @Override
+  public boolean retainAll(Collection<?> c) {
+    boolean anyChanged = false;
+    Iterator<T> it = iterator();
+    while (it.hasNext()) {
+      T v = it.next();
+      if (!c.contains(v)) {
+        anyChanged = true;
+        it.remove();
+      }
+    }
+    return anyChanged;
+  }
+
+  @Override
+  protected boolean is_valid_key(int pos) {
+    return keys[pos] != null;
+  }
+
+  @Override
+  protected int keys_length() {
+    return keys.length;
+  }
+
+  @Override
+  protected void newKeysAndValues(int capacity) {
+    keys = (T[]) Array.newInstance(clazz, capacity);
+  }
+
+  @Override
+  protected void clearKeysAndValues() {
+    Arrays.fill(keys, null);   
+  }
+
+  @Override
+  protected void expand_table() {
+    int old_capacity = keys_length();
+    int new_capacity = old_capacity * 2;
+    
+    final T[] oldKeys = keys;
+    newTable(new_capacity);
+    for (T v : oldKeys) {
+      if (v != null) { 
+        int pos = findPosition_new( Misc.hashInt(v.hashCode()));
+        add_new(v, keys[pos], pos);
+      }
+    }      
+  }
+
+  
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
new file mode 100644
index 0000000..5bb5286
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Random;
+
+import org.apache.uima.internal.util.rb_trees.IntArrayRBT;
+
+import junit.framework.TestCase;
+
+public class IntHashSetPerfTestRh extends TestCase {
+  /**
+   * Set to false to run the performance test
+   * 
+   * Tests both IntHashSet and IntBitSet
+   */
+  final boolean SKIP = false;
+  
+  static int cacheLoadSize;
+  
+  static long seed = 3737463135938899369L;
+//      new Random().nextLong();
+  static {
+    System.out.println("Random seed for IntHashSetPerfTest: "  + seed);
+  }
+  Random r = new Random(seed);
+
+//  Set<Integer> keys = new HashSet<Integer>(1000);
+  
+  int dmv = 0;
+  
+  IntHashSetRh m2;
+  IntArrayRBT m1;
+  IntBitSet m3;
+  
+  final int[] keys10000 = new int[511111];
+  int k10ki = 0;
+  
+  
+  public void testPerf() {
+    if (SKIP) return;
+    m1 = new IntArrayRBT(16);
+    m2 = new IntHashSetRh(16);
+    m3 = new IntBitSet(16);
+     
+    for (int i = 0; i < keys10000.length; i++) {
+      int k = r.nextInt(511110);
+     
+      keys10000[i] = k + 1;
+    }
+
+    System.out.format("%n%n W A R M U P %n%n");
+    cacheLoadSize = 0;
+    warmup(m1);
+    warmup(m2);
+    
+    System.out.format("%n%n Time 100 %n%n");
+    timelp(100);
+    System.out.format("%n%n Time 1000 %n%n");
+    timelp(1000);
+    System.out.format("%n%n Time 10000 %n%n");
+    timelp(10000);
+    System.out.format("%n%n Time 100000 %n%n");
+    timelp(100000);
+    cacheLoadSize = 0; // 1 * 256 * 1;
+    System.out.format("%n%n Time 100000 %n%n");
+    timelp(100000);
+    
+    System.out.format("%n%n Time 500000 %n%n");
+    timelp(500000);
+
+    System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+    timelp(500000);
+    System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+    timelp(500000);
+    System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+    timelp(500000);
+
+    
+    System.out.println(dmv);
+  }
+  private void time2(int n) {
+//    float f1 = time(m1, n);
+    float f2 = time(m2, n);
+    float f3 = time(m3, n);
+    System.out.format(" ratio "
+//        + "RBT/hash = %.3f   RTB/bitset = %.3f  "
+        + "hash/bitset = %.3f%n", 
+//        f1/f2,  f1/f3, 
+        f2/f3);   
+  }
+  
+  private void timelp(int n) {
+    time2(n);
+    time2(n);
+    time2(n);
+  }
+
+  private void warmup(Object m) {
+    for (int i = 0; i < 500; i++) {
+      inner(m,true, 1000) ; // warm up
+    }
+  }
+  
+  private float time(Object m, int ss) {
+    long start = System.nanoTime();
+    for (int i = 0; i < 500; i++) {
+      inner(m,false, ss);
+    }
+    float t = (System.nanoTime() - start) / 1000000.0F;
+    System.out.format("time for %,d:  %s is %.3f milliseconds %n", ss,
+        m.getClass().getSimpleName(),
+        t);
+    return t;
+  }
+  
+  private int nextKey() {
+    int r = keys10000[k10ki++];
+    if (k10ki >= keys10000.length) {
+      k10ki = 0;
+    }
+    return r;
+  }
+  
+  private void inner(Object m, boolean check, int ss) {
+    CS cs = new CS(m);     
+
+    for (int i = 0; i < ss; i++) {
+      
+      int k = keys10000[i];
+//      System.out.print(" " + k);
+//      if (i %100 == 0) System.out.println("");
+//      keys.add(k);
+      cs.add(k);
+      cacheLoad(i);
+      if (check) {
+        assertTrue(cs.contains(k));
+      }
+    }
+    for (int i = 0; i < ss; i++) {
+      boolean v = cs.contains(keys10000[i]); 
+      if (!v) {
+        throw new RuntimeException("never happen");
+      }
+      dmv += 1;
+      cacheLoad(i);
+    }
+    cs.clear();
+    
+
+//    for (int k : keys) {     
+//      assertEquals(10000 + k, (m instanceof IntHashSetRh) ? 
+//          ((IntHashSetRh)m).get(k) :
+//          ((IntArrayRBT)m).getMostlyClose(k));
+//    }
+
+  }
+  
+  static class CS {
+    final Object set;
+    
+    CS(Object set) {
+      this.set = set;
+    }
+    
+    boolean contains(int i) {
+      return (set instanceof IntArrayRBT) ? ((IntArrayRBT)set).contains(i) :
+             (set instanceof IntHashSetRh)  ? ((IntHashSetRh) set).contains(i) :
+                                            ((IntBitSet)  set).contains(i);
+    }
+    
+    void add(int i) {
+      if (set instanceof IntArrayRBT) {
+        ((IntArrayRBT)set).add(i);
+      } else if (set instanceof IntHashSetRh) {
+        ((IntHashSetRh)set).add(i);
+      } else {
+        ((IntBitSet)set).add(i);
+      }    
+    }
+    
+    void clear() {
+      if (set instanceof IntArrayRBT) {
+        ((IntArrayRBT)set).clear();
+      } else if (set instanceof IntHashSetRh) {
+        ((IntHashSetRh)set).clear();
+      } else {
+        ((IntBitSet)set).clear();
+      }    
+    }
+
+  }
+  
+  void cacheLoad(int i) {
+    if (cacheLoadSize > 0) {
+      int[] cl = new int[cacheLoadSize];
+      if (i != 100000) {
+        cl = null;
+      }
+    }
+  }
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java
new file mode 100644
index 0000000..09d3365
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+
+public class IntHashSetTestRh extends TestCase {
+  
+  IntHashSetRh ihs;
+  
+  Random random;
+  
+  public void setUp() {
+    ihs = new IntHashSetRh();
+  }
+  
+  public void testBasic() {
+    
+    ihs.add(15);
+    ihs.add(188);
+    int[] sv = getSortedValues(ihs);
+    assertEquals(2, sv.length);
+    assertEquals(15, sv[0]);
+    assertEquals(188, sv[1]);
+    assertEquals(15, ihs.getMostNegative());
+    assertEquals(188, ihs.getMostPositive());
+    
+    // test most positive / negative
+    ihs.clear();
+    ihs.add(189);
+    assertEquals(189, ihs.getMostNegative());
+    assertEquals(189, ihs.getMostPositive());
+    ihs.add(1000);
+    ihs.add(-1000);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.add(500);
+    ihs.add(-500);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.remove(1000);
+    assertEquals(999, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.add(1001);    
+    assertEquals(1001, ihs.getMostPositive());
+    sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{-1000, -500, 189, 500, 1001}));
+  }
+  
+  public void testSwitching224() {
+    final int OS = 100000;
+    ihs = new IntHashSetRh(16, OS);
+    ihs.add(OS - 1);
+    ihs.add(OS);
+    ihs.add(OS + 1);
+    int[] sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{99999, 100000, 100001 }));
+    ihs.add(OS - 32767);
+    sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{OS - 32767, 99999, 100000, 100001}));
+    ihs.add(OS - 32768);
+    sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{OS - 32768, OS - 32767, 99999, 100000, 100001}));
+    
+  }
+  
+  private int[] getSortedValues(IntHashSetRh s) {
+    IntListIterator it = s.iterator();
+    int[] r = new int[s.size()];
+    int i = 0;
+    while (it.hasNext()) {
+      r[i++] = it.nextNvc();
+    }
+    Arrays.sort(r);
+    return r;
+  }
+  
+  public void testContains() {
+    ihs.add(1188);
+    ihs.add(1040);
+    assertTrue(ihs.contains(1188));
+    assertTrue(ihs.contains(1040));
+    assertFalse(ihs.contains(1));
+    assertFalse(ihs.contains(99));  
+  }
+  
+//  public void testTableSpace() {
+//    assertEquals(32, IntHashSetRh.tableSpace(19, 0.6f));    // 19 / .6 = 31.xxx, round to 32
+//    assertEquals(64, IntHashSetRh.tableSpace(21, 0.6f));
+//    assertEquals(32, ihs.tableSpace(21));
+//  }
+  
+  public void testWontExpand() {
+    ihs = new IntHashSetRh(21);
+    assertEquals(16, ihs.getSpaceUsedInWords());
+    assertTrue(ihs.wontExpand(20));
+    assertFalse(ihs.wontExpand(21));
+  }
+  
+  public void testExpandNpe() {
+    ihs.add(15);
+    ihs.add(150000);  // makes 4 byte table entries
+    
+    for (int i = 1; i < 256; i++) {  // 0 is invalid key
+      ihs.add(i);  // causes resize, check no NPE etc thrown.
+    }
+  }
+  
+  public void testAddIntoRemovedSlot() {
+    long seed = // -5748775656590176364L;
+        new Random().nextLong();
+    System.out.println("Random seed for testAddIntoRemovedSlot in " + this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+
+    for (int i = 1; i < 100; i++) {
+      ihs.add(i);
+      assertEquals(i, ihs.size());
+    }
+    
+    assertEquals(99, ihs.size());
+    
+    /** Test with 2 byte numbers */
+    checkRemovedReuse(true);
+    
+    ihs = new IntHashSetRh();
+    for (int i = 1; i < 99; i++) {
+      ihs.add(i);
+    }
+    ihs.add(100000);  // force 4 byte
+    checkRemovedReuse(false);
+  }
+  
+  private void checkRemovedReuse(boolean is2) {
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+    for (int i = 0; i < 100000; i++) {
+//      if (i % 25000 == 0) System.out.println(i);
+      int v = 1 + random.nextInt(100 + (i % 30000)); // random between 1 and 30,101
+      int sz = ihs.size();
+      boolean wasRemoved = ihs.remove(v);
+      assertEquals(sz - (wasRemoved ? 1 : 0), ihs.size());
+      // debug
+      if (ihs.contains(v)) {
+        System.out.println("debug");
+      }
+      assertTrue(!(ihs.contains(v)));
+      v = 1 + random.nextInt(100 + (i % 30000));
+      sz = ihs.size();
+      boolean wasAdded = ihs.add(v);
+      assertEquals(sz + (wasAdded ? 1 : 0), ihs.size());
+      assertTrue(ihs.contains(v));
+    }
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 16384 : 32768) );
+    
+    //  32,768, 16,384, 8,192, 4096, 2048, 1024, 512, 256
+    // for 2 byte storage, is2 = true, and expected is: i / 2
+    // for 4 byte storage, is2 = false, and expected is i
+    
+    ihs.clear(); // doesn't set 2nd time because size + removed > 1/2 the capacity
+    
+    for (int i = 32768; i > 128; i = i / 2) {
+      ihs.clear(); // sets 2nd time shrinkable
+      assertTrue(ihs.getSpaceUsedInWords() == (is2 ? i/2 : i));
+      ihs.clear();  // shrinks
+      assertTrue(ihs.getSpaceUsedInWords() == (is2 ? i/4 : i/2));
+    }
+//    ihs.clear();
+//    
+    assertTrue(ihs.getSpaceUsedInWords() == (is2 ? 64: 128));
+
+    // table size should be 128, adding 100 items should cause expansion (84 == .66 * 128)
+    for (int i = 1; i < ((is2) ? 100 : 99); i++) {
+      ihs.add(i);
+    }
+    if (!is2) {
+      ihs.add(100000);
+    }
+    
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+    for (int i = 0; i < 1000; i++) {
+      int v = 1 + random.nextInt(100);
+      ihs.remove(v);
+      assertTrue(!(ihs.contains(v)));
+      ihs.add(v);
+      assertTrue(ihs.contains(v));
+    }
+    
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+    
+  }
+ 
+  public void testRandom() {
+    int countAdd = 0;
+    int dupsA = 0;
+    int notPres = 0;
+    int countRmv = 0;
+    
+    long seed = 
+        new Random().nextLong();
+    System.out.println("Random seed for testRandom in " + this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+    
+    for (int i = 1; i < 1024 * 1024; i++) {
+      int k = i & (1024 * 256) - 1;
+      if (k == 0) continue;
+      if (random.nextInt(3) > 0) {
+        int sz = ihs.size();
+        if (ihs.add(k)) {
+          countAdd ++;
+          assertEquals(sz + 1, ihs.size());
+        } else {
+          dupsA ++;
+        }
+        
+      } else {
+        int sz = ihs.size();
+        if (ihs.remove(k)) {
+          countRmv ++;
+          assertEquals(sz - 1, ihs.size());
+        } else {
+          notPres ++;
+        }
+        
+      }
+    }
+    
+    System.out.format("added: %,d dups: %,d rmvd: %,d notPres: %,d, size: %d%n", countAdd, dupsA, countRmv, notPres, ihs.size());
+    assertEquals(countAdd - countRmv, ihs.size() );
+  }
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java
new file mode 100644
index 0000000..e8aefc2
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+
+public class ObjHashSetTestRh extends TestCase {
+  
+  ObjHashSetRh<Integer> ihs;
+  
+  Random random;
+  
+  public void setUp() {
+    ihs = new ObjHashSetRh<>(Integer.class);
+  }
+  
+  public void testBasic() {
+    
+    ihs.add(15);
+    ihs.add(188);
+    Integer[] sv = ihs.toArray();
+    assertEquals(2, sv.length);
+    assertEquals(15 + 188, sv[0] + sv[1]);
+    
+    // test most positive / negative
+    ihs.clear();
+    ihs.add(189);
+    ihs.add(1000);
+    ihs.add(-1000);
+    ihs.add(500);
+    ihs.add(-500);
+    ihs.remove(1000);
+    ihs.add(1001);    
+  }
+  
+  public void testContains() {
+    ihs.add(1188);
+    ihs.add(1040);
+    assertTrue(ihs.contains(1188));
+    assertTrue(ihs.contains(1040));
+    assertFalse(ihs.contains(1));
+    assertFalse(ihs.contains(99));  
+  }
+  
+//  public void testTableSpace() {
+//    assertEquals(32, ObjHashSetRh.tableSpace(19, 0.6f));    // 19 / .6 = 31.xxx, round to 32
+//    assertEquals(64, ObjHashSetRh.tableSpace(21, 0.6f));
+//    assertEquals(32, ihs.tableSpace(21));
+//  }
+    
+  public void testExpandNpe() {
+    ihs.add(15);
+    ihs.add(150000);  // makes 4 byte table entries
+    
+    for (int i = 1; i < 256; i++) {  // 0 is invalid key
+      ihs.add(i);  // causes resize, check no NPE etc thrown.
+    }
+  }
+  
+  public void testAddIntoRemovedSlot() {
+    long seed = // -6401324561844760524L;
+        new Random().nextLong();
+    System.out.println("Random seed for testAddIntoRemovedSlot in " + this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+
+    for (int i = 1; i < 100; i++) {
+      ihs.add(i);
+      assertEquals(i, ihs.size());
+    }
+    
+    assertEquals(99, ihs.size());
+    
+    /** Test with 2 byte numbers */
+    checkRemovedReuse(true);
+    
+    ihs = new ObjHashSetRh<>(Integer.class);
+    for (int i = 1; i < 99; i++) {
+      ihs.add(i);
+    }
+    ihs.add(100000);  // force 4 byte
+    checkRemovedReuse(false);
+  }
+  
+  private void checkRemovedReuse(boolean is2) {
+    for (int i = 0; i < 100000; i++) {
+      int v = 1 + random.nextInt(100 + (i % 30000)); // random between 1 and 30,101
+      int sz = ihs.size();
+      boolean wasRemoved = ihs.remove(v);
+      assertEquals(sz - (wasRemoved ? 1 : 0), ihs.size());
+      assertTrue(!(ihs.contains(v)));
+      v = 1 + random.nextInt(100 + (i % 30000));
+      sz = ihs.size();
+      boolean wasAdded = ihs.add(v);
+      assertEquals(sz + (wasAdded ? 1 : 0), ihs.size());
+      assertTrue(ihs.contains(v));
+    }
+    
+    ihs.clear(); // doesn't set 2nd time because size + removed > 1/2 the capacity
+    
+    for (int i = 32768; i > 128; i = i / 2) {
+      ihs.clear(); // sets 2nd time shrinkable
+      assertEquals(i, ihs.getCapacity());
+      ihs.clear();  // shrinks
+      assertEquals(i/2, ihs.getCapacity());
+    }
+//    ihs.clear();
+//    
+
+    // table size should be 128, adding 100 items should cause expansion (84 == .66 * 128)
+    for (int i = 1; i < 100; i++) {
+      ihs.add(i);
+    }
+    
+    assertEquals(256, ihs.getCapacity());
+    for (int i = 0; i < 1000; i++) {
+      int v = 1 + random.nextInt(100);
+      ihs.remove(v);
+      assertTrue(!(ihs.contains(v)));
+      ihs.add(v);
+      assertTrue(ihs.contains(v));
+    }
+    
+    assertEquals(256, ihs.getCapacity());
+    
+  }
+ 
+  public void testRandom() {
+    int countAdd = 0;
+    int dupsA = 0;
+    int notPres = 0;
+    int countRmv = 0;
+    
+    long seed = 
+        new Random().nextLong();
+    System.out.println("Random seed for testRandom in " + this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+    
+    for (int i = 1; i < 1024 * 1024; i++) {
+      int k = i & (1024 * 256) - 1;
+      if (k == 0) continue;
+      if (random.nextInt(3) > 0) {
+        int sz = ihs.size();
+        if (ihs.add(k)) {
+          countAdd ++;
+          assertEquals(sz + 1, ihs.size());
+        } else {
+          dupsA ++;
+        }
+        
+      } else {
+        int sz = ihs.size();
+        if (ihs.remove(k)) {
+          countRmv ++;
+          assertEquals(sz - 1, ihs.size());
+        } else {
+          notPres ++;
+        }
+        
+      }
+    }
+    
+    System.out.format("added: %,d dups: %,d rmvd: %,d notPres: %,d, size: %d%n", countAdd, dupsA, countRmv, notPres, ihs.size());
+    assertEquals(countAdd - countRmv, ihs.size() );
+  }
+}