You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2014/11/03 23:15:42 UTC

svn commit: r1636458 [1/2] - in /uima/uimaj/trunk/uimaj-core/src: main/java/org/apache/uima/cas/impl/ main/java/org/apache/uima/internal/util/ test/java/org/apache/uima/internal/util/ test/java/org/apache/uima/jcas/test/

Author: schor
Date: Mon Nov  3 22:15:41 2014
New Revision: 1636458

URL: http://svn.apache.org/r1636458
Log:
[UIMA-4061] PositiveIntSet enhancement, use for FSBagIndex.

Added:
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet_impl.java
      - copied, changed from r1634141, uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
Modified:
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSBagIndex.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntSet.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntVector.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/jcas/test/JCasTest.java

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSBagIndex.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSBagIndex.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSBagIndex.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSBagIndex.java Mon Nov  3 22:15:41 2014
@@ -28,12 +28,16 @@ import org.apache.uima.internal.util.Com
 import org.apache.uima.internal.util.IntComparator;
 import org.apache.uima.internal.util.IntPointerIterator;
 import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.internal.util.PositiveIntSet;
+import org.apache.uima.internal.util.PositiveIntSet_impl;
 
 /**
  * Used for UIMA FS Bag Indexes
  * Uses IntVector to hold values of FSs
  */
 public class FSBagIndex extends FSLeafIndexImpl {
+  
+  private static boolean USE_POSITIVE_INT_SET = false;
 
   private class IntVectorIterator implements ComparableIntPointerIterator, LowLevelIterator {
 
@@ -46,7 +50,7 @@ public class FSBagIndex extends FSLeafIn
     private int[] detectIllegalIndexUpdates; // shared copy with Index Repository
 
     private int typeCode;
-
+    
     public boolean isConcurrentModification() {
       return this.modificationSnapshot != this.detectIllegalIndexUpdates[this.typeCode];
     }
@@ -57,7 +61,7 @@ public class FSBagIndex extends FSLeafIn
 
     private IntVectorIterator() {
       super();
-      this.itPos = 0;
+      moveToFirst(); 
     }
 
     private IntVectorIterator(IntComparator comp) {
@@ -66,30 +70,44 @@ public class FSBagIndex extends FSLeafIn
     }
 
     public boolean isValid() {
-      return ((this.itPos >= 0) && (this.itPos < FSBagIndex.this.index.size()));
+      if (USE_POSITIVE_INT_SET) {
+        return FSBagIndex.this.indexP.isValid(this.itPos);
+      } else {
+        return (this.itPos >=0) && (this.itPos < FSBagIndex.this.index.size());
+      }
     }
 
     public void moveToFirst() {
-      this.itPos = 0;
+      this.itPos = (USE_POSITIVE_INT_SET) ? 
+          FSBagIndex.this.indexP.moveToFirst() : 
+          0;
     }
 
     public void moveToLast() {
-      this.itPos = FSBagIndex.this.index.size() - 1;
+      this.itPos = (USE_POSITIVE_INT_SET) ? 
+          FSBagIndex.this.indexP.moveToLast() :
+          FSBagIndex.this.index.size() - 1;
     }
 
     public void moveToNext() {
-      ++this.itPos;
+      this.itPos = (USE_POSITIVE_INT_SET) ? 
+          FSBagIndex.this.indexP.moveToNext(itPos) :
+          itPos + 1;
     }
 
     public void moveToPrevious() {
-      --this.itPos;
+      this.itPos = (USE_POSITIVE_INT_SET) ? 
+          FSBagIndex.this.indexP.moveToPrevious(itPos) :
+          itPos - 1;
     }
 
     public int ll_get() {
       if (!isValid()) {
         throw new NoSuchElementException();
       }
-      return FSBagIndex.this.index.get(this.itPos);
+      return (USE_POSITIVE_INT_SET) ?
+          FSBagIndex.this.indexP.get(this.itPos) :
+          FSBagIndex.this.index.get(this.itPos);
     }
 
     /**
@@ -159,13 +177,15 @@ public class FSBagIndex extends FSLeafIn
 
   // The index, a vector of FS references.
   private IntVector index;
+  
+  final private PositiveIntSet indexP = USE_POSITIVE_INT_SET ? new PositiveIntSet_impl() : null;
 
   private int initialSize;
 
   FSBagIndex(CASImpl cas, Type type, int initialSize, int indexType) {
     super(cas, type, indexType);
     this.initialSize = initialSize;
-    this.index = new IntVector(initialSize);
+    this.index = USE_POSITIVE_INT_SET ? null : new IntVector(initialSize);
   }
 
   /**
@@ -185,21 +205,30 @@ public class FSBagIndex extends FSLeafIn
     return super.init(newComp);
   }
 
+  // not used (2014)
   IntVector getVector() {
     return this.index;
   }
 
   public void flush() {
     // done this way to reset to initial size if it grows
-    if (this.index.size() > this.initialSize) {
-      this.index = new IntVector(this.initialSize);
+    if (USE_POSITIVE_INT_SET) {
+      indexP.clear();      
     } else {
-      this.index.removeAllElements();
+      if (this.index.size() > this.initialSize) {
+        this.index = new IntVector(this.initialSize);
+      } else {
+        this.index.removeAllElements();
+      }
     }
   }
 
   public final boolean insert(int fs) {
-    this.index.add(fs);
+    if (USE_POSITIVE_INT_SET) {
+      indexP.add(fs);
+    } else {
+      index.add(fs);
+    }
     return true;
   }
 
@@ -217,14 +246,18 @@ public class FSBagIndex extends FSLeafIn
   // // return binarySearch(this.index.getArray(), ele, 0, this.index.size());
   // }
   private final int find(int ele) {
-    final int[] array = this.index.getArray();
-    final int max = this.index.size();
-    for (int i = 0; i < max; i++) {
-      if (ele == array[i]) {
-        return i;
+    if (USE_POSITIVE_INT_SET) {
+      return indexP.find(ele);
+    } else {
+      final int[] array = this.index.getArray();
+      final int max = this.index.size();
+      for (int i = 0; i < max; i++) {
+        if (ele == array[i]) {
+          return i;
+        }
       }
+      return -1;
     }
-    return -1;
   }
 
   public int compare(int fs1, int fs2) {
@@ -309,10 +342,18 @@ public class FSBagIndex extends FSLeafIn
    * @see org.apache.uima.cas.FSIndex#contains(FeatureStructure)
    */
   public boolean contains(FeatureStructure fs) {
-    return (find(((FeatureStructureImpl) fs).getAddress()) >= 0);
+    final int addr = ((FeatureStructureImpl) fs).getAddress(); 
+    return USE_POSITIVE_INT_SET ?
+        indexP.contains(addr) :
+        (find(addr) >= 0);
   }
 
   public FeatureStructure find(FeatureStructure fs) {
+    if (USE_POSITIVE_INT_SET) {
+      final FeatureStructureImpl fsi = (FeatureStructureImpl) fs;
+      final int addr = fsi.getAddress();
+      return (indexP.contains(addr)) ? fsi.getCASImpl().createFS(addr) : null;
+    }
     // Cast to implementation.
     FeatureStructureImpl fsi = (FeatureStructureImpl) fs;
     // Use internal find method.
@@ -329,7 +370,7 @@ public class FSBagIndex extends FSLeafIn
    * @see org.apache.uima.cas.FSIndex#size()
    */
   public int size() {
-    return this.index.size();
+    return USE_POSITIVE_INT_SET ? indexP.size() : index.size();
   }
 
   /**
@@ -340,9 +381,13 @@ public class FSBagIndex extends FSLeafIn
   }
 
   public void remove(int fsRef) {
-    final int pos = this.index.indexOfOptimizeAscending(fsRef);
-    if (pos >= 0) {
-      this.index.remove(pos);
+    if (USE_POSITIVE_INT_SET) {
+      indexP.remove(fsRef);
+    } else {
+      final int pos = this.index.indexOfOptimizeAscending(fsRef);
+      if (pos >= 0) {
+        this.index.remove(pos);
+      }
     }
   }
 

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java Mon Nov  3 22:15:41 2014
@@ -26,7 +26,7 @@ import java.util.ListIterator;
 
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
-import org.apache.uima.internal.util.PositiveIntSet;
+import org.apache.uima.internal.util.PositiveIntSet_impl;
 import org.apache.uima.internal.util.IntVector;
 import org.apache.uima.internal.util.XmlAttribute;
 import org.apache.uima.util.Level;
@@ -194,7 +194,7 @@ public class ListUtils {
   }
   
   public int getLength(int type, int addr, int neListType, int tailFeat) {
-    final PositiveIntSet visited = new PositiveIntSet();
+    final PositiveIntSet_impl visited = new PositiveIntSet_impl();
   	foundCycle = false;
   	// first count length of list so we can allocate array
   	int length = 0;
@@ -235,7 +235,7 @@ public class ListUtils {
     final int headFeat = getHeadFeatCode(type);
     final int tailFeat = getTailFeatCode(type);
     final int neListType = getNeListType(type);
-    final PositiveIntSet visited = new PositiveIntSet();
+    final PositiveIntSet_impl visited = new PositiveIntSet_impl();
 
     while (curNode != CASImpl.NULL) {
       final int curNodeType = cas.getHeapValue(curNode);
@@ -385,7 +385,7 @@ public class ListUtils {
     int currLength = this.getLength(this.neIntListType, addr);
     int curNode = addr;
     int prevNode = 0;
-    final PositiveIntSet visited = new PositiveIntSet();
+    final PositiveIntSet_impl visited = new PositiveIntSet_impl();
     boolean foundCycle = false;
     int i =0;
     
@@ -461,7 +461,7 @@ public class ListUtils {
     int currLength = this.getLength(this.neFloatListType, addr);
     int curNode = addr;
     int prevNode = 0;
-    final PositiveIntSet visited = new PositiveIntSet();
+    final PositiveIntSet_impl visited = new PositiveIntSet_impl();
     boolean foundCycle = false;
     int i =0;
     
@@ -536,7 +536,7 @@ public class ListUtils {
     int first = addr;
     int currLength = this.getLength(this.neFsListType, addr);
     boolean foundCycle = false;
-    final PositiveIntSet visited = new PositiveIntSet();
+    final PositiveIntSet_impl visited = new PositiveIntSet_impl();
     int curNode = addr;
     int prevNode = 0;
     
@@ -614,7 +614,7 @@ public class ListUtils {
   public int updateStringList(int addr, List<String> stringValues) throws SAXException   {
     int first = addr;
     boolean foundCycle = false;
-    final PositiveIntSet visited = new PositiveIntSet();
+    final PositiveIntSet_impl visited = new PositiveIntSet_impl();
     int curNode = addr;
     int prevNode = 0;
     int currLength = this.getLength(this.neStringListType, addr);

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java Mon Nov  3 22:15:41 2014
@@ -36,7 +36,7 @@ import java.util.NoSuchElementException;
  * If using offset, you must add ints in a range equal to or above the offset
  * 
  */
-public class IntBitSet {
+public class IntBitSet implements PositiveIntSet {
     
   private final BitSet set;
   
@@ -83,14 +83,15 @@ public class IntBitSet {
     this(maxInt, 0);
   }
 
-  public IntBitSet(int maxInt, int offset) {
-    set = new BitSet(Math.max(1, maxInt));
+  public IntBitSet(int maxAdjKey, int offset) {
+    set = new BitSet(Math.max(1, maxAdjKey));
     this.offset = offset;
   }
   
   /**
    * empty the IntBitSet
    */
+  @Override
   public void clear() {
     set.clear();
     size = 0;
@@ -101,15 +102,22 @@ public class IntBitSet {
    * @param key
    * @return
    */
+  @Override
   public boolean contains(int key) {
     return (key == 0) ? false : set.get(key - offset);
   }
  
+
+  @Override
+  public int find(int element) {
+    return contains(element) ? element : -1;
+  }
   /**
    * 
    * @param key
    * @return true if this set did not already contain the specified element
    */
+  @Override
   public boolean add(int original_key) {
     if (original_key < offset) {
       throw new IllegalArgumentException("key must be positive, but was " + original_key);
@@ -130,6 +138,7 @@ public class IntBitSet {
    * @param key -
    * @return true if this key was removed, false if not present
    */
+  @Override
   public boolean remove(int original_key) {
     final int key = original_key - offset;
     final boolean prev = set.get(key);
@@ -145,6 +154,7 @@ public class IntBitSet {
    * 
    * @return the number of elements in this set
    */
+  @Override
   public int size() {
     return size;    // bit set cardinality() is slow
   }
@@ -170,6 +180,12 @@ public class IntBitSet {
     return set.length() - 1 + offset;
   }
   
+  @Override
+  public int get(int position) {
+    assert(set.get(position));
+    return position + offset;
+  }
+  
   private class IntBitSetIterator implements IntListIterator {
 
     protected int curKey;
@@ -221,7 +237,38 @@ public class IntBitSet {
 
   }
 
-  public IntBitSetIterator getIterator() {
+  @Override
+  public IntBitSetIterator iterator() {
     return new IntBitSetIterator();
   }
+
+  @Override
+  public int moveToFirst() {
+    return set.nextSetBit(0);
+  }
+
+  @Override
+  public int moveToLast() {
+    return set.length() - 1;
+  }
+
+  @Override
+  public int moveToNext(int position) {
+    return set.nextSetBit(position + 1);
+  }
+
+  @Override
+  public int moveToPrevious(int position) {
+    return set.previousSetBit(position - 1);  
+  }
+
+  /**
+   * This impl depends on position always pointing to a valid (== non 0) 
+   * element of the set, when it should be valid 
+   */
+  @Override
+  public boolean isValid(int position) {
+    return (position >= 0) && set.get(position);
+  }
+
 }

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java Mon Nov  3 22:15:41 2014
@@ -31,10 +31,14 @@ import org.apache.uima.jcas.impl.JCasHas
  * based on Int2IntHashMap
  * This impl is for use in a single thread case only
  * 
- * Supports shrinking (reallocating the big table)  
+ * Supports shrinking (reallocating the big table)
+ *
+ * Supports representing ints as "short" 2byte values if possible,
+ *   together with an offset amount.
  *   
+ *   Automatically switches to full int representation if needed  
  */
-public class IntHashSet {
+public class IntHashSet implements PositiveIntSet {
   
   public static final float DEFAULT_LOAD_FACTOR = 0.66F;
   // set to true to collect statistics for tuning
@@ -58,22 +62,43 @@ public class IntHashSet {
   private int maxProbe = 0;
 
   private int sizeWhichTriggersExpansion;
-  private int size; // number of elements in the table  
+  private int size; // number of elements in the table
+  
+  // offset only used with keys2.  values stored are key - offset
+  //   intent is to have them fit into short data type.
+  //   - value of 0 reserved; if value is 0 - means no entry
+  //   store values at or below the offset are stored as 1 less to avoid mapping to "0".
+  private int offset;
  
-  private int [] keys;
+  private int [] keys4;
+  private short[] keys2;
   
   private boolean secondTimeShrinkable = false;
   
+  // these are true values (before any offset adjustment)
   private int mostPositive = Integer.MIN_VALUE;
   private int mostNegative = Integer.MAX_VALUE;
 
   public IntHashSet() {
-    this(16);
+    this(12, 0);
   }
   
   public IntHashSet(int initialCapacity) {
+    this(initialCapacity, 0);
+  }
+  
+  /**
+   * 
+   * @param initialCapacity - you can add this many before expansion
+   * @param offset - for values in the short range, the amount to subtract
+   *                 before storing.
+   *                 If == MIN_VALUE, then force 4 byte ints
+   */
+  public IntHashSet(int initialCapacity, int offset) {
     this.initialCapacity = initialCapacity;
-    newTableKeepSize(initialCapacity);
+    boolean force4 = offset == Integer.MIN_VALUE;
+    this.offset = force4 ? 0 : offset;
+    newTableKeepSize(tableSpace(initialCapacity), force4);
     size = 0;
     if (TUNE) {
       histogram = new int[200];
@@ -100,6 +125,12 @@ public class IntHashSet {
     return tableSpace(numberOfElements, loadFactor);
   }
   
+  /**
+   * 
+   * @param numberOfElements
+   * @param factor
+   * @return capacity of the main table (either 2 byte or 4 byte entries)
+   */
   public static int tableSpace(int numberOfElements, Float factor) {
     if (numberOfElements < 0) {
       throw new IllegalArgumentException("must be > 0");
@@ -108,9 +139,20 @@ public class IntHashSet {
     return  Math.max(16, nextHigherPowerOf2(capacity));
   }
   
-  private void newTableKeepSize(int capacity) {
-    capacity = Math.max(16, nextHigherPowerOf2(capacity));
-    keys = new int[capacity];
+  private void newTableKeepSize(int capacity, boolean make4) {
+    if (!make4) {
+      capacity = Math.max(4, nextHigherPowerOf2(capacity));
+      make4 = (capacity >= 65536);
+    }
+    // don't use short values unless 
+    //    the number of items is < 65536
+    if (make4) {
+      keys4 = new int[capacity];
+      keys2 = null;
+    } else {
+      keys2 = new short[capacity];
+      keys4 = null;
+    }
     sizeWhichTriggersExpansion = (int)(capacity * loadFactor);
   }
 
@@ -130,32 +172,57 @@ public class IntHashSet {
   }
   
   public int getSpaceUsedInWords() {
-    return keys.length;  // 8 is overhead for this class excluding any Java object overhead
+    return (keys4 != null) ? keys4.length : (keys2.length >> 1);  
   }
   
   public static int getSpaceOverheadInWords() {
-    return 8;
+    return 11;
+  }
+  
+  private int getCapacity() {
+    return (null == keys4) ? keys2.length : keys4.length;
   }
   
   private void increaseTableCapacity() {
-    final int [] oldKeys = keys;
-    final int oldCapacity = oldKeys.length;
-    int newCapacity = 2 * oldCapacity;
+    final int oldCapacity = getCapacity();
+    final int newCapacity = oldCapacity << 1;
     
-    if (TUNE) {
-      System.out.println("Capacity increasing from " + oldCapacity + " to " + newCapacity);
-    }
-    newTableKeepSize(newCapacity);
-    for (int key : oldKeys) {
-      if (key != 0) {
-        addInner(key);
+    if (keys2 != null) {
+      final short[] oldKeys = keys2;
+      newTableKeepSize(newCapacity, false);
+      if (newCapacity > 32768) {
+        // switch to 4
+        if (TUNE) {System.out.println("Switching to 4 byte keys, Capacity increasing from " + oldCapacity + " to " + newCapacity);}
+        for (short adjKey : oldKeys) {
+          if (adjKey != 0) {
+            addInner4(adjKey + offset);
+          }
+        }
+        
+      } else {
+        if (TUNE) {System.out.println("Keeping 2 byte keys, Capacity increasing from " + oldCapacity + " to " + newCapacity);}
+        for (short adjKey : oldKeys) {
+          if (adjKey != 0) {
+            addInner2(adjKey);
+          }
+        }
+      }
+      
+    } else {
+      final int [] oldKeys = keys4;      
+      if (TUNE) {System.out.println("Capacity increasing from " + oldCapacity + " to " + newCapacity);}
+      newTableKeepSize(newCapacity, false);
+      for (int key : oldKeys) {
+        if (key != 0) {
+          addInner4(key);
+        }
       }
     }
   }
   
   // called by clear
   private void newTable(int capacity) {
-    newTableKeepSize(capacity);
+    newTableKeepSize(capacity, false);
     size = 0;
     resetHistogram();
   }
@@ -168,6 +235,19 @@ public class IntHashSet {
     }
   }
   
+  private void resetArray() {
+    if (keys4 == null) {
+      Arrays.fill(keys2,  (short)0);
+    } else {
+      Arrays.fill(keys4, 0);
+    }
+    mostPositive = Integer.MIN_VALUE;
+    mostNegative = Integer.MAX_VALUE;
+    resetHistogram();
+    size = 0;
+  }
+  
+  @Override
   public void clear() {
     // see if size is less than the 1/2 size that triggers expansion
     if (size <  (sizeWhichTriggersExpansion >>> 1)) {
@@ -175,26 +255,22 @@ public class IntHashSet {
       //   this is done to avoid thrashing around the threshold
       if (secondTimeShrinkable) {
         secondTimeShrinkable = false;
-        final int newCapacity = Math.max(initialCapacity, keys.length >>> 1);
-        if (newCapacity < keys.length) { 
+        final int currentCapacity = getCapacity();
+        final int newCapacity = Math.max(initialCapacity, currentCapacity >>> 1);
+        if (newCapacity < currentCapacity) { 
           newTable(newCapacity);  // shrink table by 50%
         } else { // don't shrink below minimum
-          Arrays.fill(keys, 0);
+          resetArray();
         }
-        size = 0;
-        resetHistogram();
         return;
+        
       } else {
         secondTimeShrinkable = true;
       }
     } else {
       secondTimeShrinkable = false; // reset this to require 2 triggers in a row
     }
-    size = 0;
-    Arrays.fill(keys, 0);
-    resetHistogram();
-    mostPositive = Integer.MIN_VALUE;
-    mostNegative = Integer.MAX_VALUE;
+   resetArray();
   }
 
   /** 
@@ -204,24 +280,25 @@ public class IntHashSet {
   *   current value of keys[position] would be 0.
   *   
   *   if the key is found, then keys[position] == key
-  * @param key -
+  * @param adjKey -
   * @return the probeAddr in keys array - might have a 0 value, or the key value if found
   */
-  private int find(final int key) {
-    if (key == 0) {
+  private int findPosition(final int adjKey) {
+    if (adjKey == 0) {
       throw new IllegalArgumentException("0 is an invalid key");
     }
 
-    final int hash = JCasHashMap.hashInt(key);
+    final int hash = JCasHashMap.hashInt(adjKey);
     int nbrProbes = 1;
-    final int[] localKeys = keys;
-    final int bitMask = localKeys.length - 1;
+    final int[] localKeys4 = keys4;
+    final short[] localKeys2 = keys2;
+    final int bitMask = ((localKeys4 == null) ? localKeys2.length : localKeys4.length) - 1;
     int probeAddr = hash & bitMask;
     int probeDelta = 1;
 
     while (true) {
-      final int testKey = localKeys[probeAddr];
-      if (testKey == 0 || testKey == key) {
+      final int testKey = (localKeys4 != null) ? localKeys4[probeAddr] : localKeys2[probeAddr];
+      if (testKey == 0 || testKey == adjKey) {
         break;
       }
       nbrProbes++;
@@ -240,72 +317,141 @@ public class IntHashSet {
     return probeAddr;
   }
    
-  public boolean contains(int key) {
-    return (key == 0) ? false : keys[find(key)] == key;
+  @Override
+  public boolean contains(int rawKey) {
+    if (keys4 == null) {
+      final int adjKey = getAdjKey(rawKey); 
+      return (rawKey == 0) ? false : keys2[findPosition(adjKey)] == adjKey;
+    }
+    return (rawKey == 0) ? false : keys4[findPosition(rawKey)] == rawKey;
+  }
+  
+  @Override
+  public int find(int rawKey) {
+    final int adjKey = getAdjKey(rawKey);
+    final int pos = findPosition(adjKey);
+    if (keys4 == null) {
+      return (keys2[pos] == adjKey) ? pos : -1;
+    } else {
+      return (keys4[pos] == adjKey) ? pos : -1;
+    }
   }
  
+  private int getAdjKey(int rawKey) {
+    if (keys4 != null) {
+      return rawKey;
+    }
+    int adjKey = rawKey - offset;
+    if (adjKey <= 0) {
+      adjKey --;  // because 0 is reserved for null
+    }
+    if ((adjKey > 32767) || (adjKey < -32768)) {
+      // convert to 4 byte because values can't be offset and fit in a short
+      final short[] oldKeys = keys2;
+      newTableKeepSize(getCapacity(), true);  // make a 4 table
+      // next fails to convert short to int
+//      System.arraycopy(oldKeys, 0,  keys4,  0,  oldKeys.length);
+      int i = 0;
+      for (int v : oldKeys) {
+        keys4[i++] = v;
+      }
+      return rawKey; // now using 4 byte table
+    } else {     
+      return adjKey;
+    }
+  }
+  
   /**
    * 
    * @param key
    * @return true if this set did not already contain the specified element
    */
-  public boolean add(int key) {
-    if (key == 0) {
-      throw new IllegalArgumentException("0 not allowed as a key to add");
-    }
+  @Override
+  public boolean add(int rawKey) {
+    final int adjKey = getAdjKey(rawKey);
     if (size == 0) {
-      mostPositive = mostNegative = key;
+      mostPositive = mostNegative = rawKey;
     } else {
-      if (key > mostPositive) {
-        mostPositive = key;
+      if (rawKey > mostPositive) {
+        mostPositive = rawKey;
       }
-      if (key < mostNegative) {
-        mostNegative = key;
+      if (rawKey < mostNegative) {
+        mostNegative = rawKey;
       }
     }
-    final int i = find(key);
-    if (keys[i] == 0) {
-      keys[i] = key;
+    final int i = findPosition(adjKey);
+    if (keys4 == null) {
+      if (keys2[i] == 0) {
+        keys2[i] = (short) adjKey;
+        incrementSize();
+        return true;
+      }
+      return false;
+    } 
+    if (keys4[i] == 0) {
+      keys4[i] = adjKey;
       incrementSize();
       return true;
     }
     return false;
   }
-  
+      
   /**
    * used for increasing table size
-   * @param key
+   * @param rawKey
    */
-  private void addInner(int key) {
-    final int i = find(key);
-    assert(keys[i] == 0);
-    keys[i] = key;
+  private void addInner4(int rawKey) {
+    final int i = findPosition(rawKey);
+    assert(keys4[i] == 0);
+    keys4[i] = rawKey;
   }
-
+  
+  private void addInner2(short adjKey) {
+    final int i = findPosition(adjKey);
+    assert(keys2[i] == 0);
+    keys2[i] = adjKey;
+  }
+  
   /**
    * mostPositive and mostNegative are not updated
    *   for removes.  So these values may be inaccurate,
    *   but mostPositive is always >= actual most positive,
    *   and mostNegative is always <= actual most negative.
-   * @param key -
+   * No conversion from int to short
+   * @param rawKey -
    * @return true if the key was present
    */
-  public boolean remove(int key) {
-    final int i = find(key);
-    if (keys[i] != 0) {
-      keys[i] = 0;
+  @Override
+  public boolean remove(int rawKey) {
+    final int adjKey = getAdjKey(rawKey);
+    final int i = findPosition(adjKey);
+    boolean removed = false;
+    if (keys4 == null) {
+      if (keys2[i] != 0) {
+        keys2[i] = 0;
+        removed = true;
+      }
+    } else {
+      if (keys4[i] != 0) {
+        keys4[i] = 0;
+        removed = true;
+      }
+    }
+    
+    if (removed) {
       size--;
-      if (key == mostPositive) {
+      if (rawKey == mostPositive) {
         mostPositive --;  // a weak adjustment
       }
-      if (key == mostNegative) {
+      if (rawKey == mostNegative) {
         mostNegative ++;  // a weak adjustment
       }
       return true;
-    }
+    } 
     return false;
   }
-
+  
+  @Override
   public int size() {
     return size;
   }
@@ -345,14 +491,27 @@ public class IntHashSet {
         System.out.println(i + ": " + histogram[i]);
       }     
       
-      System.out.println("bytes / entry = " + (float) (keys.length) * 4 / size());
-      System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+      if (keys4 == null) {
+        System.out.println("bytes / entry = " + (float) (keys2.length) * 2 / size());
+        System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
           size(),
-          (int) ((keys.length >>> 1) * loadFactor),
-          (int) (keys.length * loadFactor));
+          (int) ((keys2.length >>> 1) * loadFactor),
+          (int) (keys2.length * loadFactor));
+      } else {
+        System.out.println("bytes / entry = " + (float) (keys4.length) * 4 / size());
+        System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
+          size(),
+          (int) ((keys4.length >>> 1) * loadFactor),
+          (int) (keys4.length * loadFactor));        
+      }
     }
   }
   
+  @Override
+  public int get(int index) {
+    return offset + ((keys4 == null) ? keys2[index] : keys4[index]);
+  }
+  
   /**
    * advance pos until it points to a non 0 or is 1 past end
    * @param pos
@@ -363,12 +522,12 @@ public class IntHashSet {
       pos = 0;
     }
     
-    final int max = keys.length;
+    final int max = getCapacity();
     while (true) {
       if (pos >= max) {
         return pos;
       }
-      if (keys[pos] != 0) {
+      if (get(pos) != 0) {
         return pos;
       }
       pos ++;
@@ -381,7 +540,7 @@ public class IntHashSet {
    * @return
    */
   private int moveToPreviousFilled(int pos) {
-    final int max = keys.length;
+    final int max = getCapacity();
     if (pos > max) {
       pos = max - 1;
     }
@@ -390,7 +549,7 @@ public class IntHashSet {
       if (pos < 0) {
         return pos;
       }
-      if (keys[pos] != 0) {
+      if (get(pos) != 0) {
         return pos;
       }
       pos --;
@@ -407,14 +566,14 @@ public class IntHashSet {
 
     public final boolean hasNext() {
       curPosition = moveToNextFilled(curPosition);
-      return curPosition < keys.length;
+      return curPosition < getCapacity();
     }
 
     public final int next() {
       if (!hasNext()) {
         throw new NoSuchElementException();
       }
-      return keys[curPosition++];
+      return get(curPosition++);
     }
 
     /**
@@ -432,14 +591,14 @@ public class IntHashSet {
       if (!hasPrevious()) {
         throw new NoSuchElementException();
       }
-      return keys[curPosition--];
+      return get(curPosition--);
     }
 
     /**
      * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
      */
     public void moveToEnd() {
-      curPosition = keys.length - 1;
+      curPosition = getCapacity() - 1;
     }
 
     /**
@@ -448,11 +607,36 @@ public class IntHashSet {
     public void moveToStart() {
       curPosition = 0;
     }
-  }
-  
+  } 
   
-  public IntListIterator getIterator() {
+  @Override
+  public IntListIterator iterator() {
     return new IntHashSetIterator();
   }
 
+  @Override
+  public int moveToFirst() {
+    return (size() == 0) ? -1 : moveToNextFilled(0);
+  }
+
+  @Override
+  public int moveToLast() {
+    return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+  }
+
+  @Override
+  public int moveToNext(int position) {
+    final int n = moveToNextFilled(position + 1); 
+    return (n == getCapacity()) ? -1 : n;
+  }
+
+  @Override
+  public int moveToPrevious(int position) {
+    return moveToPreviousFilled(position - 1);
+  }
+
+  @Override
+  public boolean isValid(int position) {
+    return (position >= 0) && (position < getCapacity());
+  }
 }

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntSet.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntSet.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntSet.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntSet.java Mon Nov  3 22:15:41 2014
@@ -19,19 +19,33 @@
 
 package org.apache.uima.internal.util;
 
+import java.util.NoSuchElementException;
+
 /**
  * This class implements a set of integers. It does not implement the <code>Set</code> interface
  * for performance reasons, though methods with the same name are equivalent.
  * 
+ *  This does not implement shorts + offset, like the IntHashSet does
+ *    because by the time that might be of interest, we would switch to
+ *    IntHashSet to get ~O(1) operations including contains.
+ *    
  */
-public class IntSet {
+public class IntSet implements PositiveIntSet {
 
   /** The data. */
-  private IntVector data;
+  private IntVector iVec;
 
   /** Creates a new instance of this set. */
   public IntSet() {
-    this.data = new IntVector();
+    this.iVec = new IntVector();
+  }
+  
+  /**
+   * 
+   * @param capacity allocate enough space to hold at least this before expanding
+   */
+  public IntSet(int capacity) {
+    this.iVec = new IntVector(capacity);
   }
 
   /**
@@ -42,9 +56,10 @@ public class IntSet {
    * @return <code>true</code> if this set did not already contain this element,
    *         <code>false</code> otherwise.
    */
+  @Override
   public boolean add(int element) {
-    if (!this.data.contains(element)) {
-      this.data.add(element);
+    if (!this.iVec.contains(element)) {
+      this.iVec.add(element);
       return true;
     }
     return false;
@@ -58,23 +73,31 @@ public class IntSet {
    * @return <code>true</code> if the element is contained in this set, <code>false</code>
    *         otherwise.
    */
+  @Override
   public boolean contains(int element) {
-    return this.data.contains(element);
+    return this.iVec.contains(element);
+  }
+
+  @Override
+  public int find(int element) {
+    return this.iVec.indexOf(element);
   }
 
   /** @return the size of this set. */
+  @Override
   public int size() {
-    return this.data.size();
+    return this.iVec.size();
   }
 
   /** @return the <code>n</code>-th element in this set. */
+  @Override
   public int get(int n) {
-    return this.data.get(n);
+    return this.iVec.get(n);
   }
 
   /** Removes the <code>n</code>-th element in this set. */
-  public void remove(int n) {
-    this.data.remove(n);
+  public void removeElementAt(int n) {
+    this.iVec.remove(n);
   }
 
   /**
@@ -102,14 +125,14 @@ public class IntSet {
       int sum1 = 0;
       int sum2 = 0;
       for (int i = 0; i < size; i++) {
-        sum1 += this.data.get(i);
+        sum1 += this.iVec.get(i);
         sum2 += s.get(i);
       }
       if (sum1 != sum2)
         return false;
 
       for (int i = 0; i < size; i++) {
-        if (!s.contains(this.data.get(i)))
+        if (!s.contains(this.iVec.get(i)))
           return false;
       }
       return true;
@@ -118,17 +141,120 @@ public class IntSet {
   }
 
   public int hashCode() {
-    if (this.data == null) {
+    if (this.iVec == null) {
       return 0;
     }
     int sum = 0;
     for (int i = 0; i < this.size(); i++) {
-      sum += this.data.get(i);
+      sum += this.iVec.get(i);
     }
     return sum;
   }
   
   public int indexOf(int element) {
-	return  this.data.indexOf(element);
+	return  this.iVec.indexOf(element);
   }
+
+  @Override
+  public void clear() {
+    iVec.removeAllElements(); // doesn't reallocate
+  }
+
+  @Override
+  public boolean remove(int key) {
+    int i = iVec.indexOf(key);
+    if (i != -1) {
+      iVec.remove(i);
+    }
+    return i != -1;
+  }
+
+  private class IntSetIterator implements IntListIterator {
+
+    protected int pos = 0;
+    
+    protected IntSetIterator() {}
+ 
+    @Override
+    public final boolean hasNext() {
+      return (pos >= 0 && pos < size());
+    }
+
+    @Override
+    public final int next() {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      return iVec.get(pos++);
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#hasPrevious()
+     */
+    @Override
+    public boolean hasPrevious() {
+      final int posm1 = pos - 1;
+      return (posm1 >= 0 && posm1 < size());
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#previous()
+     */
+    @Override
+    public int previous() {
+      if (!hasPrevious()) {
+        throw new NoSuchElementException();
+      }
+      return iVec.get(pos--);      
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+     */
+    @Override
+    public void moveToEnd() {
+      pos = size() - 1;
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+     */
+    @Override
+    public void moveToStart() {
+      pos = 0;
+    }
+
+  }
+
+  @Override
+  public IntSetIterator iterator() {
+    return new IntSetIterator();
+  }
+
+  @Override
+  public int moveToFirst() {
+    return (size() == 0) ? -1 : 0;
+  }
+
+  @Override
+  public int moveToLast() {
+    return size() -1;
+  }
+
+  @Override
+  public int moveToNext(int position) {
+    return (size() == (position + 1)) ? -1 : position + 1;
+  }
+
+  @Override
+  public int moveToPrevious(int position) {
+    return position - 1;
+  }
+
+  @Override
+  public boolean isValid(int position) {
+    // TODO Auto-generated method stub
+    return (position >= 0) && (position < size());
+  }
+  
 }

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntVector.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntVector.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntVector.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntVector.java Mon Nov  3 22:15:41 2014
@@ -290,14 +290,15 @@ public class IntVector implements Serial
    * @return The position, or <code>-1</code> if it doesn't exist.
    */
   public int position(int elem) {
-    int i = 0;
-    while (i < this.pos) {
-      if (this.array[i] == elem) {
-        return i;
-      }
-      ++i;
-    }
-    return -1;
+    return indexOf(elem);
+//    int i = 0;
+//    while (i < this.pos) {
+//      if (this.array[i] == elem) {
+//        return i;
+//      }
+//      ++i;
+//    }
+//    return -1;
   }
 
   /**

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java Mon Nov  3 22:15:41 2014
@@ -16,387 +16,85 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
 package org.apache.uima.internal.util;
 
-import java.util.Arrays;
-import java.util.NoSuchElementException;
-
-
+public interface PositiveIntSet {
 
-/**
- * An set of non-zero ints.
- * 
- *   Uses 2 1/2 different representations depending on the ratio of the max int stored and the total number of ints stored.
- *     For large sparse ints, it uses the IntHashSet
- *     
- *     For more dense ints, it uses bitSets or bitSets with an offset
- *     
- *     It switches when adding new members if the other representation is sufficiently smaller 
- */
-public class PositiveIntSet {
-  
-  // number of words a inthashset has to be better than intbitset to cause switch
-  private static final int HYSTERESIS = 16;
-  
-  // Extra space in bitset for initial capacity - can add an int up to that size 
-  //   64 *4 is 8 words, = 256.
-  private static final int BIT_SET_OVERALLOCATE = 64 * 4;
-  
-  // Extra space in hashset for initial capacity - can add a minimum of that many more members
-  //   without hitting the hash-set doubling.
-  private static final int HASH_SET_OVERALLOCATE = 8;
-  // Extra space in hashset for initial capacity - when creating from bitmap set - multiplier of existing size
-  private static final int HASH_SET_OVERALLOCATE_MULTIPLIER = 1; // bit shift right = divide by 2 = 50% of existing capacity
-  
-  private IntBitSet intBitSet = null;
-  
-  private IntHashSet intHashSet = null;
-  
-  //package private for test case
-  boolean isBitSet = true;  
-  
   /**
-   * Set false once we find we have to reduce the bit offset
+   * remove all members of the set
    */
-  //package private for test case
-  boolean useOffset = true;
-  
-  public IntListIterator getUnorderedIterator() {
-    if (isBitSet) {
-      return intBitSet.getIterator();
-    }
-    return intHashSet.getIterator();
-  }
-  
-  public IntListIterator getOrderedIterator() {
-    if (isBitSet) {
-      return intBitSet.getIterator();
-    }
-    
-    int[] allValues = new int[size()];
-    IntListIterator it = intHashSet.getIterator();
-    int i = 0;
-    while (it.hasNext()) {
-      allValues[i++] = it.next();
-    }
-    Arrays.sort(allValues);
-    return new IntListIteratorOverArray(allValues);
-  }
-  
-  private class IntListIteratorOverArray implements IntListIterator {
-    private int[] a;
-    private int pos;
-    
-    IntListIteratorOverArray(int[] a) {
-      this.a = a;
-      pos = 0;
-    }
-    
-    @Override
-    public boolean hasNext() {
-      return pos >= 0 && pos < a.length;
-    }
-
-    @Override
-    public int next() throws NoSuchElementException {
-      if (hasNext()) {
-        return a[pos++];
-      }
-      throw new NoSuchElementException();
-    }
+  void clear();
 
-    @Override
-    public boolean hasPrevious() {
-      return pos >= 0 && pos < a.length;      
-    }
-
-    @Override
-    public int previous() {
-      if (hasPrevious()) {
-        return a[pos--];
-      }
-      throw new NoSuchElementException();
-    }
-
-    @Override
-    public void moveToStart() {
-      pos = 0;
-    }
+  /**
+   * @param key -
+   * @return true if key is in the set
+   */
+  boolean contains(int key);
 
-    @Override
-    public void moveToEnd() {
-      pos = a.length - 1;
-    }   
-  }
-  
-  public void clear() {
-    if (isBitSet) {
-      intBitSet.clear();
-    } else {
-      intHashSet.clear();
-    }
-  }
-  
-  public boolean contains(int key) {
-    if (isBitSet) {
-      return intBitSet.contains(key);
-    } else {
-      return intHashSet.contains(key);
-    }
-  }
- 
   /**
    * 
    * @param key -
    * @return true if this set did not already contain the specified element
    */
-  public boolean add(int key) {
-    maybeSwitchRepresentation(key);
-    if (isBitSet) {
-      return intBitSet.add(key);
-    } else {
-      return intHashSet.add(key);
-    }
-  }
+  boolean add(int key);
 
   /**
-   * When a key is added which is lower than the offset, we need to adjust the whole int set table.
-   * Although an array copy could do this, we don't have access to the bitset impl.
-   * 
-   * So we just set the offset to 0, and either convert the whole thing to a inthashset or copy all the bits to a new
-   * bit set.
    * 
-   * @param key
+   * @param key -
+   * @return true if the set had this element before the remove
    */
-  private void adjustBitSetForLowerOffset(int key) {
-    final int largestInt = intBitSet.getLargestMenber();
-    final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(largestInt, 0);  // doesn't use offset
-    final int hashSetSpaceNeeded = getHashSetSpace(intBitSet.size());
-    
-    if (hashSetSpaceNeeded < (bitSetSpaceNeeded - HYSTERESIS)) {
-      switchToHashSet(size());
-    } else {
-      IntBitSet bs = new IntBitSet(largestInt + BIT_SET_OVERALLOCATE);
-      IntListIterator it = intBitSet.getIterator();
-      while (it.hasNext()) {
-        bs.add(it.next());
-      }
-      intBitSet = bs;      
-    }
-    useOffset = false;  // stop using because we have evidence it isn't appropriate
-  }
-  
-  public void add(int[] vs) {
-    for (int i : vs) {
-      add(i);
-    }
-  }
-  
-  public boolean remove(int key) {
-    if (isBitSet) {
-      return intBitSet.remove(key);
-    } else {
-      return intHashSet.remove(key);
-    }   
-  }
+  boolean remove(int key);
 
-  public int size() {
-    if (isBitSet) {
-      return intBitSet.size();
-    } else {
-      return intHashSet.size();
-    }
-  }
-  
-  public int[] toUnorderedIntArray() {
-    final int[] a = new int[size()];
-    IntListIterator it = getUnorderedIterator();
-    int i = 0;
-    while (it.hasNext()) {
-      a[i++] = it.next();
-    }
-    return a;
-  }
+  /**
+   * @return number of elements in the set
+   */
+  int size();
   
-  public int[] toOrderedIntArray() {
-    int [] a = toUnorderedIntArray();
-    if (isBitSet) {
-      return a;
-    } 
-    Arrays.sort(a);
-    return a;
-  }
+  /**
+   * @return an iterator (may be ordered or unordered) over the members of the set
+   */
+  IntListIterator iterator();
   
-  /** 
-   * Only called if key won't fit in existing hashset allocation
-   * 
-   * Using existing bitset, compute what its size() (capacity) would be if the 
-   * key were added; include the overhead of the intbitset class.
-   * 
-   * long words required = larger of double the current size, or the number of words required 
+  /**
+   * @param element an item which may be in the set
+   * @return -1 if the item is not in the set, or a position value that can be used with iterators to start at that item.
+   */
+  int find(int element);
 
-   * @param maxKeyLessOffset
-   * @return number of words needed to store the highest value
+  /**
+   * For FSBagIndex low level iterator use
+   * @param position - get the element at this position
+   * @return the element
    */
-  private int getBitSetSpaceFromMaxInt(int maxKeyLessOffset, int spaceUsed) {
-    int w64 = 1 + (maxKeyLessOffset >> 6); // 64 bits per long, add one because 0 to 63 takes one word)
-    spaceUsed = spaceUsed >> 5;  // in # of long words, * 2 because the alloc doubles the space 
-    
-    int newSpace = Math.max(spaceUsed, w64) << 1; // size is in 32 bit words at the end  
-    // e.g., 31 key => 1 word, 32 key => 2 words
-    return newSpace + 2;  // 2 more for IntBitSet object overhead
-  }
+  int get(int position);
   
   /**
-   * Only called if key won't fit in existing allocation
-   * 
-   * returns new hash table size ( usually double) + overhead 
-   * 
-   * @param numberOfEntries
-   * @return
+   * For FSBagIndex low level iterator use
+   * @return the position of the first element, or -1;
    */
-  private int getHashSetSpace() {
-    return intHashSet.getSpaceUsedInWords() * 2 + IntHashSet.getSpaceOverheadInWords();
-  }
+  int moveToFirst();
   
   /**
-   * When converting from bitset to hash set, the initial hash set should be 
-   * big enough so that it takes a while before it is doubled-expanded.
-   * 
-   * @param existingSize
-   * @return
+   * For FSBagIndex low level iterator use
+   * @return the position of the last element, or -1;
    */
-  private int getHashSetOverAllocateSize(int existingSize) {
-    return existingSize + (existingSize >> HASH_SET_OVERALLOCATE_MULTIPLIER) + HASH_SET_OVERALLOCATE;
-  }
+  int moveToLast();
   
   /**
-   * For the case where the intHashSet doesn't yet exist
-   * @param numberOfElements -
-   * @return the size in words
-   */
-  private int getHashSetSpace(int numberOfElements) {
-    return IntHashSet.tableSpace(numberOfElements, IntHashSet.DEFAULT_LOAD_FACTOR) + 
-           IntHashSet.getSpaceOverheadInWords();
-  }
+   * For FSBagIndex low level iterator use
+   * @return the position of the next element, or -1;
+   */
+  int moveToNext(int position);
   
   /**
-   * logic for switching representation
-   * 
-   * BitSet preferred - is faster, can be more compact, and permits ordered iteration
-   * HashSet used when BitSet is too space inefficient.
-   * 
-   * Avoid switching if the new key being added won't increase the representation size
-   *   - for hash: if number of elements +1 won't cause doubling of the hash table
-   *   - for bitset: if key will fit in existing "capacity"
-   *   
-   * Compute space needed after adding
-   *   - bitset:  array doubles
-   *   - hashset: array doubles
-   *
-   * Preventing jitters back and forth:
-   *   - removes don't cause switching
-   *   - compare against other size including its non-linear space jumps.   
+   * For FSBagIndex low level iterator use
+   * @return the position of the next element, or -1;
    */
-  
-  private void maybeSwitchRepresentation(int key) {
-    if (isBitSet) {
-      /********************
-       *    Bit Set 
-       ********************/
-      
-      /*********
-       * Size is 0, 
-       * use this opportunity to set the offset
-       * unless that has been a bad strategy for this 
-       * instance already 
-       ********/
-      if (intBitSet == null) {
-        if (useOffset) {
-      
-          // initialize bitSetOffset to the key, minus some amount to allow some storage of previous values
-          //   a minimum of 63, a maximum of 512 == 16 words, when key is > 4K (128 words) 
-          final int spaceForLesserKeys = Math.min(1023,  Math.max(63, key >> 3));
-          final int offset = Math.max(0, key - spaceForLesserKeys);
-          intBitSet = new IntBitSet(BIT_SET_OVERALLOCATE + key - offset, offset);
-          // because of offset, won't have to switch to hash for this key
-          // 
-          // force an initial allocation of this sufficient to keep
-          //   a) the space below, plus
-          //   b) 
-          return;
-        } else {
-          intBitSet = new IntBitSet(BIT_SET_OVERALLOCATE + key);
-          // don't return, see if we should immediately switch to hash representation
-        }
-      }
-      
-      final int offset = intBitSet.getOffset();
-      
-      if (key < offset) {
-        adjustBitSetForLowerOffset(key);
-        return;
-      }
-      
-      // return if key fits in existing allocation
-      final int spaceUsed = intBitSet.getSpaceUsed_in_bits();
-      if ((key - offset) < spaceUsed) {
-        return;
-      }
+  int moveToPrevious(int position);
 
-      // space used in words (32 bit words)
-      final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(key - offset, spaceUsed);
-      final int hashSetOverAllocateSize = getHashSetOverAllocateSize(intBitSet.size() + 1);
-      final int hashSetOverAllocateSpace = getHashSetSpace(hashSetOverAllocateSize);
-      // switch if hashmap space plus hysteresis would be < bitmap space
-      if (hashSetOverAllocateSpace < (bitSetSpaceNeeded - HYSTERESIS)) {
-        switchToHashSet(hashSetOverAllocateSize);
-      }
-      return;    
-    } 
-    
-    /********************
-     *    Hash Set 
-     ********************/
-    // maybe switch to IntBitSet
-    // space used in words (32 bit words)
-    // don't bother to include the new element - it might not be "added"
-    //   if it was already there, and only serves to decrease hysteresis a bit
-  
-    if (intHashSet.wontExpand()) {
-      return;
-    }
-    
-    final int hashSetSpaceNeeded = getHashSetSpace();    
-    
-    final int maxInt = intHashSet.getMostPositive();
-    final int offset = useOffset ? intHashSet.getMostNegative() : 0;
-    final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(BIT_SET_OVERALLOCATE + maxInt - offset, 0);
- 
-    if (bitSetSpaceNeeded < (hashSetSpaceNeeded - HYSTERESIS)) {
-      switchToBitSet(maxInt, offset);
-    }  
-  }
-    
-  private void switchToHashSet(int size) {
-    isBitSet = false;
-    intHashSet = new IntHashSet(size);
-    IntListIterator it = intBitSet.getIterator();
-    while (it.hasNext()) {
-      intHashSet.add(it.next());
-    }
-    intBitSet = null;
-  }
-  
-  private void switchToBitSet(int maxInt, int offset) {
-    isBitSet = true;
-    intBitSet = new IntBitSet(BIT_SET_OVERALLOCATE + maxInt - offset, offset);
-    IntListIterator it = intHashSet.getIterator();
-    while (it.hasNext()) {
-      intBitSet.add(it.next());
-    }
-    intHashSet = null;
-  }
-   
-}
+  /**
+   * For FSBagIndex low level iterator use
+   * @return true if the position is between the first and last element inclusive.
+   */
+  boolean isValid(int position);
+}
\ No newline at end of file

Copied: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet_impl.java (from r1634141, uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java)
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet_impl.java?p2=uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet_impl.java&p1=uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java&r1=1634141&r2=1636458&rev=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet_impl.java Mon Nov  3 22:15:41 2014
@@ -25,36 +25,66 @@ import java.util.NoSuchElementException;
 
 
 /**
- * An set of non-zero ints.
+ * An set of non-zero integers, ability to iterate over them (possibly in a sorted way),
+ * with O(1) operations for adding, removing, and testing for contains.
  * 
- *   Uses 2 1/2 different representations depending on the ratio of the max int stored and the total number of ints stored.
- *     For large sparse ints, it uses the IntHashSet
- *     
- *     For more dense ints, it uses bitSets or bitSets with an offset
+ * Optimized for adding mostly increasing integers (with some space for less-than-first-one-added).
+ *   - major uses: indexes of Feature Structures.
+ * Optimize for small sets 
+ * 
+ * Graceful degradation for completely random integer operations on large sets; 
+ *   keep O(1) operations, at the expense of extra space (< 3 x size).
+ * 
+ *   Uses several different representations depending on the range of ints stored and the total number of ints stored.
+ *   
+ *     Sizes:  Tiny, medium, large 
+ *     Ranges: semi-knowable, unknowable;  
+ *               if semi-knowable, dense, small-range (< 65K), large-range
  *     
- *     It switches when adding new members if the other representation is sufficiently smaller 
+ *     For all sizes, 
+ *       if dense, use IntBitSet (with offset)
+ *       
+ *     else 
+ *       For large, (implies large-range, too) use IntHashSet  
+ *       
+ *       For medium,
+ *         if small-range < 65K, use IntHashSet with offset 
+ *         else use IntHashSet 
+ *   
+ *   Arrange switching between representations
+ *     to occur infrequently, especially as cost of switching (size) grows
+ *   Arrange checking for switching to occur infrequently, taking into account how underlying data structures grow
+ *     (which is often by doubling)
+ *   Switch when adding new member(s) if alternative representation is sufficiently smaller
+ *   
  */
-public class PositiveIntSet {
+public class PositiveIntSet_impl implements PositiveIntSet {
   
   // number of words a inthashset has to be better than intbitset to cause switch
   private static final int HYSTERESIS = 16;
   
-  // Extra space in bitset for initial capacity - can add an int up to that size 
+  private static final int INT_SET_MAX_SIZE = 16;
+  
+  private static final int HASH_SET_SHORT_MAX_SIZE = (1 << 16) - 1; // 65535 
+  
+  // Extra space in bitset for expansion - can add an int (- offset) up to that size
+  // Initial space = 2 words = 1 long.
   //   64 *4 is 8 words, = 256.
-  private static final int BIT_SET_OVERALLOCATE = 64 * 4;
+  private static final int BIT_SET_OVERALLOCATE = 64 * 4; // 8 words minimum
   
-  // Extra space in hashset for initial capacity - can add a minimum of that many more members
-  //   without hitting the hash-set doubling.
-  private static final int HASH_SET_OVERALLOCATE = 8;
   // Extra space in hashset for initial capacity - when creating from bitmap set - multiplier of existing size
-  private static final int HASH_SET_OVERALLOCATE_MULTIPLIER = 1; // bit shift right = divide by 2 = 50% of existing capacity
-  
-  private IntBitSet intBitSet = null;
+  private static final int HASH_SET_OVERALLOCATE_DIVIDER_SHIFT = 1; // bit shift right = divide by 2 = 50% of existing capacity
   
-  private IntHashSet intHashSet = null;
+  private static final int[] EMPTY_INT_ARRAY = new int[0];
+
+  private PositiveIntSet intSet;  // one of 3 representations: IntBitSet, IntHashSet, IntSet (based on IntVector)
   
   //package private for test case
-  boolean isBitSet = true;  
+  boolean isBitSet = false;
+  boolean isIntSet = false;  
+  boolean isHashSet = false;
+  
+  boolean secondTimeShrinkable = false;
   
   /**
    * Set false once we find we have to reduce the bit offset
@@ -62,20 +92,90 @@ public class PositiveIntSet {
   //package private for test case
   boolean useOffset = true;
   
+  public PositiveIntSet_impl() {
+    this(0, 0, 0);
+  }
+  
+  /**
+   * Set up a Positive Bit Set
+   * @param initialSize - if 0, don't allocate yet, wait for first add.
+   *           If isBitSetDense, then this number is interpreted as  
+   *             the first int to be added, typically with an offset.  
+   *           The next two params are used only if initialSize is not 0.
+   * @param estMin - the estimated minimum int value to be added
+   * @param estMax - the estimated max int value to be added
+   * @param isBitSetDense - true if we should use a bit set
+   * @param offsetFlag - a number >= 0, the offset to use, or -1 to automatically
+   *                 calculate the offset based on the initialSize
+   */
+  public PositiveIntSet_impl(int initialSize, int estMin, int estMax) {
+    if (initialSize == 0) {
+      isBitSet = true;  // try first as bit set with offset perhaps
+      return;  // delay allocation until we know what the 1st int will be
+    }
+    
+    if ((initialSize < 0) || 
+        (estMin < 0)      || 
+        (estMax < estMin)) {
+      throw new IllegalArgumentException();
+    }
+
+    estMax = Math.max(initialSize, estMax); // because its a positive int set
+
+    
+    // offsets are different for bit set and hash set
+    //   for bit set, it's set to the estMin - capped 12.5% from the estMin (cap is 16 words)
+    //   for hash set, if the size > 65K, it's ignored
+    //                 it's set to 3/4 up the "range" of est max - est min
+    
+    int offsetBitSet = estimatedBitSetOffset(estMin, -1);
+
+    int offsetHashSet = getHashSetOffset(estMax, estMin);
+    
+    int bitSetWordsNeeded = getBitSetSpaceFromRange(estMax - offsetBitSet, 0);
+    int hashSetWordsNeeded = getHashSetSpace(initialSize, estMax, estMin);
+
+    // no HYSTERESIS in test below - pick absolute smaller for initial alloc
+    if (bitSetWordsNeeded < hashSetWordsNeeded) {
+      allocateIntBitSet(estMax, estMin, offsetBitSet);
+      isBitSet = true;
+      return;
+    }
+    
+    // choose between IntHashSet and IntSet based on size
+    if (initialSize <= INT_SET_MAX_SIZE) {
+      intSet = new IntSet(initialSize);
+      isIntSet = true;
+      return;
+    }
+
+    intSet = new IntHashSet(initialSize, offsetHashSet);
+    isHashSet = true;
+    
+  }
+  
+  @Override
+  public IntListIterator iterator() {
+    return getUnorderedIterator();
+  }
+  
   public IntListIterator getUnorderedIterator() {
-    if (isBitSet) {
-      return intBitSet.getIterator();
+    if (null == intSet) {
+      intSet = new IntSet(0);
     }
-    return intHashSet.getIterator();
+    return intSet.iterator();
   }
   
   public IntListIterator getOrderedIterator() {
     if (isBitSet) {
-      return intBitSet.getIterator();
+      if (null == intSet) {
+        return new IntListIteratorOverArray(EMPTY_INT_ARRAY);
+      }
+      return intSet.iterator();
     }
     
     int[] allValues = new int[size()];
-    IntListIterator it = intHashSet.getIterator();
+    IntListIterator it = intSet.iterator();
     int i = 0;
     while (it.hasNext()) {
       allValues[i++] = it.next();
@@ -130,36 +230,74 @@ public class PositiveIntSet {
     }   
   }
   
+  /* (non-Javadoc)
+   * @see org.apache.uima.internal.util.PositiveIntSetI#clear()
+   */
+  @Override
   public void clear() {
-    if (isBitSet) {
-      intBitSet.clear();
-    } else {
-      intHashSet.clear();
+    if (null != intSet) {
+      if (isBitSet) {
+        if (secondTimeShrinkable) {
+          secondTimeShrinkable = false;
+          IntBitSet ib = (IntBitSet) intSet;
+          int max = ib.getLargestMenber();
+          int offset = ib.getOffset();
+          intSet = new IntBitSet((max - offset) >> 1, offset);
+        } else {
+          intSet.clear();
+          secondTimeShrinkable = true;
+        }
+      } else {
+        intSet.clear();
+      }
     }
   }
   
+  /* (non-Javadoc)
+   * @see org.apache.uima.internal.util.PositiveIntSetI#contains(int)
+   */
+  @Override
   public boolean contains(int key) {
-    if (isBitSet) {
-      return intBitSet.contains(key);
-    } else {
-      return intHashSet.contains(key);
-    }
+    return (null != intSet) ? intSet.contains(key) : false;
   }
  
-  /**
-   * 
-   * @param key -
-   * @return true if this set did not already contain the specified element
+  @Override
+  public int find(int element) {
+    return (null != intSet) ? intSet.find(element) : -1;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.uima.internal.util.PositiveIntSetI#add(int)
    */
+  @Override
   public boolean add(int key) {
     maybeSwitchRepresentation(key);
-    if (isBitSet) {
-      return intBitSet.add(key);
-    } else {
-      return intHashSet.add(key);
-    }
+    return intSet.add(key);
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.uima.internal.util.PositiveIntSetI#remove(int)
+   */
+  @Override
+  public boolean remove(int key) {
+    return (null != intSet) ? intSet.remove(key) : false;
   }
 
+  /* (non-Javadoc)
+   * @see org.apache.uima.internal.util.PositiveIntSetI#size()
+   */
+  @Override
+  public int size() {
+    return (null != intSet) ? intSet.size() : 0;
+  }
+  
+//  public void add(int[] vs) {
+//    for (int i : vs) {
+//      add(i);
+//    }
+//  }
+  
+  
   /**
    * When a key is added which is lower than the offset, we need to adjust the whole int set table.
    * Although an array copy could do this, we don't have access to the bitset impl.
@@ -170,43 +308,36 @@ public class PositiveIntSet {
    * @param key
    */
   private void adjustBitSetForLowerOffset(int key) {
-    final int largestInt = intBitSet.getLargestMenber();
-    final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(largestInt, 0);  // doesn't use offset
-    final int hashSetSpaceNeeded = getHashSetSpace(intBitSet.size());
+    final IntBitSet intBitSet = (IntBitSet) intSet; 
+    final int size = intBitSet.size() + 1;
+    final int largestInt = intBitSet.getLargestMenber();  // not called if key > largest member
+    final int bitSetSpaceNeeded = getBitSetSpaceFromRange(largestInt, 0);  // doesn't use offset
+    final int hashSetSpaceNeeded = getHashSetSpace(size, largestInt, key); // computes using load factor,
+    useOffset = false;  // stop using because we have evidence it isn't appropriate
     
     if (hashSetSpaceNeeded < (bitSetSpaceNeeded - HYSTERESIS)) {
-      switchToHashSet(size());
+      int offset = getHashSetOffset(largestInt, key);
+      switchFromBitSet(size, offset);
+      
+      // keep as bit set - it's smaller, but drop the offset
     } else {
-      IntBitSet bs = new IntBitSet(largestInt + BIT_SET_OVERALLOCATE);
-      IntListIterator it = intBitSet.getIterator();
+      IntListIterator it = intSet.iterator();
+      allocateIntBitSet(largestInt, key, 0);
       while (it.hasNext()) {
-        bs.add(it.next());
+        intSet.add(it.next());
       }
-      intBitSet = bs;      
-    }
-    useOffset = false;  // stop using because we have evidence it isn't appropriate
-  }
-  
-  public void add(int[] vs) {
-    for (int i : vs) {
-      add(i);
     }
   }
   
-  public boolean remove(int key) {
-    if (isBitSet) {
-      return intBitSet.remove(key);
-    } else {
-      return intHashSet.remove(key);
-    }   
-  }
-
-  public int size() {
-    if (isBitSet) {
-      return intBitSet.size();
-    } else {
-      return intHashSet.size();
-    }
+  private static int getHashSetOffset(int estMax, int estMin) {
+    int range = estMax - estMin;
+    assert (range >= 0);
+    return  (range > HASH_SET_SHORT_MAX_SIZE) ?
+        Integer.MIN_VALUE :  // signal to force use of 4 byte ints
+        // make the offset such that the 0 point is 3/4 of the way
+        // toward the top of the range, because there's more likelyhood
+        // of expansion in that direction
+        estMax - (range >> 2);   
   }
   
   public int[] toUnorderedIntArray() {
@@ -229,26 +360,37 @@ public class PositiveIntSet {
   }
   
   /** 
-   * Only called if key won't fit in existing hashset allocation
+   * Only called if key won't fit in existing IntHashSet, IntSet or IntBitSet allocation
    * 
-   * Using existing bitset, compute what its size() (capacity) would be if the 
-   * key were added; include the overhead of the intbitset class.
+   * If IntBitSet (spaceUsed != 0) compute what its size() (capacity) would be if the 
+   * key were added (causing doubling); include the overhead of the intbitset class.
    * 
-   * long words required = larger of double the current size, or the number of words required 
+   * long words required = larger of double the current size, or the number of long words required 
 
-   * @param maxKeyLessOffset
-   * @return number of words needed to store the highest value
+   * @param adjKey - the range of bits needed
+   * @param spaceUsed - the number of bits currently allocated (if currently a bit set), else 0
+   * @return number of words needed to store the range, 
    */
-  private int getBitSetSpaceFromMaxInt(int maxKeyLessOffset, int spaceUsed) {
-    int w64 = 1 + (maxKeyLessOffset >> 6); // 64 bits per long, add one because 0 to 63 takes one word)
-    spaceUsed = spaceUsed >> 5;  // in # of long words, * 2 because the alloc doubles the space 
+  private static int getBitSetSpaceFromRange(int adjKey, int spaceUsed) {
+    int w64 = 1 + (adjKey >> 6); // 64 bits per long, add one because 0 to 63 takes one word)
+    spaceUsed = spaceUsed >> (6 - 1);  // in # of long words, * 2 because the alloc doubles the space 
     
-    int newSpace = Math.max(spaceUsed, w64) << 1; // size is in 32 bit words at the end  
+    int newSpace = Math.max(spaceUsed, w64) << 1; // <<1 to convert to # of words from # of longs  
     // e.g., 31 key => 1 word, 32 key => 2 words
     return newSpace + 2;  // 2 more for IntBitSet object overhead
   }
   
   /**
+   * 
+   * @param size including new element to be added
+   * @return number of words including overhead to store that many elements
+   *    unless is bigger than INT_SET_MAX_SIZE, then return MAX_VALUE
+   */
+  private static int getIntSetSpace(int size) {
+    return (size < INT_SET_MAX_SIZE) ? size + 4 : Integer.MAX_VALUE;
+  }
+  
+  /**
    * Only called if key won't fit in existing allocation
    * 
    * returns new hash table size ( usually double) + overhead 
@@ -257,28 +399,40 @@ public class PositiveIntSet {
    * @return
    */
   private int getHashSetSpace() {
-    return intHashSet.getSpaceUsedInWords() * 2 + IntHashSet.getSpaceOverheadInWords();
+    return ((IntHashSet)intSet).getSpaceUsedInWords() * 2 + IntHashSet.getSpaceOverheadInWords();
   }
   
   /**
+   * For the case where the intHashSet doesn't yet exist
+   * @param numberOfElements -
+   * @param estMax - the largest int to be stored
+   * @param estMin - the smallest int to be stored
+   * @return the size in words
+   */
+  private static int getHashSetSpace(int numberOfElements, int estMax, int estMin) {
+    boolean isShort;
+    if (numberOfElements > HASH_SET_SHORT_MAX_SIZE) {
+      isShort = false;
+    } else {
+      isShort = (estMax - estMin) < HASH_SET_SHORT_MAX_SIZE;
+    }
+    int numberOfTableElements =  IntHashSet.tableSpace(numberOfElements, IntHashSet.DEFAULT_LOAD_FACTOR);
+    return (numberOfTableElements >> ((isShort) ? 1 : 0)) 
+        + IntHashSet.getSpaceOverheadInWords();
+  }
+
+  /**
    * When converting from bitset to hash set, the initial hash set should be 
    * big enough so that it takes a while before it is doubled-expanded.
    * 
+   * Reduce overallocation for small sizes because the cost to switch is low, and the % overallocate
+   *   could be a large.
+   * 
    * @param existingSize
    * @return
    */
-  private int getHashSetOverAllocateSize(int existingSize) {
-    return existingSize + (existingSize >> HASH_SET_OVERALLOCATE_MULTIPLIER) + HASH_SET_OVERALLOCATE;
-  }
-  
-  /**
-   * For the case where the intHashSet doesn't yet exist
-   * @param numberOfElements -
-   * @return the size in words
-   */
-  private int getHashSetSpace(int numberOfElements) {
-    return IntHashSet.tableSpace(numberOfElements, IntHashSet.DEFAULT_LOAD_FACTOR) + 
-           IntHashSet.getSpaceOverheadInWords();
+  private static int getHashSetOverAllocateSize(int existingSize) {
+    return existingSize + (existingSize >> HASH_SET_OVERALLOCATE_DIVIDER_SHIFT);
   }
   
   /**
@@ -299,104 +453,265 @@ public class PositiveIntSet {
    *   - removes don't cause switching
    *   - compare against other size including its non-linear space jumps.   
    */
-  
-  private void maybeSwitchRepresentation(int key) {
+
+  private void maybeSwitchRepresentation(int newKey) {
     if (isBitSet) {
-      /********************
-       *    Bit Set 
-       ********************/
-      
+      handleBitSet(newKey);
+      return;
+    } 
+    if (isIntSet) {
+      handleIntSet(newKey);
+      return;
+    }
+    handleHashSet(newKey);
+  }
+  
+  
+  /********************
+   *    Bit Set 
+   ********************/
+  private void handleBitSet(int newKey) {
+    IntBitSet intBitSet = (IntBitSet) intSet;
+
+    if (intBitSet == null) {
       /*********
        * Size is 0, 
        * use this opportunity to set the offset
        * unless that has been a bad strategy for this 
        * instance already 
        ********/
-      if (intBitSet == null) {
-        if (useOffset) {
-      
-          // initialize bitSetOffset to the key, minus some amount to allow some storage of previous values
-          //   a minimum of 63, a maximum of 512 == 16 words, when key is > 4K (128 words) 
-          final int spaceForLesserKeys = Math.min(1023,  Math.max(63, key >> 3));
-          final int offset = Math.max(0, key - spaceForLesserKeys);
-          intBitSet = new IntBitSet(BIT_SET_OVERALLOCATE + key - offset, offset);
-          // because of offset, won't have to switch to hash for this key
-          // 
-          // force an initial allocation of this sufficient to keep
-          //   a) the space below, plus
-          //   b) 
-          return;
-        } else {
-          intBitSet = new IntBitSet(BIT_SET_OVERALLOCATE + key);
-          // don't return, see if we should immediately switch to hash representation
-        }
-      }
-      
-      final int offset = intBitSet.getOffset();
-      
-      if (key < offset) {
-        adjustBitSetForLowerOffset(key);
-        return;
-      }
-      
-      // return if key fits in existing allocation
-      final int spaceUsed = intBitSet.getSpaceUsed_in_bits();
-      if ((key - offset) < spaceUsed) {
+      boolean guaranteedFits = allocateIntBitSet(newKey, newKey, -1);
+      intBitSet = (IntBitSet) intSet;
+      if (guaranteedFits) {
         return;
       }
+    }
+    
+    final int offset = intBitSet.getOffset();
+    
+    if (newKey < offset) {
+      // may change representation; always abandons this bit set
+      adjustBitSetForLowerOffset(newKey);
+      return;
+    }
+    
+    // return if newKey fits in existing allocation
+    final int spaceUsed = intBitSet.getSpaceUsed_in_bits();
+    final int adjKey = newKey - offset;
+    if (adjKey < spaceUsed) {
+      return;
+    }
+    
+    
+    // this newKey doesn't fit in existing bit set allocation; it's too large
+    //   figure out if we should expand this bit set or switch representations
 
-      // space used in words (32 bit words)
-      final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(key - offset, spaceUsed);
-      final int hashSetOverAllocateSize = getHashSetOverAllocateSize(intBitSet.size() + 1);
-      final int hashSetOverAllocateSpace = getHashSetSpace(hashSetOverAllocateSize);
-      // switch if hashmap space plus hysteresis would be < bitmap space
-      if (hashSetOverAllocateSpace < (bitSetSpaceNeeded - HYSTERESIS)) {
-        switchToHashSet(hashSetOverAllocateSize);
-      }
-      return;    
-    } 
+    final int bitSetSpaceNeeded = getBitSetSpaceFromRange(adjKey, spaceUsed);  // in 32 bit words
+    final int sizeNeeded = intBitSet.size() + 1;
     
-    /********************
-     *    Hash Set 
-     ********************/
+    if (getIntSetSpace(sizeNeeded) < bitSetSpaceNeeded) {
+      switchToIntSet(sizeNeeded);
+      return;
+    }
+    
+    // when computing hashset size, overallocate by 50% to avoid immediately doubling
+    final int hashSetOverAllocateSize = getHashSetOverAllocateSize(sizeNeeded);
+    final int hashSetOverAllocateSpace = getHashSetSpace(hashSetOverAllocateSize, newKey, offset);
+    // switch if hashmap space plus hysteresis would be < bitmap space
+    if (hashSetOverAllocateSpace < (bitSetSpaceNeeded - HYSTERESIS)) {
+      switchToHashSet(hashSetOverAllocateSize, getHashSetOffset(newKey, offset));
+    }
+    return;    
+  }
+  
+  /********************
+   *     IntSet       * 
+   ********************/
+  private void handleIntSet(int newKey) {
+    IntSet is = (IntSet) intSet;
+    final int size = is.size() + 1;
+    if (size < INT_SET_MAX_SIZE) {
+      return;
+    }
+    
+  
+    // switch to bit or IntHash
+    
+    // first, compute max and min values
+    int mostPos = Integer.MIN_VALUE;
+    int mostNeg = Integer.MAX_VALUE;
+    for (int i = 0; i < size - 1; i++) {
+      final int v = is.get(i);
+      if (v > mostPos) { mostPos = v;}
+      if (v < mostNeg) { mostNeg = v;}
+    }
+    // include the newKey in the computation of these
+    if (newKey > mostPos) { mostPos = newKey;}
+    if (newKey < mostNeg) { mostNeg = newKey;}
+    
+    // make a bit set or hash set.
+    //   have 16 members, so the hash set is approx 32 + 11 words = 43.
+    //   bit set is approx 2 + range / 32
+    final int bitSetSpace = getBitSetSpaceFromRange(mostPos - mostNeg, 0);
+    final int hashSetOverAllocSize = getHashSetOverAllocateSize(size);
+    final int hashSetSpace = getHashSetSpace(hashSetOverAllocSize, mostPos, mostNeg);
+    if (bitSetSpace < hashSetSpace) {
+      switchToBitSet(mostPos, mostNeg, estimatedBitSetOffset(mostNeg, -1));
+      return;
+    }
+    switchToHashSet(hashSetOverAllocSize, getHashSetOffset(mostPos, mostNeg));
+  }
+  
+  /********************
+   *    Hash Set 
+   ********************/
+  private void handleHashSet(int newKey) {
     // maybe switch to IntBitSet
     // space used in words (32 bit words)
     // don't bother to include the new element - it might not be "added"
     //   if it was already there, and only serves to decrease hysteresis a bit
-  
+    final IntHashSet intHashSet = (IntHashSet) intSet;
     if (intHashSet.wontExpand()) {
       return;
     }
     
-    final int hashSetSpaceNeeded = getHashSetSpace();    
+    final int hashSetSpaceNeeded = getHashSetSpace(); //computes the doubline of space   
     
     final int maxInt = intHashSet.getMostPositive();
     final int offset = useOffset ? intHashSet.getMostNegative() : 0;
-    final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(BIT_SET_OVERALLOCATE + maxInt - offset, 0);
+    final int bitSetSpaceNeeded = getBitSetSpaceFromRange(BIT_SET_OVERALLOCATE + maxInt - offset, 0);
  
     if (bitSetSpaceNeeded < (hashSetSpaceNeeded - HYSTERESIS)) {
-      switchToBitSet(maxInt, offset);
+      switchToBitSet(maxInt, intHashSet.getMostNegative(), offset);
     }  
   }
+  
+  /**
+   * Allocate a new IntBitSet 
+   * @param estMax the largest int to store in the bit set - determines the initial size
+   * @param estMin this is a lower bound on the value that can be in the bit set
+   * @param offsetSpec this is the offset to use, or -1, to calculate the offset from
+   *                   the initial_size (assuming a small amount of adds will be for
+   *                   ints somewhat less than the initial_size entry (being treated
+   *                   as the first int to be added)
+   * @return true if allocated with offset, implying guarantee the estMax key fits.
+   */
+  private boolean allocateIntBitSet(int estMax, int estMin, int offsetSpec) {
+    isBitSet = true;
+    isHashSet = isIntSet = false;
+    if (useOffset) {
+      
+      // initialize bitSetOffset to the key, minus some amount to allow some storage of previous values
+      //   a minimum of 63, a maximum of 512 == 16 words, when key is > 4K (128 words) 
+      final int offset = estimatedBitSetOffset(estMin, offsetSpec);
+      intSet = new IntBitSet(BIT_SET_OVERALLOCATE + estMax - offset, offset);
+      // because of offset, won't have to switch to hash for this key
+      return true;
+    } else {
+      intSet = new IntBitSet(BIT_SET_OVERALLOCATE + estMax);
+      return false;
+    }
+  }
+  
+  private static int estimatedBitSetOffset(int estMin, int offsetSpec) {
+    return   // initialize bitSetOffset to the key, minus some amount to allow some storage of previous values
+             //   a minimum of 63, a maximum of 512 == 16 words, when key is > 4K (128 words) 
+       (offsetSpec == -1) ? 
+            Math.max(0, estMin - Math.min(1023,  Math.max(63, estMin >> 3))) :
+            offsetSpec;
+  }
+  
+  /**
+   * switching from bit set to either IntSet or IntHashSet
+   * @param size - space needed including new item
+   * @param offset - for IntHashSet values kept as short, the offset subtracted from values before storing them
+   *                 offset == Integer.MIN_VALUE means use 4 byte ints              
+   */
+  private void switchFromBitSet(int size, int offset) {
+    if (size < INT_SET_MAX_SIZE) {
+      switchToIntSet(size);
+      return;
+    }
+    switchToHashSet(size, offset);
+  }
+  
+  /**
+   * switch to IntSet
+   * @param size number of elements to be able to store without expanding
+   */
+  private void switchToIntSet(int size) {
+    IntListIterator it = intSet.iterator();
+
+    intSet = new IntSet(size);
+    isIntSet = true;
+    isBitSet = isHashSet = false;
     
-  private void switchToHashSet(int size) {
-    isBitSet = false;
-    intHashSet = new IntHashSet(size);
-    IntListIterator it = intBitSet.getIterator();
     while (it.hasNext()) {
-      intHashSet.add(it.next());
+      intSet.add(it.next());
     }
-    intBitSet = null;
+    return;
   }
   
-  private void switchToBitSet(int maxInt, int offset) {
-    isBitSet = true;
-    intBitSet = new IntBitSet(BIT_SET_OVERALLOCATE + maxInt - offset, offset);
-    IntListIterator it = intHashSet.getIterator();
+  /**
+   * Switch from any intSet impl to Hash Set
+   * @param size - the capacity - this many elements could be stored before swtiching
+   * @param offset - used only when the size < 65K, and is a value subtracted from elements before
+   *                 storing them, in an attempt to fit the results into just "short"s instead of "int"s
+   *                 if == MIN_VALUE, then force 4 byte ints
+   */
+  private void switchToHashSet(int size, int offset) {
+    IntListIterator it = intSet.iterator();
+
+    intSet = new IntHashSet(size);
+    isIntSet = isBitSet = false;
+    isHashSet = true;
+    
+    while (it.hasNext()) {
+      intSet.add(it.next());
+    }
+  }
+  
+  private void switchToBitSet(int estMax, int estMin, int offset) {
+    final IntListIterator it = intSet.iterator();
+    
+    allocateIntBitSet(estMax, estMin, offset);
+    
     while (it.hasNext()) {
-      intBitSet.add(it.next());
+      intSet.add(it.next());
+    }
+  }
+
+  @Override
+  public int get(int position) {
+    if (null == intSet) {
+      throw new NoSuchElementException();
     }
-    intHashSet = null;
+    return intSet.get(position);
+  }
+
+  @Override
+  public int moveToFirst() {
+    return (null == intSet) ? -1 : intSet.moveToFirst();
+  }
+
+  @Override
+  public int moveToLast() {
+    return (null == intSet) ? -1 : intSet.moveToLast();
+  }
+
+  @Override
+  public int moveToNext(int position) {
+    return (null == intSet) ? -1 : intSet.moveToNext(position);
+  }
+
+  @Override
+  public int moveToPrevious(int position) {
+    return (null == intSet) ? -1 : intSet.moveToPrevious(position);
   }
-   
+
+  @Override
+  public boolean isValid(int position) {
+    return (null == intSet) ? false : intSet.isValid(position);
+  }
+
 }

Modified: uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java Mon Nov  3 22:15:41 2014
@@ -37,7 +37,7 @@ public class IntBitSetTest extends TestC
     ibs.add(15);
     ibs.add(188);
     
-    IntListIterator it = ibs.getIterator();    
+    IntListIterator it = ibs.iterator();    
     assertTrue(it.hasNext());
     assertEquals(15, it.next());
     assertTrue(it.hasNext());
@@ -50,7 +50,7 @@ public class IntBitSetTest extends TestC
     
     ibs.add(1015);
     ibs.add(1188);
-    it = ibs.getIterator();    
+    it = ibs.iterator();    
     assertTrue(it.hasNext());
     assertEquals(1015, it.next());
     assertTrue(it.hasNext());
@@ -89,7 +89,7 @@ public class IntBitSetTest extends TestC
     ibs.remove(188);
     assertEquals(101, ibs.getLargestMenber());
     assertEquals(2,ibs.size());
-    IntListIterator it = ibs.getIterator();    
+    IntListIterator it = ibs.iterator();    
     assertTrue(it.hasNext());
     assertEquals(15, it.next());
     assertTrue(it.hasNext());
@@ -113,4 +113,18 @@ public class IntBitSetTest extends TestC
     assertEquals(2,ibs.size());
     
   }
+  
+  public void testIterator() {
+    ibs = new IntBitSet(63, 1000);
+    for (int i = 0; i < 10; i = i + 4) {
+      ibs.add(1000 + i);
+    }
+    
+    IntListIterator it = ibs.iterator();
+    for (int i = 0; i < 10; i += 4) {
+      assertTrue(it.hasNext());
+      assertEquals(1000 + i, it.next());        
+    }
+    assertFalse(it.hasNext());
+  }
 }

Modified: uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java?rev=1636458&r1=1636457&r2=1636458&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java Mon Nov  3 22:15:41 2014
@@ -64,7 +64,7 @@ public class IntHashSetTest extends Test
   }
   
   private int[] getSortedValues(IntHashSet s) {
-    IntListIterator it = s.getIterator();
+    IntListIterator it = s.iterator();
     int[] r = new int[s.size()];
     int i = 0;
     while (it.hasNext()) {
@@ -91,7 +91,7 @@ public class IntHashSetTest extends Test
   
   public void testWontExpand() {
     ihs = new IntHashSet(21);
-    assertEquals(32, ihs.getSpaceUsedInWords());
+    assertEquals(16, ihs.getSpaceUsedInWords());
     assertTrue(ihs.wontExpand(20));
     assertFalse(ihs.wontExpand(21));
   }