You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/02/03 13:38:06 UTC

svn commit: r906032 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/test/org/apache/lucene/analysis/

Author: uschindler
Date: Wed Feb  3 12:37:53 2010
New Revision: 906032

URL: http://svn.apache.org/viewvc?rev=906032&view=rev
Log:
LUCENE-2247: Add a CharArrayMap<V> for performance improvements in some stemmers and synonym filters

Added:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArrayMap.java
      - copied, changed from r905065, lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=906032&r1=906031&r2=906032&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Feb  3 12:37:53 2010
@@ -137,6 +137,9 @@
   on the provided Version. Version < 3.1 will use the char-API.
   (Simon Willnauer via Uwe Schindler)
 
+* LUCENE-2247: Added a CharArrayMap<V> for performance improvements
+  in some stemmers and synonym filters. (Uwe Schindler)
+
 Optimizations
 
 * LUCENE-2086: When resolving deleted terms, do so in term sort order

Copied: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArrayMap.java (from r905065, lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java)
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArrayMap.java?p2=lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArrayMap.java&p1=lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java&r1=905065&r2=906032&rev=906032&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArrayMap.java Wed Feb  3 12:37:53 2010
@@ -1,15 +1,5 @@
 package org.apache.lucene.analysis;
 
-import java.util.Arrays;
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Set;
-
-import org.apache.lucene.util.CharacterUtils;
-import org.apache.lucene.util.Version;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -27,17 +17,26 @@
  * limitations under the License.
  */
 
+import java.util.Arrays;
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;
 
 /**
- * A simple class that stores Strings as char[]'s in a
- * hash table.  Note that this is not a general purpose
+ * A simple class that stores key Strings as char[]'s in a
+ * hash table. Note that this is not a general purpose
  * class.  For example, it cannot remove items from the
- * set, nor does it resize its hash table to be smaller,
- * etc.  It is designed to be quick to test if a char[]
- * is in the set without the necessity of converting it
+ * map, nor does it resize its hash table to be smaller,
+ * etc.  It is designed to be quick to retrieve items
+ * by char[] keys without the necessity of converting
  * to a String first.
  * <p>You must specify the required {@link Version}
- * compatibility when creating {@link CharArraySet}:
+ * compatibility when creating {@link CharArrayMap}:
  * <ul>
  *   <li> As of 3.1, supplementary characters are
  *       properly lowercased.</li>
@@ -45,30 +44,23 @@
  * Before 3.1 supplementary characters could not be
  * lowercased correctly due to the lack of Unicode 4
  * support in JDK 1.4. To use instances of
- * {@link CharArraySet} with the behavior before Lucene
- * 3.1 pass a {@link Version} < 3.1 to the constructors.
- * <P>
- * <em>Please note:</em> This class implements {@link java.util.Set Set} but
- * does not behave like it should in all cases. The generic type is
- * {@code Set<Object>}, because you can add any object to it,
- * that has a string representation. The add methods will use
- * {@link Object#toString} and store the result using a {@code char[]}
- * buffer. The same behavior have the {@code contains()} methods.
- * The {@link #iterator()} returns an {@code Iterator<String>}.
- * For type safety also {@link #stringIterator()} is provided.
+ * {@link CharArrayMap} with the behavior before Lucene
+ * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
  */
-public class CharArraySet extends AbstractSet<Object> {
+public class CharArrayMap<V> extends AbstractMap<Object,V> {
+  // private only because missing generics
+  private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<Object>();
+
   private final static int INIT_SIZE = 8;
-  private char[][] entries;
-  private int count;
-  private final boolean ignoreCase;
-  public static final CharArraySet EMPTY_SET = new EmptyCharArraySet();
-  
   private final CharacterUtils charUtils;
-  private final Version matchVersion;
+  private boolean ignoreCase;  
+  private int count;
+  final Version matchVersion; // package private because used in CharArraySet
+  char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+  V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
 
   /**
-   * Create set with enough capacity to hold startSize terms
+   * Create map with enough capacity to hold startSize terms
    * 
    * @param matchVersion
    *          compatibility match version see <a href="#version">Version
@@ -79,101 +71,103 @@
    *          <code>false</code> if and only if the set should be case sensitive
    *          otherwise <code>true</code>.
    */
-  public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
+  @SuppressWarnings("unchecked")
+  public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
     this.ignoreCase = ignoreCase;
     int size = INIT_SIZE;
     while(startSize + (startSize>>2) > size)
       size <<= 1;
-    entries = new char[size][];
+    keys = new char[size][];
+    values = (V[]) new Object[size];
     this.charUtils = CharacterUtils.getInstance(matchVersion);
     this.matchVersion = matchVersion;
   }
 
   /**
-   * Creates a set from a Collection of objects. 
+   * Creates a map from the mappings in another map. 
    * 
    * @param matchVersion
    *          compatibility match version see <a href="#version">Version
    *          note</a> above for details.
    * @param c
-   *          a collection whose elements to be placed into the set
+   *          a map whose mappings to be copied
    * @param ignoreCase
    *          <code>false</code> if and only if the set should be case sensitive
    *          otherwise <code>true</code>.
    */
-  public CharArraySet(Version matchVersion, Collection<? extends Object> c, boolean ignoreCase) {
+  public CharArrayMap(Version matchVersion, Map<?,? extends V> c, boolean ignoreCase) {
     this(matchVersion, c.size(), ignoreCase);
-    addAll(c);
-  }
-
-  /**
-   * Creates a set with enough capacity to hold startSize terms
-   * 
-   * @param startSize
-   *          the initial capacity
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
-   */
-  @Deprecated
-  public CharArraySet(int startSize, boolean ignoreCase) {
-    this(Version.LUCENE_30, startSize, ignoreCase);
+    putAll(c);
   }
   
-  /**
-   * Creates a set from a Collection of objects. 
-   * 
-   * @param c
-   *          a collection whose elements to be placed into the set
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead         
-   */  
-  @Deprecated
-  public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
-    this(Version.LUCENE_30, c.size(), ignoreCase);
-    addAll(c);
+  /** Create set from the supplied map (used internally for readonly maps...) */
+  private CharArrayMap(CharArrayMap<V> toCopy){
+    this.keys = toCopy.keys;
+    this.values = toCopy.values;
+    this.ignoreCase = toCopy.ignoreCase;
+    this.count = toCopy.count;
+    this.charUtils = toCopy.charUtils;
+    this.matchVersion = toCopy.matchVersion;
   }
   
-  /** Create set from entries */
-  private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
-    this.entries = entries;
-    this.ignoreCase = ignoreCase;
-    this.count = count;
-    this.charUtils = CharacterUtils.getInstance(matchVersion);
-    this.matchVersion = matchVersion;
-  }
-  
-  /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
+  /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
   @Override
   public void clear() {
     count = 0;
-    Arrays.fill(entries, null);
+    Arrays.fill(keys, null);
+    Arrays.fill(values, null);
   }
 
   /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
-   * are in the set */
-  public boolean contains(char[] text, int off, int len) {
-    return entries[getSlot(text, off, len)] != null;
+   * are in the {@link #keySet} */
+  public boolean containsKey(char[] text, int off, int len) {
+    return keys[getSlot(text, off, len)] != null;
+  }
+
+  /** true if the <code>CharSequence</code> is in the {@link #keySet} */
+  public boolean containsKey(CharSequence cs) {
+    return keys[getSlot(cs)] != null;
+  }
+
+  @Override
+  public boolean containsKey(Object o) {
+    if (o instanceof char[]) {
+      final char[] text = (char[])o;
+      return containsKey(text, 0, text.length);
+    } 
+    return containsKey(o.toString());
+  }
+
+  /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
+   * starting at <code>off</code> */
+  public V get(char[] text, int off, int len) {
+    return values[getSlot(text, off, len)];
   }
 
-  /** true if the <code>CharSequence</code> is in the set */
-  public boolean contains(CharSequence cs) {
-    return entries[getSlot(cs)] != null;
+  /** returns the value of the mapping of the chars inside this {@code CharSequence} */
+  public V get(CharSequence cs) {
+    return values[getSlot(cs)];
+  }
+
+  @Override
+  public V get(Object o) {
+    if (o instanceof char[]) {
+      final char[] text = (char[])o;
+      return get(text, 0, text.length);
+    } 
+    return get(o.toString());
   }
 
   private int getSlot(char[] text, int off, int len) {
     int code = getHashCode(text, off, len);
-    int pos = code & (entries.length-1);
-    char[] text2 = entries[pos];
+    int pos = code & (keys.length-1);
+    char[] text2 = keys[pos];
     if (text2 != null && !equals(text, off, len, text2)) {
       final int inc = ((code>>8)+code)|1;
       do {
         code += inc;
-        pos = code & (entries.length-1);
-        text2 = entries[pos];
+        pos = code & (keys.length-1);
+        text2 = keys[pos];
       } while (text2 != null && !equals(text, off, len, text2));
     }
     return pos;
@@ -182,34 +176,42 @@
   /** Returns true if the String is in the set */  
   private int getSlot(CharSequence text) {
     int code = getHashCode(text);
-    int pos = code & (entries.length-1);
-    char[] text2 = entries[pos];
+    int pos = code & (keys.length-1);
+    char[] text2 = keys[pos];
     if (text2 != null && !equals(text, text2)) {
       final int inc = ((code>>8)+code)|1;
       do {
         code += inc;
-        pos = code & (entries.length-1);
-        text2 = entries[pos];
+        pos = code & (keys.length-1);
+        text2 = keys[pos];
       } while (text2 != null && !equals(text, text2));
     }
     return pos;
   }
 
-  /** Add this CharSequence into the set */
-  public boolean add(CharSequence text) {
-    return add(text.toString()); // could be more efficient
+  /** Add the given mapping. */
+  public V put(CharSequence text, V value) {
+    return put(text.toString(), value); // could be more efficient
+  }
+
+  @Override
+  public V put(Object o, V value) {
+    if (o instanceof char[]) {
+      return put((char[])o, value);
+    }
+    return put(o.toString(), value);
   }
   
-  /** Add this String into the set */
-  public boolean add(String text) {
-    return add(text.toCharArray());
+  /** Add the given mapping. */
+  public V put(String text, V value) {
+    return put(text.toCharArray(), value);
   }
 
-  /** Add this char[] directly to the set.
+  /** Add the given mapping.
    * If ignoreCase is true for this Set, the text array will be directly modified.
    * The user should never modify this text array after calling this method.
    */
-  public boolean add(char[] text) {
+  public V put(char[] text, V value) {
     if (ignoreCase)
       for(int i=0;i<text.length;){
         i += Character.toChars(
@@ -217,17 +219,42 @@
                   charUtils.codePointAt(text, i)), text, i);
       }
     int slot = getSlot(text, 0, text.length);
-    if (entries[slot] != null) return false;
-    entries[slot] = text;
+    if (keys[slot] != null) {
+      final V oldValue = values[slot];
+      values[slot] = value;
+      return oldValue;
+    }
+    keys[slot] = text;
+    values[slot] = value;
     count++;
 
-    if (count + (count>>2) > entries.length) {
+    if (count + (count>>2) > keys.length) {
       rehash();
     }
 
-    return true;
+    return null;
   }
 
+  @SuppressWarnings("unchecked")
+  private void rehash() {
+    assert keys.length == values.length;
+    final int newSize = 2*keys.length;
+    final char[][] oldkeys = keys;
+    final V[] oldvalues = values;
+    keys = new char[newSize][];
+    values = (V[]) new Object[newSize];
+
+    for(int i=0; i<oldkeys.length; i++) {
+      char[] text = oldkeys[i];
+      if (text != null) {
+        // todo: could be faster... no need to compare strings on collision
+        final int slot = getSlot(text,0,text.length);
+        keys[slot] = text;
+        values[slot] = oldvalues[i];
+      }
+    }
+  }
+  
   private boolean equals(char[] text1, int off, int len, char[] text2) {
     if (len != text2.length)
       return false;
@@ -268,23 +295,9 @@
     return true;
   }
   
-
-
-  private void rehash() {
-    final int newSize = 2*entries.length;
-    char[][] oldEntries = entries;
-    entries = new char[newSize][];
-
-    for(int i=0;i<oldEntries.length;i++) {
-      char[] text = oldEntries[i];
-      if (text != null) {
-        // todo: could be faster... no need to compare strings on collision
-        entries[getSlot(text,0,text.length)] = text;
-      }
-    }
-  }
-  
   private int getHashCode(char[] text, int offset, int len) {
+    if (text == null)
+      throw new NullPointerException();
     int code = 0;
     final int stop = offset + len;
     if (ignoreCase) {
@@ -302,6 +315,8 @@
   }
 
   private int getHashCode(CharSequence text) {
+    if (text == null)
+      throw new NullPointerException();
     int code = 0;
     int len = text.length();
     if (ignoreCase) {
@@ -318,143 +333,124 @@
     return code;
   }
 
-
   @Override
-  public int size() {
-    return count;
-  }
-
-  @Override
-  public boolean isEmpty() {
-    return count==0;
+  public V remove(Object key) {
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  public boolean contains(Object o) {
-    if (o instanceof char[]) {
-      final char[] text = (char[])o;
-      return contains(text, 0, text.length);
-    } 
-    return contains(o.toString());
+  public int size() {
+    return count;
   }
 
   @Override
-  public boolean add(Object o) {
-    if (o instanceof char[]) {
-      return add((char[])o);
+  public String toString() {
+    final StringBuilder sb = new StringBuilder("{");
+    for (Map.Entry<Object,V> entry : entrySet()) {
+      if (sb.length()>1) sb.append(", ");
+      sb.append(entry);
     }
-    return add(o.toString());
-  }
-  
-  /**
-   * Returns an unmodifiable {@link CharArraySet}. This allows to provide
-   * unmodifiable views of internal sets for "read-only" use.
-   * 
-   * @param set
-   *          a set for which the unmodifiable set is returned.
-   * @return an new unmodifiable {@link CharArraySet}.
-   * @throws NullPointerException
-   *           if the given set is <code>null</code>.
-   */
-  public static CharArraySet unmodifiableSet(CharArraySet set) {
-    if (set == null)
-      throw new NullPointerException("Given set is null");
-    if (set == EMPTY_SET)
-      return EMPTY_SET;
-    if (set instanceof UnmodifiableCharArraySet)
-      return set;
-
-    /*
-     * Instead of delegating calls to the given set copy the low-level values to
-     * the unmodifiable Subclass
-     */
-    return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
+    return sb.append('}').toString();
   }
 
-  /**
-   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
-   * is a {@link CharArraySet} the ignoreCase property will be preserved.
-   * 
-   * @param set
-   *          a set to copy
-   * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase and matchVersion property will be
-   *         preserved.
-   * @deprecated use {@link #copy(Version, Set)} instead.
-   */
-  @Deprecated
-  public static CharArraySet copy(final Set<?> set) {
-    if(set == EMPTY_SET)
-      return EMPTY_SET;
-    return copy(Version.LUCENE_30, set);
-  }
+  private EntrySet entrySet = null;
+  private CharArraySet keySet = null;
   
-  /**
-   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
-   * is a {@link CharArraySet} the ignoreCase property will be preserved.
-   * <p>
-   * <b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
-   * the {@link Version} of the source set differs from its copy
-   * {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
-   * The {@link #copy(Version, Set)} will preserve the {@link Version} of the
-   * source set it is an instance of {@link CharArraySet}.
-   * </p>
-   * 
-   * @param matchVersion
-   *          compatibility match version see <a href="#version">Version
-   *          note</a> above for details. This argument will be ignored if the
-   *          given set is a {@link CharArraySet}.
-   * @param set
-   *          a set to copy
-   * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase property as well as the
-   *         matchVersion will be of the given set will be preserved.
-   */
-  public static CharArraySet copy(final Version matchVersion, final Set<?> set) {
-    if(set == EMPTY_SET)
-      return EMPTY_SET;
-    if(set instanceof CharArraySet) {
-      final CharArraySet source = (CharArraySet) set;
-      // use fast path instead of iterating all values
-      // this is even on very small sets ~10 times faster than iterating
-      final char[][] entries = new char[source.entries.length][];
-      System.arraycopy(source.entries, 0, entries, 0, entries.length);
-      return new CharArraySet(source.matchVersion, entries, source.ignoreCase, source.count);
-    }
-    return new CharArraySet(matchVersion, set, false);
+  EntrySet createEntrySet() {
+    return new EntrySet(true);
   }
   
-
-  /** The Iterator<String> for this set.  Strings are constructed on the fly, so
-   * use <code>nextCharArray</code> for more efficient access. */
-  public class CharArraySetIterator implements Iterator<String> {
-    int pos=-1;
-    char[] next;
-    CharArraySetIterator() {
+  @Override
+  public final EntrySet entrySet() {
+    if (entrySet == null) {
+      entrySet = createEntrySet();
+    }
+    return entrySet;
+  }
+  
+  // helper for CharArraySet to not produce endless recursion
+  final Set<Object> originalKeySet() {
+    return super.keySet();
+  }
+
+  /** Returns an {@link CharArraySet} view on the map's keys.
+   * The set will use the same {@code matchVersion} as this map. */
+  @Override @SuppressWarnings("unchecked")
+  public final CharArraySet keySet() {
+    if (keySet == null) {
+      // prevent adding of entries
+      keySet = new CharArraySet((CharArrayMap) this) {
+        @Override
+        public boolean add(Object o) {
+          throw new UnsupportedOperationException();
+        }
+        @Override
+        public boolean add(CharSequence text) {
+          throw new UnsupportedOperationException();
+        }
+        @Override
+        public boolean add(String text) {
+          throw new UnsupportedOperationException();
+        }
+        @Override
+        public boolean add(char[] text) {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+    return keySet;
+  }
+
+  /** public iterator class so efficient methods are exposed to users */
+  public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
+    private int pos=-1;
+    private int lastPos;
+    private final boolean allowModify;
+    
+    private EntryIterator(boolean allowModify) {
+      this.allowModify = allowModify;
       goNext();
     }
 
     private void goNext() {
-      next = null;
+      lastPos = pos;
       pos++;
-      while (pos < entries.length && (next=entries[pos]) == null) pos++;
+      while (pos < keys.length && keys[pos] == null) pos++;
     }
 
     public boolean hasNext() {
-      return next != null;
+      return pos < keys.length;
     }
 
-    /** do not modify the returned char[] */
-    public char[] nextCharArray() {
-      char[] ret = next;
+    /** gets the next key... do not modify the returned char[] */
+    public char[] nextKey() {
       goNext();
-      return ret;
+      return keys[lastPos];
     }
 
-    /** Returns the next String, as a Set<String> would...
-     * use nextCharArray() for better efficiency. */
-    public String next() {
-      return new String(nextCharArray());
+    /** gets the next key as a newly created String object */
+    public String nextKeyString() {
+      return new String(nextKey());
+    }
+
+    /** returns the value associated with the last key returned */
+    public V currentValue() {
+      return values[lastPos];
+    }
+
+    /** sets the value associated with the last key returned */    
+    public V setValue(V value) {
+      if (!allowModify)
+        throw new UnsupportedOperationException();
+      V old = values[lastPos];
+      values[lastPos] = value;
+      return old;      
+    }
+
+    /** use nextCharArray() + currentValue() for better efficiency. */
+    public Map.Entry<Object,V> next() {
+      goNext();
+      return new MapEntry(lastPos, allowModify);
     }
 
     public void remove() {
@@ -462,30 +458,155 @@
     }
   }
 
-  /** returns an iterator of new allocated Strings */
-  public Iterator<String> stringIterator() {
-    return new CharArraySetIterator();
+  private final class MapEntry implements Map.Entry<Object,V> {
+    private final int pos;
+    private final boolean allowModify;
+
+    private MapEntry(int pos, boolean allowModify) {
+      this.pos = pos;
+      this.allowModify = allowModify;
+    }
+
+    public Object getKey() {
+      // we must clone here, as putAll to another CharArrayMap
+      // with other case sensitivity flag would corrupt the keys
+      return keys[pos].clone();
+    }
+
+    public V getValue() {
+      return values[pos];
+    }
+
+    public V setValue(V value) {
+      if (!allowModify)
+        throw new UnsupportedOperationException();
+      final V old = values[pos];
+      values[pos] = value;
+      return old;
+    }
+
+    @Override
+    public String toString() {
+      return new StringBuilder().append(keys[pos]).append('=')
+        .append(((Object) values[pos] == (Object) CharArrayMap.this) ? "(this Map)" : values[pos])
+        .toString();
+    }
   }
 
-  /** returns an iterator of new allocated Strings, this method violates the Set interface */
-  @Override
-  @SuppressWarnings("unchecked")
-  public Iterator<Object> iterator() {
-    return (Iterator) stringIterator();
+  /** public EntrySet class so efficient methods are exposed to users */
+  public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
+    private final boolean allowModify;
+    
+    private EntrySet(boolean allowModify) {
+      this.allowModify = allowModify;
+    }
+  
+    @Override
+    public EntryIterator iterator() {
+      return new EntryIterator(allowModify);
+    }
+    
+    @Override
+    public boolean contains(Object o) {
+      if (!(o instanceof Map.Entry))
+        return false;
+      final Map.Entry e = (Map.Entry)o;
+      final Object key = e.getKey();
+      final Object val = e.getValue();
+      final Object v = get(key);
+      return v == null ? val == null : v.equals(val);
+    }
+    
+    @Override
+    public boolean remove(Object o) {
+      throw new UnsupportedOperationException();
+    }
+    
+    @Override
+    public int size() {
+      return count;
+    }
+    
+    @Override
+    public void clear() {
+      if (!allowModify)
+        throw new UnsupportedOperationException();
+      CharArrayMap.this.clear();
+    }
   }
   
   /**
-   * Efficient unmodifiable {@link CharArraySet}. This implementation does not
-   * delegate calls to a give {@link CharArraySet} like
-   * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes
-   * the internal representation of a {@link CharArraySet} to a super
-   * constructor and overrides all mutators. 
+   * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
+   * unmodifiable views of internal map for "read-only" use.
+   * 
+   * @param map
+   *          a map for which the unmodifiable map is returned.
+   * @return an new unmodifiable {@link CharArrayMap}.
+   * @throws NullPointerException
+   *           if the given map is <code>null</code>.
    */
-  private static class UnmodifiableCharArraySet extends CharArraySet {
+  public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
+    if (map == null)
+      throw new NullPointerException("Given map is null");
+    if (map == emptyMap() || map.isEmpty())
+      return emptyMap();
+    if (map instanceof UnmodifiableCharArrayMap)
+      return map;
+    return new UnmodifiableCharArrayMap<V>(map);
+  }
 
-    private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
-        int count) {
-      super(matchVersion, entries, ignoreCase, count);
+  /**
+   * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
+   * is a {@link CharArrayMap} the ignoreCase property will be preserved.
+   * <p>
+   * <b>Note:</b> If you intend to create a copy of another {@link CharArrayMap} where
+   * the {@link Version} of the source map differs from its copy
+   * {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
+   * The {@link #copy(Version, Map)} will preserve the {@link Version} of the
+   * source map it is an instance of {@link CharArrayMap}.
+   * </p>
+   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details. This argument will be ignored if the
+   *          given map is a {@link CharArrayMap}.
+   * @param map
+   *          a map to copy
+   * @return a copy of the given map as a {@link CharArrayMap}. If the given map
+   *         is a {@link CharArrayMap} the ignoreCase property as well as the
+   *         matchVersion will be of the given map will be preserved.
+   */
+  @SuppressWarnings("unchecked")
+  public static <V> CharArrayMap<V> copy(final Version matchVersion, final Map<?,? extends V> map) {
+    if(map == EMPTY_MAP)
+      return emptyMap();
+    if(map instanceof CharArrayMap) {
+      CharArrayMap<V> m = (CharArrayMap<V>) map;
+      // use fast path instead of iterating all values
+      // this is even on very small sets ~10 times faster than iterating
+      final char[][] keys = new char[m.keys.length][];
+      System.arraycopy(m.keys, 0, keys, 0, keys.length);
+      final V[] values = (V[]) new Object[m.values.length];
+      System.arraycopy(m.values, 0, values, 0, values.length);
+      m = new CharArrayMap<V>(m);
+      m.keys = keys;
+      m.values = values;
+      return m;
+    }
+    return new CharArrayMap<V>(matchVersion, map, false);
+  }
+  
+  /** Returns an empty, unmodifiable map. */
+  @SuppressWarnings("unchecked")
+  public static <V> CharArrayMap<V> emptyMap() {
+    return (CharArrayMap<V>) EMPTY_MAP;
+  }
+  
+  // package private CharArraySet instanceof check in CharArraySet
+  static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
+
+    UnmodifiableCharArrayMap(CharArrayMap<V> map) {
+      super(map);
     }
 
     @Override
@@ -494,61 +615,86 @@
     }
 
     @Override
-    public boolean add(Object o){
+    public V put(Object o, V val){
       throw new UnsupportedOperationException();
     }
     
     @Override
-    public boolean addAll(Collection<? extends Object> coll) {
+    public V put(char[] text, V val) {
       throw new UnsupportedOperationException();
     }
-    
+
     @Override
-    public boolean add(char[] text) {
+    public V put(CharSequence text, V val) {
       throw new UnsupportedOperationException();
     }
 
     @Override
-    public boolean add(CharSequence text) {
+    public V put(String text, V val) {
       throw new UnsupportedOperationException();
     }
-
+    
     @Override
-    public boolean add(String text) {
+    public V remove(Object key) {
       throw new UnsupportedOperationException();
     }
+  
+    @Override
+    EntrySet createEntrySet() {
+      return new EntrySet(false);
+    }
   }
   
   /**
-   * Empty {@link UnmodifiableCharArraySet} optimized for speed.
+   * Empty {@link UnmodifiableCharArrayMap} optimized for speed.
    * Contains checks will always return <code>false</code> or throw
    * NPE if necessary.
    */
-  private static final class EmptyCharArraySet extends UnmodifiableCharArraySet {
-
-    private EmptyCharArraySet() {
-      super(Version.LUCENE_CURRENT, new char[0][], false, 0);
+  private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
+    EmptyCharArrayMap() {
+      super(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false));
     }
     
     @Override
-    public boolean contains(char[] text, int off, int len) {
+    public boolean containsKey(char[] text, int off, int len) {
       if(text == null)
         throw new NullPointerException();
       return false;
     }
 
     @Override
-    public boolean contains(CharSequence cs) {
+    public boolean containsKey(CharSequence cs) {
       if(cs == null)
         throw new NullPointerException();
       return false;
     }
 
     @Override
-    public boolean contains(Object o) {
+    public boolean containsKey(Object o) {
       if(o == null)
         throw new NullPointerException();
       return false;
     }
+    
+    @Override
+    public V get(char[] text, int off, int len) {
+      if(text == null)
+        throw new NullPointerException();
+      return null;
+    }
+
+    @Override
+    public V get(CharSequence cs) {
+      if(cs == null)
+        throw new NullPointerException();
+      return null;
+    }
+
+    @Override
+    public V get(Object o) {
+      if(o == null)
+        throw new NullPointerException();
+      return null;
+    }
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java?rev=906032&r1=906031&r2=906032&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java Wed Feb  3 12:37:53 2010
@@ -1,15 +1,5 @@
 package org.apache.lucene.analysis;
 
-import java.util.Arrays;
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Set;
-
-import org.apache.lucene.util.CharacterUtils;
-import org.apache.lucene.util.Version;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -27,6 +17,13 @@
  * limitations under the License.
  */
 
+import java.util.Arrays;
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.apache.lucene.util.Version;
 
 /**
  * A simple class that stores Strings as char[]'s in a
@@ -58,15 +55,11 @@
  * For type safety also {@link #stringIterator()} is provided.
  */
 public class CharArraySet extends AbstractSet<Object> {
-  private final static int INIT_SIZE = 8;
-  private char[][] entries;
-  private int count;
-  private final boolean ignoreCase;
-  public static final CharArraySet EMPTY_SET = new EmptyCharArraySet();
+  public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
+  private static final Object PLACEHOLDER = new Object();
+  
+  private final CharArrayMap<Object> map;
   
-  private final CharacterUtils charUtils;
-  private final Version matchVersion;
-
   /**
    * Create set with enough capacity to hold startSize terms
    * 
@@ -80,13 +73,7 @@
    *          otherwise <code>true</code>.
    */
   public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
-    this.ignoreCase = ignoreCase;
-    int size = INIT_SIZE;
-    while(startSize + (startSize>>2) > size)
-      size <<= 1;
-    entries = new char[size][];
-    this.charUtils = CharacterUtils.getInstance(matchVersion);
-    this.matchVersion = matchVersion;
+    this(new CharArrayMap<Object>(matchVersion, startSize, ignoreCase));
   }
 
   /**
@@ -101,7 +88,7 @@
    *          <code>false</code> if and only if the set should be case sensitive
    *          otherwise <code>true</code>.
    */
-  public CharArraySet(Version matchVersion, Collection<? extends Object> c, boolean ignoreCase) {
+  public CharArraySet(Version matchVersion, Collection<?> c, boolean ignoreCase) {
     this(matchVersion, c.size(), ignoreCase);
     addAll(c);
   }
@@ -132,77 +119,51 @@
    * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead         
    */  
   @Deprecated
-  public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
+  public CharArraySet(Collection<?> c, boolean ignoreCase) {
     this(Version.LUCENE_30, c.size(), ignoreCase);
     addAll(c);
   }
   
-  /** Create set from entries */
-  private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
-    this.entries = entries;
-    this.ignoreCase = ignoreCase;
-    this.count = count;
-    this.charUtils = CharacterUtils.getInstance(matchVersion);
-    this.matchVersion = matchVersion;
+  /** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
+  CharArraySet(final CharArrayMap<Object> map){
+    this.map = map;
   }
   
   /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
   @Override
   public void clear() {
-    count = 0;
-    Arrays.fill(entries, null);
+    map.clear();
   }
 
   /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
    * are in the set */
   public boolean contains(char[] text, int off, int len) {
-    return entries[getSlot(text, off, len)] != null;
+    return map.containsKey(text, off, len);
   }
 
   /** true if the <code>CharSequence</code> is in the set */
   public boolean contains(CharSequence cs) {
-    return entries[getSlot(cs)] != null;
+    return map.containsKey(cs);
   }
 
-  private int getSlot(char[] text, int off, int len) {
-    int code = getHashCode(text, off, len);
-    int pos = code & (entries.length-1);
-    char[] text2 = entries[pos];
-    if (text2 != null && !equals(text, off, len, text2)) {
-      final int inc = ((code>>8)+code)|1;
-      do {
-        code += inc;
-        pos = code & (entries.length-1);
-        text2 = entries[pos];
-      } while (text2 != null && !equals(text, off, len, text2));
-    }
-    return pos;
+  @Override
+  public boolean contains(Object o) {
+    return map.containsKey(o);
   }
 
-  /** Returns true if the String is in the set */  
-  private int getSlot(CharSequence text) {
-    int code = getHashCode(text);
-    int pos = code & (entries.length-1);
-    char[] text2 = entries[pos];
-    if (text2 != null && !equals(text, text2)) {
-      final int inc = ((code>>8)+code)|1;
-      do {
-        code += inc;
-        pos = code & (entries.length-1);
-        text2 = entries[pos];
-      } while (text2 != null && !equals(text, text2));
-    }
-    return pos;
+  @Override
+  public boolean add(Object o) {
+    return map.put(o, PLACEHOLDER) == null;
   }
 
   /** Add this CharSequence into the set */
   public boolean add(CharSequence text) {
-    return add(text.toString()); // could be more efficient
+    return map.put(text, PLACEHOLDER) == null;
   }
   
   /** Add this String into the set */
   public boolean add(String text) {
-    return add(text.toCharArray());
+    return map.put(text, PLACEHOLDER) == null;
   }
 
   /** Add this char[] directly to the set.
@@ -210,140 +171,12 @@
    * The user should never modify this text array after calling this method.
    */
   public boolean add(char[] text) {
-    if (ignoreCase)
-      for(int i=0;i<text.length;){
-        i += Character.toChars(
-              Character.toLowerCase(
-                  charUtils.codePointAt(text, i)), text, i);
-      }
-    int slot = getSlot(text, 0, text.length);
-    if (entries[slot] != null) return false;
-    entries[slot] = text;
-    count++;
-
-    if (count + (count>>2) > entries.length) {
-      rehash();
-    }
-
-    return true;
-  }
-
-  private boolean equals(char[] text1, int off, int len, char[] text2) {
-    if (len != text2.length)
-      return false;
-    final int limit = off+len;
-    if (ignoreCase) {
-      for(int i=0;i<len;) {
-        final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
-        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
-          return false;
-        i += Character.charCount(codePointAt); 
-      }
-    } else {
-      for(int i=0;i<len;i++) {
-        if (text1[off+i] != text2[i])
-          return false;
-      }
-    }
-    return true;
-  }
-
-  private boolean equals(CharSequence text1, char[] text2) {
-    int len = text1.length();
-    if (len != text2.length)
-      return false;
-    if (ignoreCase) {
-      for(int i=0;i<len;) {
-        final int codePointAt = charUtils.codePointAt(text1, i);
-        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
-          return false;
-        i += Character.charCount(codePointAt);
-      }
-    } else {
-      for(int i=0;i<len;i++) {
-        if (text1.charAt(i) != text2[i])
-          return false;
-      }
-    }
-    return true;
-  }
-  
-
-
-  private void rehash() {
-    final int newSize = 2*entries.length;
-    char[][] oldEntries = entries;
-    entries = new char[newSize][];
-
-    for(int i=0;i<oldEntries.length;i++) {
-      char[] text = oldEntries[i];
-      if (text != null) {
-        // todo: could be faster... no need to compare strings on collision
-        entries[getSlot(text,0,text.length)] = text;
-      }
-    }
-  }
-  
-  private int getHashCode(char[] text, int offset, int len) {
-    int code = 0;
-    final int stop = offset + len;
-    if (ignoreCase) {
-      for (int i=offset; i<stop;) {
-        final int codePointAt = charUtils.codePointAt(text, i, stop);
-        code = code*31 + Character.toLowerCase(codePointAt);
-        i += Character.charCount(codePointAt);
-      }
-    } else {
-      for (int i=offset; i<stop; i++) {
-        code = code*31 + text[i];
-      }
-    }
-    return code;
-  }
-
-  private int getHashCode(CharSequence text) {
-    int code = 0;
-    int len = text.length();
-    if (ignoreCase) {
-      for (int i=0; i<len;) {
-        int codePointAt = charUtils.codePointAt(text, i);
-        code = code*31 + Character.toLowerCase(codePointAt);
-        i += Character.charCount(codePointAt);
-      }
-    } else {
-      for (int i=0; i<len; i++) {
-        code = code*31 + text.charAt(i);
-      }
-    }
-    return code;
+    return map.put(text, PLACEHOLDER) == null;
   }
 
-
   @Override
   public int size() {
-    return count;
-  }
-
-  @Override
-  public boolean isEmpty() {
-    return count==0;
-  }
-
-  @Override
-  public boolean contains(Object o) {
-    if (o instanceof char[]) {
-      final char[] text = (char[])o;
-      return contains(text, 0, text.length);
-    } 
-    return contains(o.toString());
-  }
-
-  @Override
-  public boolean add(Object o) {
-    if (o instanceof char[]) {
-      return add((char[])o);
-    }
-    return add(o.toString());
+    return map.size();
   }
   
   /**
@@ -361,14 +194,9 @@
       throw new NullPointerException("Given set is null");
     if (set == EMPTY_SET)
       return EMPTY_SET;
-    if (set instanceof UnmodifiableCharArraySet)
+    if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap)
       return set;
-
-    /*
-     * Instead of delegating calls to the given set copy the low-level values to
-     * the unmodifiable Subclass
-     */
-    return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
+    return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
   }
 
   /**
@@ -415,29 +243,27 @@
       return EMPTY_SET;
     if(set instanceof CharArraySet) {
       final CharArraySet source = (CharArraySet) set;
-      // use fast path instead of iterating all values
-      // this is even on very small sets ~10 times faster than iterating
-      final char[][] entries = new char[source.entries.length][];
-      System.arraycopy(source.entries, 0, entries, 0, entries.length);
-      return new CharArraySet(source.matchVersion, entries, source.ignoreCase, source.count);
+      return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
     }
     return new CharArraySet(matchVersion, set, false);
   }
   
-
   /** The Iterator<String> for this set.  Strings are constructed on the fly, so
-   * use <code>nextCharArray</code> for more efficient access. */
+   * use <code>nextCharArray</code> for more efficient access.
+   * @deprecated Use the standard iterator, which returns {@code char[]} instances.
+   */
+  @Deprecated
   public class CharArraySetIterator implements Iterator<String> {
     int pos=-1;
     char[] next;
-    CharArraySetIterator() {
+    private CharArraySetIterator() {
       goNext();
     }
 
     private void goNext() {
       next = null;
       pos++;
-      while (pos < entries.length && (next=entries[pos]) == null) pos++;
+      while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++;
     }
 
     public boolean hasNext() {
@@ -462,93 +288,41 @@
     }
   }
 
-  /** returns an iterator of new allocated Strings */
+  /** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}).
+   * @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
+   */
+  @Deprecated
   public Iterator<String> stringIterator() {
     return new CharArraySetIterator();
   }
 
-  /** returns an iterator of new allocated Strings, this method violates the Set interface */
-  @Override
-  @SuppressWarnings("unchecked")
-  public Iterator<Object> iterator() {
-    return (Iterator) stringIterator();
-  }
-  
-  /**
-   * Efficient unmodifiable {@link CharArraySet}. This implementation does not
-   * delegate calls to a give {@link CharArraySet} like
-   * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes
-   * the internal representation of a {@link CharArraySet} to a super
-   * constructor and overrides all mutators. 
+  /** Returns an {@link Iterator} depending on the version used:
+   * <ul>
+   * <li>if {@code matchVersion} &ge; 3.1, it returns {@code char[]} instances in this set.</li>
+   * <li>if {@code matchVersion} is 3.0 or older, it returns new
+   * allocated Strings, so this method violates the Set interface.
+   * It is kept this way for backwards compatibility, normally it should
+   * return {@code char[]} on {@code next()}</li>
+   * </ul>
    */
-  private static class UnmodifiableCharArraySet extends CharArraySet {
-
-    private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
-        int count) {
-      super(matchVersion, entries, ignoreCase, count);
-    }
-
-    @Override
-    public void clear() {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public boolean add(Object o){
-      throw new UnsupportedOperationException();
-    }
-    
-    @Override
-    public boolean addAll(Collection<? extends Object> coll) {
-      throw new UnsupportedOperationException();
-    }
-    
-    @Override
-    public boolean add(char[] text) {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public boolean add(CharSequence text) {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public boolean add(String text) {
-      throw new UnsupportedOperationException();
-    }
+  @Override @SuppressWarnings("unchecked")
+  public Iterator<Object> iterator() {
+    // use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
+    return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
+      map.originalKeySet().iterator() : (Iterator) stringIterator();
   }
   
-  /**
-   * Empty {@link UnmodifiableCharArraySet} optimized for speed.
-   * Contains checks will always return <code>false</code> or throw
-   * NPE if necessary.
-   */
-  private static final class EmptyCharArraySet extends UnmodifiableCharArraySet {
-
-    private EmptyCharArraySet() {
-      super(Version.LUCENE_CURRENT, new char[0][], false, 0);
-    }
-    
-    @Override
-    public boolean contains(char[] text, int off, int len) {
-      if(text == null)
-        throw new NullPointerException();
-      return false;
-    }
-
-    @Override
-    public boolean contains(CharSequence cs) {
-      if(cs == null)
-        throw new NullPointerException();
-      return false;
-    }
-
-    @Override
-    public boolean contains(Object o) {
-      if(o == null)
-        throw new NullPointerException();
-      return false;
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder("[");
+    for (Object item : this) {
+      if (sb.length()>1) sb.append(", ");
+      if (item instanceof char[]) {
+        sb.append((char[]) item);
+      } else {
+        sb.append(item);
+      }
     }
+    return sb.append(']').toString();
   }
 }

Added: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java?rev=906032&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java Wed Feb  3 12:37:53 2010
@@ -0,0 +1,243 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.util.*;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestCharArrayMap extends LuceneTestCase {
+  Random r = newRandom();
+
+  public void doRandom(int iter, boolean ignoreCase) {
+    CharArrayMap<Integer> map = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 1, ignoreCase);
+    HashMap<String,Integer> hmap = new HashMap<String,Integer>();
+
+    char[] key;
+    for (int i=0; i<iter; i++) {
+      int len = r.nextInt(5);
+      key = new char[len];
+      for (int j=0; j<key.length; j++) {
+        key[j] = (char)r.nextInt(127);
+      }
+      String keyStr = new String(key);
+      String hmapKey = ignoreCase ? keyStr.toLowerCase() : keyStr; 
+
+      int val = r.nextInt();
+
+      Object o1 = map.put(key, val);
+      Object o2 = hmap.put(hmapKey,val);
+      assertEquals(o1,o2);
+
+      // add it again with the string method
+      assertEquals(val, map.put(keyStr,val).intValue());
+
+      assertEquals(val, map.get(key,0,key.length).intValue());
+      assertEquals(val, map.get(key).intValue());
+      assertEquals(val, map.get(keyStr).intValue());
+
+      assertEquals(hmap.size(), map.size());
+    }
+  }
+
+  public void testCharArrayMap() {
+    for (int i=0; i<5; i++) {  // pump this up for more random testing
+      doRandom(1000,false);
+      doRandom(1000,true);      
+    }
+  }
+
+  public void testMethods() {
+    CharArrayMap<Integer> cm = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 2, false);
+    HashMap<String,Integer> hm = new HashMap<String,Integer>();
+    hm.put("foo",1);
+    hm.put("bar",2);
+    cm.putAll(hm);
+    assertEquals(hm.size(), cm.size());
+    hm.put("baz", 3);
+    cm.putAll(hm);
+    assertEquals(hm.size(), cm.size());
+
+    CharArraySet cs = cm.keySet();
+    int n=0;
+    for (Object o : cs) {
+      assertTrue(cm.containsKey(o));
+      assertTrue(cm.containsKey((char[]) o));
+      n++;
+    }
+    assertEquals(hm.size(), n);
+    assertEquals(hm.size(), cs.size());
+    assertEquals(cm.size(), cs.size());
+    cs.clear();
+    assertEquals(0, cs.size());
+    assertEquals(0, cm.size());
+    try {
+      cs.add("test");
+      fail("keySet() allows adding new keys");
+    } catch (UnsupportedOperationException ue) {
+      // pass
+    }
+    cm.putAll(hm);
+    assertEquals(hm.size(), cs.size());
+    assertEquals(cm.size(), cs.size());
+
+    Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
+    n=0;
+    while (iter1.hasNext()) {
+      Map.Entry<Object,Integer> entry = iter1.next();
+      Object key = entry.getKey();
+      Integer val = entry.getValue();
+      assertEquals(cm.get(key), val);
+      entry.setValue(val*100);
+      assertEquals(val*100, (int)cm.get(key));
+      n++;
+    }
+    assertEquals(hm.size(), n);
+    cm.clear();
+    cm.putAll(hm);
+    assertEquals(cm.size(), n);
+
+    CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
+    n=0;
+    while (iter2.hasNext()) {
+      char[] keyc = iter2.nextKey();
+      Integer val = iter2.currentValue();
+      assertEquals(hm.get(new String(keyc)), val);
+      iter2.setValue(val*100);
+      assertEquals(val*100, (int)cm.get(keyc));
+      n++;
+    }
+    assertEquals(hm.size(), n);
+
+    cm.entrySet().clear();
+    assertEquals(0, cm.size());
+    assertEquals(0, cm.entrySet().size());
+    assertTrue(cm.isEmpty());
+  }
+
+  public void testModifyOnUnmodifiable(){
+    CharArrayMap<Integer> map = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 2, false);
+    map.put("foo",1);
+    map.put("bar",2);
+    final int size = map.size();
+    assertEquals(2, size);
+    assertTrue(map.containsKey("foo"));  
+    assertEquals(1, map.get("foo").intValue());  
+    assertTrue(map.containsKey("bar"));  
+    assertEquals(2, map.get("bar").intValue());  
+
+    map = CharArrayMap.unmodifiableMap(map);
+    assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
+    String NOT_IN_MAP = "SirGallahad";
+    assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
+    assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
+    
+    try{
+      map.put(NOT_IN_MAP.toCharArray(), 3);  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.put(NOT_IN_MAP, 3);  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.put(new StringBuilder(NOT_IN_MAP), 3);  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.clear();  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.entrySet().clear();  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.keySet().clear();  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.put((Object) NOT_IN_MAP, 3);  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    try{
+      map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));  
+      fail("Modified unmodifiable map");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+      assertEquals("Size of unmodifiable map has changed", size, map.size());
+    }
+    
+    assertTrue(map.containsKey("foo"));  
+    assertEquals(1, map.get("foo").intValue());  
+    assertTrue(map.containsKey("bar"));  
+    assertEquals(2, map.get("bar").intValue());  
+  }
+  
+  public void testToString() {
+    CharArrayMap<Integer> cm = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, Collections.singletonMap("test",1), false);
+    assertEquals("[test]",cm.keySet().toString());
+    assertEquals("[1]",cm.values().toString());
+    assertEquals("[test=1]",cm.entrySet().toString());
+    assertEquals("{test=1}",cm.toString());
+    cm.put("test2", 2);
+    assertTrue(cm.keySet().toString().contains(", "));
+    assertTrue(cm.values().toString().contains(", "));
+    assertTrue(cm.entrySet().toString().contains(", "));
+    assertTrue(cm.toString().contains(", "));
+  }
+}
+

Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArrayMap.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java?rev=906032&r1=906031&r2=906032&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java Wed Feb  3 12:37:53 2010
@@ -19,6 +19,7 @@
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -92,7 +93,7 @@
   }
   
   public void testModifyOnUnmodifiable(){
-    CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
+    CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10, true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
     final int size = set.size();
     set = CharArraySet.unmodifiableSet(set);
@@ -143,8 +144,12 @@
       assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
       assertEquals("Size of unmodifiable set has changed", size, set.size());
     }
+    
+    // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
+    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
+    // remove() on the iterator
     try{
-      set.removeAll(Arrays.asList(TEST_STOP_WORDS));  
+      set.removeAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));  
       fail("Modified unmodifiable set");
     }catch (UnsupportedOperationException e) {
       // expected
@@ -152,7 +157,7 @@
     }
     
     try{
-      set.retainAll(Arrays.asList(new String[]{NOT_IN_SET}));  
+      set.retainAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(NOT_IN_SET), true));  
       fail("Modified unmodifiable set");
     }catch (UnsupportedOperationException e) {
       // expected
@@ -491,4 +496,11 @@
       fail("null value must raise NPE");
     } catch (NullPointerException e) {}
   }
+  
+  public void testToString() {
+    CharArraySet set = CharArraySet.copy(Version.LUCENE_CURRENT, Collections.singleton("test"));
+    assertEquals("[test]", set.toString());
+    set.add("test2");
+    assertTrue(set.toString().contains(", "));
+  }
 }