You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/03/02 16:59:56 UTC

svn commit: r1296268 - in /lucene/dev/trunk: lucene/contrib/ modules/suggest/src/java/org/apache/lucene/search/suggest/ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/ mo...

Author: simonw
Date: Fri Mar  2 15:59:55 2012
New Revision: 1296268

URL: http://svn.apache.org/viewvc?rev=1296268&view=rev
Log:
LUCENE-3807: Cleanup Suggest / Lookup API

Removed:
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FloatMagic.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FloatMagicTest.java
Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
    lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Fri Mar  2 15:59:55 2012
@@ -115,6 +115,13 @@ Changes in backwards compatibility polic
  * LUCENE-3626: The internal implementation classes in PKIndexSplitter
    and MultiPassIndexSplitter were made private as they now work
    per segment.  (Uwe Schindler)
+   
+ * LUCENE-3807: Cleaned up Suggest / Lookup API. Term weights (freqs) are now
+   64bit signed integers instead of 32bit floats. Sorting of terms is now a 
+   disk based merge sort instead of an in-memory sort. The Lookup API now 
+   accepts and returns CharSequence instead of String; convert results into a
+   String before using them in a data structure that relies on hashCode / equals.
+   (Simon Willnauer)
   
 Changes in Runtime Behavior
 

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java Fri Mar  2 15:59:55 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;
 
 /**
  * This wrapper buffers incoming elements.
+ * @lucene.experimental
  */
 public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
   // TODO keep this for now

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java Fri Mar  2 15:59:55 2012
@@ -18,81 +18,113 @@ package org.apache.lucene.search.suggest
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Comparator;
 
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.SorterTemplate;
 
-final class BytesRefList {
-
+/**
+ * A simple append only random-access {@link BytesRef} array that stores full
+ * copies of the appended bytes in a {@link ByteBlockPool}.
+ * 
+ * 
+ * <b>Note: This class is not Thread-Safe!</b>
+ * 
+ * @lucene.internal
+ * @lucene.experimental
+ */
+public final class BytesRefList {
+  // TODO rename to BytesRefArray
   private final ByteBlockPool pool;
   private int[] offsets = new int[1];
-  private int currentElement = 0;
+  private int lastElement = 0;
   private int currentOffset = 0;
-
+  private final Counter bytesUsed = Counter.newCounter(false);
+  
+  /**
+   * Creates a new {@link BytesRefList}
+   */
   public BytesRefList() {
-    this(new ByteBlockPool(new ByteBlockPool.DirectAllocator()));
-  }
-
-  public BytesRefList(ByteBlockPool pool) {
-    this.pool = pool;
+    this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(
+        bytesUsed));
     pool.nextBuffer();
+    bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+        + RamUsageEstimator.NUM_BYTES_INT);
   }
-
+ 
+  /**
+   * Clears this {@link BytesRefList}
+   */
+  public void clear() {
+    lastElement = 0;
+    currentOffset = 0;
+    Arrays.fill(offsets, 0);
+    pool.reset();
+  }
+  
+  /**
+   * Appends a copy of the given {@link BytesRef} to this {@link BytesRefList}.
+   * @param bytes the bytes to append
+   * @return the ordinal of the appended bytes
+   */
   public int append(BytesRef bytes) {
-    if (currentElement >= offsets.length) {
+    if (lastElement >= offsets.length) {
+      int oldLen = offsets.length;
       offsets = ArrayUtil.grow(offsets, offsets.length + 1);
+      bytesUsed.addAndGet((offsets.length - oldLen)
+          * RamUsageEstimator.NUM_BYTES_INT);
     }
     pool.copy(bytes);
-    offsets[currentElement++] = currentOffset;
+    offsets[lastElement++] = currentOffset;
     currentOffset += bytes.length;
-    return currentElement;
+    return lastElement;
   }
-
+  
+  /**
+   * Returns the current size of this {@link BytesRefList}
+   * @return the current size of this {@link BytesRefList}
+   */
   public int size() {
-    return currentElement;
+    return lastElement;
   }
-
-  public BytesRef get(BytesRef bytes, int pos) {
-    if (currentElement > pos) {
-      bytes.offset = offsets[pos];
-      bytes.length = pos == currentElement - 1 ? currentOffset - bytes.offset
-          : offsets[pos + 1] - bytes.offset;
-      pool.copyFrom(bytes);
-      return bytes;
+  
+  /**
+   * Returns the <i>n'th</i> element of this {@link BytesRefList}
+   * @param spare a spare {@link BytesRef} instance
+   * @param ord the element's ordinal to retrieve
+   * @return the <i>n'th</i> element of this {@link BytesRefList}
+   */
+  public BytesRef get(BytesRef spare, int ord) {
+    if (lastElement > ord) {
+      spare.offset = offsets[ord];
+      spare.length = ord == lastElement - 1 ? currentOffset - spare.offset
+          : offsets[ord + 1] - spare.offset;
+      pool.copyFrom(spare);
+      return spare;
     }
-    throw new IndexOutOfBoundsException("index " + pos
-        + " must be less than the size: " + currentElement);
-
-  }
-
-  public BytesRefIterator iterator() {
-    final int numElements = currentElement;
+    throw new IndexOutOfBoundsException("index " + ord
+        + " must be less than the size: " + lastElement);
     
-    return new BytesRefIterator() {
-      private final BytesRef spare = new BytesRef();
-      private int pos = 0;
-
-      @Override
-      public BytesRef next() throws IOException {
-        if (pos < numElements) {
-          get(spare, pos++);
-          return spare;
-        }
-        return null;
-      }
-
-      @Override
-      public Comparator<BytesRef> getComparator() {
-        return null;
-      }
-    };
   }
   
-  public int[] sort(final Comparator<BytesRef> comp) {
+  /**
+   * Returns the number of internally used bytes to hold the appended bytes in
+   * memory
+   * 
+   * @return the number of internally used bytes to hold the appended bytes in
+   *         memory
+   */
+  public long bytesUsed() {
+    return bytesUsed.get();
+  }
+  
+  private int[] sort(final Comparator<BytesRef> comp) {
     final int[] orderdEntries = new int[size()];
     for (int i = 0; i < orderdEntries.length; i++) {
       orderdEntries[i] = i;
@@ -110,22 +142,65 @@ final class BytesRefList {
         final int ord1 = orderdEntries[i], ord2 = orderdEntries[j];
         return comp.compare(get(scratch1, ord1), get(scratch2, ord2));
       }
-
+      
       @Override
       protected void setPivot(int i) {
         final int ord = orderdEntries[i];
         get(pivot, ord);
       }
-  
+      
       @Override
       protected int comparePivot(int j) {
         final int ord = orderdEntries[j];
         return comp.compare(pivot, get(scratch2, ord));
       }
       
-      private final BytesRef pivot = new BytesRef(),
-        scratch1 = new BytesRef(), scratch2 = new BytesRef();
+      private final BytesRef pivot = new BytesRef(), scratch1 = new BytesRef(),
+          scratch2 = new BytesRef();
     }.quickSort(0, size() - 1);
     return orderdEntries;
   }
+  
+  /**
+   * sugar for {@link #iterator(Comparator)} with a <code>null</code> comparator
+   */
+  public BytesRefIterator iterator() {
+    return iterator(null);
+  }
+  
+  /**
+   * <p>
+   * Returns a {@link BytesRefIterator} with point in time semantics. The
+   * iterator provides access to all so far appended {@link BytesRef} instances.
+   * </p>
+   * <p>
+   * If a non <code>null</code> {@link Comparator} is provided the iterator will
+   * iterate the byte values in the order specified by the comparator. Otherwise
+   * the order is the same as the values were appended.
+   * </p>
+   * <p>
+   * This is a non-destructive operation.
+   * </p>
+   */
+  public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
+    final BytesRef spare = new BytesRef();
+    final int size = size();
+    final int[] ords = comp == null ? null : sort(comp);
+    return new BytesRefIterator() {
+      int pos = 0;
+      
+      @Override
+      public BytesRef next() throws IOException {
+        if (pos < size) {
+          return get(spare, ords == null ? pos++ : ords[pos++]);
+        }
+        return null;
+      }
+      
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return comp;
+      }
+    };
+  }
 }

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java Fri Mar  2 15:59:55 2012
@@ -75,7 +75,11 @@ public class FileDictionary implements D
         String[] fields = line.split("\t");
         if (fields.length > 1) {
           // keep reading floats for bw compat
-          curFreq = (int)Float.parseFloat(fields[1]);
+          try {
+            curFreq = Long.parseLong(fields[1]);
+          } catch (NumberFormatException e) {
+            curFreq = (long)Double.parseDouble(fields[1]);
+          }
           spare.copyChars(fields[0]);
         } else {
           spare.copyChars(line);

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java Fri Mar  2 15:59:55 2012
@@ -29,15 +29,19 @@ import org.apache.lucene.search.spell.Te
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.PriorityQueue;
 
+/**
+ * Simple Lookup interface for {@link CharSequence} suggestions.
+ * @lucene.experimental
+ */
 public abstract class Lookup {
   /**
    * Result of a lookup.
    */
   public static final class LookupResult implements Comparable<LookupResult> {
     public final CharSequence key;
-    public final float value;
+    public final long value;
     
-    public LookupResult(CharSequence key, float value) {
+    public LookupResult(CharSequence key, long value) {
       this.key = key;
       this.value = value;
     }
@@ -112,6 +116,10 @@ public abstract class Lookup {
     build(tfit);
   }
   
+  /**
+   * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
+   * The implementation might re-sort the data internally.
+   */
   public abstract void build(TermFreqIterator tfit) throws IOException;
   
   /**
@@ -124,23 +132,8 @@ public abstract class Lookup {
    */
   public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
 
-  /**
-   * Modify the lookup data by recording additional data. Optional operation.
-   * @param key new lookup key
-   * @param value value to associate with this key
-   * @return true if new key is added, false if it already exists or operation
-   * is not supported.
-   */
-  public abstract boolean add(CharSequence key, Object value);
   
   /**
-   * Get value associated with a specific key.
-   * @param key lookup key
-   * @return associated value
-   */
-  public abstract Object get(CharSequence key);
-
-  /**
    * Persist the constructed lookup data to a directory. Optional operation.
    * @param output {@link OutputStream} to write the data to.
    * @return true if successful, false if unsuccessful or not supported.
@@ -173,4 +166,5 @@ public abstract class Lookup {
    * @throws IOException when fatal IO error occurs.
    */
   public abstract boolean load(File storeDir) throws IOException;
+  
 }

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java Fri Mar  2 15:59:55 2012
@@ -17,45 +17,166 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Comparator;
 
 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.suggest.fst.Sort;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 
 /**
- * This wrapper buffers incoming elements and makes sure they are sorted in
- * ascending lexicographic order.
+ * This wrapper buffers incoming elements and makes sure they are sorted based on the given comparator.
+ * @lucene.experimental
  */
-public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
-  // TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
-  private final int[] sortedOrds;
-  private int currentOrd = -1;
-  private final BytesRef spare = new BytesRef();
-  private final Comparator<BytesRef> comp;
-
-  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
-    super(source);
-    this.sortedOrds = entries.sort(comp);
-    this.comp = comp;
+public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
+  
+  private final TermFreqIterator source;
+  private File tempInput;
+  private File tempSorted;
+  private final ByteSequencesReader reader;
+  private boolean done = false;
+  
+  private long weight;
+  private final BytesRef scratch = new BytesRef();
+  private final Comparator<BytesRef> comparator;
+  
+  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
+    this(source, comparator, false);
   }
-
-  @Override
-  public long weight() {
-    return freqs[currentOrd];
+  
+  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
+    this.source = source;
+    this.comparator = comparator;
+    this.reader = sort(compareRawBytes ? comparator : new BytesOnlyComparator(this.comparator));
   }
-
+  
   @Override
   public BytesRef next() throws IOException {
-    if (++curPos < entries.size()) {
-      return entries.get(spare, (currentOrd = sortedOrds[curPos]));  
+    boolean success = false;
+    if (done) {
+      return null;
+    }
+    try {
+      ByteArrayDataInput input = new ByteArrayDataInput();
+      if (reader.read(scratch)) {
+        weight = decode(scratch, input);
+        success = true;
+        return scratch;
+      }
+      close();
+      success = done = true;
+      return null;
+    } finally {
+      if (!success) {
+        done = true;
+        close();
+      }
     }
-    return null;
   }
-
+  
   @Override
   public Comparator<BytesRef> getComparator() {
-    return comp;
+    return comparator;
+  }
+  
+  @Override
+  public long weight() {
+    return weight;
+  }
+  
+  private Sort.ByteSequencesReader sort(Comparator<BytesRef> comparator) throws IOException {
+    String prefix = getClass().getSimpleName();
+    File directory = Sort.defaultTempDir();
+    tempInput = File.createTempFile(prefix, ".input", directory);
+    tempSorted = File.createTempFile(prefix, ".sorted", directory);
+    
+    final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
+    boolean success = false;
+    try {
+      BytesRef spare;
+      byte[] buffer = new byte[0];
+      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+      while ((spare = source.next()) != null) {
+        encode(writer, output, buffer, spare, source.weight());
+      }
+      writer.close();
+      new Sort(comparator).sort(tempInput, tempSorted);
+      ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
+      success = true;
+      return reader;
+      
+    } finally {
+      if (success) {
+        IOUtils.close(writer);
+      } else {
+        try {
+          IOUtils.closeWhileHandlingException(writer);
+        } finally {
+          close();
+        }
+      }
+      
+    }
+  }
+  
+  private void close() throws IOException {
+    if (tempInput != null) {
+      tempInput.delete();
+    }
+    if (tempSorted != null) {
+      tempSorted.delete();
+    }
+    IOUtils.close(reader);
+  }
+  
+  private final static class BytesOnlyComparator implements Comparator<BytesRef> {
+
+    final Comparator<BytesRef> other;
+    private final BytesRef leftScratch = new BytesRef();
+    private final BytesRef rightScratch = new BytesRef();
+    
+    public BytesOnlyComparator(Comparator<BytesRef> other) {
+      this.other = other;
+    }
+
+    @Override
+    public int compare(BytesRef left, BytesRef right) {
+      wrap(leftScratch, left);
+      wrap(rightScratch, right);
+      return other.compare(leftScratch, rightScratch);
+    }
+    
+    private void wrap(BytesRef wrapper, BytesRef source) {
+      wrapper.bytes = source.bytes;
+      wrapper.offset = source.offset;
+      wrapper.length = source.length - 8;
+      
+    }
+  }
+  
+  protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+    if (spare.length + 8 >= buffer.length) {
+      buffer = ArrayUtil.grow(buffer, spare.length + 8);
+    }
+    output.reset(buffer);
+    output.writeBytes(spare.bytes, spare.offset, spare.length);
+    output.writeLong(weight);
+    writer.write(buffer, 0, output.getPosition());
+  }
+  
+  protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+    tmpInput.reset(scratch.bytes);
+    tmpInput.skipBytes(scratch.length - 8); // skip the suggestion bytes
+    scratch.length -= 8; // strip the trailing 8-byte weight (long)
+    return tmpInput.readLong();
   }
   
 }

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java Fri Mar  2 15:59:55 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.util.BytesRef;
 /**
  * This wrapper buffers the incoming elements and makes sure they are in
  * random order.
+ * @lucene.experimental
  */
 public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
   // TODO keep this for now

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java Fri Mar  2 15:59:55 2012
@@ -18,13 +18,16 @@ package org.apache.lucene.search.suggest
  */
 
 import java.io.IOException;
-import java.util.Iterator;
+import java.util.Comparator;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 
 /**
  * Collects {@link BytesRef} and then allows one to iterate over their sorted order. Implementations
- * of this interface will be called in a single-threaded scenario.  
+ * of this interface will be called in a single-threaded scenario.
+ * @lucene.experimental
+ * @lucene.internal  
  */
 public interface BytesRefSorter {
   /**
@@ -42,5 +45,7 @@ public interface BytesRefSorter {
    * 
    * @throws IOException If an I/O exception occurs.
    */
-  Iterator<BytesRef> iterator() throws IOException;
+   BytesRefIterator iterator() throws IOException;
+   
+   Comparator<BytesRef> getComparator();
 }

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java Fri Mar  2 15:59:55 2012
@@ -18,59 +18,63 @@ package org.apache.lucene.search.suggest
  */
 
 import java.io.*;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
+import java.util.Comparator;
 
 import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Builds and iterates over sequences stored on disk.
+ * @lucene.experimental
+ * @lucene.internal
  */
 public class ExternalRefSorter implements BytesRefSorter, Closeable {
   private final Sort sort;
   private Sort.ByteSequencesWriter writer;
   private File input;
-  private File sorted; 
-
+  private File sorted;
+  
   /**
    * Will buffer all sequences to a temporary file and then sort (all on-disk).
    */
   public ExternalRefSorter(Sort sort) throws IOException {
     this.sort = sort;
-    this.input = File.createTempFile("RefSorter-", ".raw", Sort.defaultTempDir());
+    this.input = File.createTempFile("RefSorter-", ".raw",
+        Sort.defaultTempDir());
     this.writer = new Sort.ByteSequencesWriter(input);
   }
-
+  
   @Override
   public void add(BytesRef utf8) throws IOException {
-    if (writer == null)
-      throw new IllegalStateException();
+    if (writer == null) throw new IllegalStateException();
     writer.write(utf8);
   }
-
-  @Override
-  public Iterator<BytesRef> iterator() throws IOException {
+  
+  public BytesRefIterator iterator() throws IOException {
     if (sorted == null) {
       closeWriter();
-
-      sorted = File.createTempFile("RefSorter-", ".sorted", Sort.defaultTempDir());
+      
+      sorted = File.createTempFile("RefSorter-", ".sorted",
+          Sort.defaultTempDir());
       sort.sort(input, sorted);
-
+      
       input.delete();
       input = null;
     }
-
-    return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted));
+    
+    return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted),
+        sort.getComparator());
   }
-
+  
   private void closeWriter() throws IOException {
     if (writer != null) {
       writer.close();
       writer = null;
     }
   }
-
+  
   /**
    * Removes any written temporary files.
    */
@@ -83,40 +87,54 @@ public class ExternalRefSorter implement
       if (sorted != null) sorted.delete();
     }
   }
-
+  
   /**
    * Iterate over byte refs in a file.
    */
-  class ByteSequenceIterator implements Iterator<BytesRef> {
-    private ByteSequencesReader reader;
-    private byte[] next;
-
-    public ByteSequenceIterator(ByteSequencesReader reader) throws IOException {
+  class ByteSequenceIterator implements BytesRefIterator {
+    private final ByteSequencesReader reader;
+    private BytesRef scratch = new BytesRef();
+    private final Comparator<BytesRef> comparator;
+    
+    public ByteSequenceIterator(ByteSequencesReader reader,
+        Comparator<BytesRef> comparator) {
       this.reader = reader;
-      this.next = reader.read();
-    }
-
-    @Override
-    public boolean hasNext() {
-      return next != null;
+      this.comparator = comparator;
     }
     
     @Override
-    public BytesRef next() {
-      if (next == null) throw new NoSuchElementException();
-      BytesRef r = new BytesRef(next);
+    public BytesRef next() throws IOException {
+      if (scratch == null) {
+        return null;
+      }
+      boolean success = false;
       try {
-        next = reader.read();
-        if (next == null) {
-          reader.close();
+        byte[] next = reader.read();
+        if (next != null) {
+          scratch.bytes = next;
+          scratch.length = next.length;
+          scratch.offset = 0;
+        } else {
+          IOUtils.close(reader);
+          scratch = null;
+        }
+        success = true;
+        return scratch;
+      } finally {
+        if (!success) {
+          IOUtils.closeWhileHandlingException(reader);
         }
-      } catch (IOException e) {
-        throw new RuntimeException(e);
       }
-      return r;
     }
-
+    
     @Override
-    public void remove() { throw new UnsupportedOperationException(); }
+    public Comparator<BytesRef> getComparator() {
+      return comparator;
+    }
+  }
+
+  @Override
+  public Comparator<BytesRef> getComparator() {
+    return sort.getComparator();
   }
 }

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java Fri Mar  2 15:59:55 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.util.fst.FST.Ar
  * Finite state automata based implementation of "autocomplete" functionality.
  * 
  * @see FSTCompletionBuilder
+ * @lucene.experimental
  */
 
 // TODO: we could store exact weights as outputs from the FST (int4 encoded
@@ -159,10 +160,10 @@ public class FSTCompletion {
    * @param utf8
    *          The sequence of utf8 bytes to follow.
    * 
-   * @return Returns the bucket number of the match or <code>null</code> if no
+   * @return Returns the bucket number of the match or <code>-1</code> if no
    *         match was found.
    */
-  private Integer getExactMatchStartingFromRootArc(
+  private int getExactMatchStartingFromRootArc(
       int rootArcIndex, BytesRef utf8) {
     // Get the UTF-8 bytes representation of the input key.
     try {
@@ -186,7 +187,7 @@ public class FSTCompletion {
     }
     
     // No match.
-    return null;
+    return -1;
   }
   
   /**
@@ -273,8 +274,8 @@ public class FSTCompletion {
           // exact match, if requested.
           if (exactFirst) {
             if (!checkExistingAndReorder(res, key)) {
-              Integer exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
-              if (exactMatchBucket != null) {
+              int exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
+              if (exactMatchBucket != -1) {
                 // Insert as the first result and truncate at num.
                 while (res.size() >= num) {
                   res.remove(res.size() - 1);
@@ -385,10 +386,10 @@ public class FSTCompletion {
   }
 
   /**
-   * Returns the bucket assigned to a given key (if found) or <code>null</code> if
+   * Returns the bucket assigned to a given key (if found) or <code>-1</code> if
    * no exact match exists.
    */
-  public Integer getBucket(CharSequence key) {
+  public int getBucket(CharSequence key) {
     return getExactMatchStartingFromRootArc(0, new BytesRef(key));
   }
 

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java Fri Mar  2 15:59:55 2012
@@ -19,9 +19,9 @@ package org.apache.lucene.search.suggest
 
 import java.io.Closeable;
 import java.io.IOException;
-import java.util.Iterator;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.fst.*;
 
@@ -98,6 +98,7 @@ import org.apache.lucene.util.fst.*;
  * change, requiring you to rebuild the FST suggest index.
  * 
  * @see FSTCompletion
+ * @lucene.experimental
  */
 public class FSTCompletionBuilder {
   /** 
@@ -143,10 +144,11 @@ public class FSTCompletionBuilder {
 
   /**
    * Creates an {@link FSTCompletion} with default options: 10 buckets, exact match
-   * promoted to first position and {@link InMemorySorter}.
+   * promoted to first position and {@link InMemorySorter} with a comparator obtained from
+   * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
    */
   public FSTCompletionBuilder() {
-    this(DEFAULT_BUCKETS, new InMemorySorter(), Integer.MAX_VALUE);
+    this(DEFAULT_BUCKETS, new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()), Integer.MAX_VALUE);
   }
 
   /**
@@ -237,10 +239,12 @@ public class FSTCompletionBuilder {
         shareMaxTailLength, outputs, null, false);
     
     BytesRef scratch = new BytesRef();
+    BytesRef entry;
     final IntsRef scratchIntsRef = new IntsRef();
     int count = 0;
-    for (Iterator<BytesRef> i = sorter.iterator(); i.hasNext(); count++) {
-      BytesRef entry = i.next();
+    BytesRefIterator iter = sorter.iterator();
+    while((entry = iter.next()) != null) {
+      count++;
       if (scratch.compareTo(entry) != 0) {
         builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
         scratch.copyBytes(entry);

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java Fri Mar  2 15:59:55 2012
@@ -59,6 +59,7 @@ import org.apache.lucene.util.fst.NoOutp
  * use {@link FSTCompletion} directly or {@link TSTLookup}, for example.
  * 
  * @see FSTCompletion
+ * @lucene.experimental
  */
 public class FSTCompletionLookup extends Lookup {
   /** 
@@ -171,7 +172,7 @@ public class FSTCompletionLookup extends
         }
 
         output.reset(buffer);
-        output.writeInt(FloatMagic.toSortable(tfit.weight()));
+        output.writeInt(encodeWeight(tfit.weight()));
         output.writeBytes(spare.bytes, spare.offset, spare.length);
         writer.write(buffer, 0, output.getPosition());
       }
@@ -188,13 +189,13 @@ public class FSTCompletionLookup extends
       reader = new Sort.ByteSequencesReader(tempSorted);
       long line = 0;
       int previousBucket = 0;
-      float previousScore = 0;
+      int previousScore = 0;
       ByteArrayDataInput input = new ByteArrayDataInput();
       BytesRef tmp1 = new BytesRef();
       BytesRef tmp2 = new BytesRef();
       while (reader.read(tmp1)) {
         input.reset(tmp1.bytes);
-        float currentScore = FloatMagic.fromSortable(input.readInt());
+        int currentScore = input.readInt();
 
         int bucket;
         if (line > 0 && currentScore == previousScore) {
@@ -230,6 +231,14 @@ public class FSTCompletionLookup extends
       tempSorted.delete();
     }
   }
+  
+  /** Narrows a long weight to an int, throwing if it is outside the int range. */
+  private static int encodeWeight(long value) {
+    if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
+      throw new UnsupportedOperationException("cannot encode value: " + value);
+    }
+    return (int)value;
+  }
 
   @Override
   public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
@@ -250,19 +259,9 @@ public class FSTCompletionLookup extends
     return results;
   }
 
-  @Override
-  public boolean add(CharSequence key, Object value) {
-    // Not supported.
-    return false;
-  }
-
-  @Override
   public Object get(CharSequence key) {
-    Integer bucket = normalCompletion.getBucket(key);
-    if (bucket == null)
-      return null;
-    else
-      return (float) normalCompletion.getBucket(key) / normalCompletion.getBucketCount();
+    final int bucket = normalCompletion.getBucket(key);
+    return bucket == -1 ? null : Long.valueOf(bucket);
   }
 
   /**

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java Fri Mar  2 15:59:55 2012
@@ -17,29 +17,40 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
-import java.util.*;
+import java.util.Comparator;
 
+import org.apache.lucene.search.suggest.BytesRefList;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 
 /**
  * An {@link BytesRefSorter} that keeps all the entries in memory.
+ * @lucene.experimental
+ * @lucene.internal
  */
 public final class InMemorySorter implements BytesRefSorter {
-  // TODO: use a single byte[] to back up all entries?
-  private final ArrayList<BytesRef> refs = new ArrayList<BytesRef>();
-  
+  private final BytesRefList buffer = new BytesRefList();
   private boolean closed = false;
+  private final Comparator<BytesRef> comparator;
 
+  public InMemorySorter(Comparator<BytesRef> comparator) {
+    this.comparator = comparator;
+  }
+  
   @Override
   public void add(BytesRef utf8) {
     if (closed) throw new IllegalStateException();
-    refs.add(BytesRef.deepCopyOf(utf8));
+    buffer.append(utf8);
   }
 
   @Override
-  public Iterator<BytesRef> iterator() {
+  public BytesRefIterator iterator() {
     closed = true;
-    Collections.sort(refs, BytesRef.getUTF8SortedAsUnicodeComparator());
-    return Collections.unmodifiableCollection(refs).iterator();
+    return buffer.iterator(comparator);
+  }
+
+  @Override
+  public Comparator<BytesRef> getComparator() {
+    return comparator;
   }
 }

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java Fri Mar  2 15:59:55 2012
@@ -20,15 +20,10 @@ package org.apache.lucene.search.suggest
 import java.io.*;
 import java.util.*;
 
+import org.apache.lucene.search.suggest.BytesRefList;
 import org.apache.lucene.util.*;
 import org.apache.lucene.util.PriorityQueue;
 
-// TODO: the buffer is currently byte[][] which with very small arrays will terribly overallocate
-// memory (alignments) and make GC very happy.
-// 
-// We could move it to a single byte[] + and use custom sorting, but we'd need to check if this
-// yields any improvement first.
-
 /**
  * On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
  * fields:
@@ -38,6 +33,8 @@ import org.apache.lucene.util.PriorityQu
  * </ul>
  * 
  * @see #sort(File, File)
+ * @lucene.experimental
+ * @lucene.internal
  */
 public final class Sort {
   public final static int MB = 1024 * 1024;
@@ -59,11 +56,6 @@ public final class Sort {
    */
   public final static int MAX_TEMPFILES = 128;
 
-  /**
-   * Minimum slot buffer expansion.
-   */
-  private final static int MIN_EXPECTED_GROWTH = 1000;
-
   /** 
    * A bit more descriptive unit for constructors.
    * 
@@ -112,21 +104,6 @@ public final class Sort {
   }
 
   /**
-   * byte[] in unsigned byte order.
-   */
-  static final Comparator<byte[]> unsignedByteOrderComparator = new Comparator<byte[]>() {
-    public int compare(byte[] left, byte[] right) {
-      final int max = Math.min(left.length, right.length);
-      for (int i = 0, j = 0; i < max; i++, j++) {
-        int diff = (left[i]  & 0xff) - (right[j] & 0xff); 
-        if (diff != 0) 
-          return diff;
-      }
-      return left.length - right.length;
-    }
-  };
-
-  /**
    * Sort info (debugging mostly).
    */
   public class SortInfo {
@@ -149,14 +126,15 @@ public final class Sort {
     }
   }
 
-  private final static byte [][] EMPTY = new byte [0][];
-
   private final BufferSize ramBufferSize;
   private final File tempDirectory;
-
-  private byte [][] buffer = new byte [0][];
+  
+  private final BytesRefList buffer = new BytesRefList();
   private SortInfo sortInfo;
   private int maxTempFiles;
+  private final Comparator<BytesRef> comparator;
+  
+  public static final Comparator<BytesRef> DEFAULT_COMPARATOR = BytesRef.getUTF8SortedAsUnicodeComparator();
 
   /**
    * Defaults constructor.
@@ -165,13 +143,17 @@ public final class Sort {
    * @see BufferSize#automatic()
    */
   public Sort() throws IOException {
-    this(BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+    this(DEFAULT_COMPARATOR, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+  }
+  
+  public Sort(Comparator<BytesRef> comparator) throws IOException {
+    this(comparator, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
   }
 
   /**
    * All-details constructor.
    */
-  public Sort(BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
+  public Sort(Comparator<BytesRef> comparator, BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
     if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
       throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
     }
@@ -183,6 +165,7 @@ public final class Sort {
     this.ramBufferSize = ramBufferSize;
     this.tempDirectory = tempDirectory;
     this.maxTempFiles = maxTempfiles;
+    this.comparator = comparator;
   }
 
   /** 
@@ -283,23 +266,25 @@ public final class Sort {
 
   /** Sort a single partition in-memory. */
   protected File sortPartition(int len) throws IOException {
-    byte [][] data = this.buffer;
+    BytesRefList data = this.buffer;
     File tempFile = File.createTempFile("sort", "partition", tempDirectory);
 
     long start = System.currentTimeMillis();
-    Arrays.sort(data, 0, len, unsignedByteOrderComparator);
     sortInfo.sortTime += (System.currentTimeMillis() - start);
     
-    ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+    final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+    BytesRef spare;
     try {
-      for (int i = 0; i < len; i++) {
-        assert data[i].length <= Short.MAX_VALUE;
-        out.write(data[i]);
+      BytesRefIterator iter = buffer.iterator(comparator);
+      while((spare = iter.next()) != null) {
+        assert spare.length <= Short.MAX_VALUE;
+        out.write(spare);
       }
+      
       out.close();
 
       // Clean up the buffer for the next partition.
-      this.buffer = EMPTY;
+      data.clear();
       return tempFile;
     } finally {
       IOUtils.close(out);
@@ -314,7 +299,7 @@ public final class Sort {
 
     PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(merges.size()) {
       protected boolean lessThan(FileAndTop a, FileAndTop b) {
-        return a.current.compareTo(b.current) < 0;
+        return comparator.compare(a.current, b.current) < 0;
       }
     };
 
@@ -359,33 +344,18 @@ public final class Sort {
   /** Read in a single partition of data */
   int readPartition(ByteSequencesReader reader) throws IOException {
     long start = System.currentTimeMillis();
-
-    // We will be reallocating from scratch.
-    Arrays.fill(this.buffer, null);
-
-    int bytesLimit = this.ramBufferSize.bytes;
-    byte [][] data = this.buffer;
-    byte[] line;
-    int linesRead = 0;
-    while ((line = reader.read()) != null) {
-      if (linesRead + 1 >= data.length) {
-        data = Arrays.copyOf(data,
-            ArrayUtil.oversize(linesRead + MIN_EXPECTED_GROWTH, 
-                RamUsageEstimator.NUM_BYTES_OBJECT_REF));
-      }
-      data[linesRead++] = line;
-
+    final BytesRef scratch = new BytesRef();
+    while ((scratch.bytes = reader.read()) != null) {
+      scratch.length = scratch.bytes.length; 
+      buffer.append(scratch);
       // Account for the created objects.
       // (buffer slots do not account to buffer size.) 
-      bytesLimit -= line.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
-      if (bytesLimit < 0) {
+      if (ramBufferSize.bytes < buffer.bytesUsed()) {
         break;
       }
     }
-    this.buffer = data;
-
     sortInfo.readTime += (System.currentTimeMillis() - start);
-    return linesRead;
+    return buffer.size();
   }
 
   static class FileAndTop {
@@ -515,5 +485,9 @@ public final class Sort {
         ((Closeable) is).close();
       }
     }
+  }
+
+  public Comparator<BytesRef> getComparator() {
+    return comparator;
   }  
 }
\ No newline at end of file

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java Fri Mar  2 15:59:55 2012
@@ -28,6 +28,8 @@ import java.util.List;
 
 import org.apache.lucene.search.spell.TermFreqIterator;
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.InputStreamDataInput;
@@ -102,72 +104,27 @@ public class WFSTCompletionLookup extend
   
   @Override
   public void build(TermFreqIterator iterator) throws IOException {
-    String prefix = getClass().getSimpleName();
-    File directory = Sort.defaultTempDir();
-    File tempInput = File.createTempFile(prefix, ".input", directory);
-    File tempSorted = File.createTempFile(prefix, ".sorted", directory);
-    
-    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
-    Sort.ByteSequencesReader reader = null;
     BytesRef scratch = new BytesRef();
-    
-    boolean success = false;
-    try {
-      byte [] buffer = new byte [0];
-      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
-      BytesRef spare;
-      while ((spare = iterator.next()) != null) {
-        if (spare.length + 5 >= buffer.length) {
-          buffer = ArrayUtil.grow(buffer, spare.length + 5);
-        }
-
-        output.reset(buffer);
-        output.writeBytes(spare.bytes, spare.offset, spare.length);
-        output.writeByte((byte)0); // separator: not used, just for sort order
-        output.writeInt((int)encodeWeight(iterator.weight()));
-        writer.write(buffer, 0, output.getPosition());
-      }
-      writer.close();
-      new Sort().sort(tempInput, tempSorted);
-      reader = new Sort.ByteSequencesReader(tempSorted);
-      
-      PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
-      Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+    TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator,
+        BytesRef.getUTF8SortedAsUnicodeComparator());
+    IntsRef scratchInts = new IntsRef();
+    BytesRef previous = null;
+    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+    while ((scratch = iter.next()) != null) {
+      long cost = iter.weight();
       
-      BytesRef previous = null;
-      BytesRef suggestion = new BytesRef();
-      IntsRef scratchInts = new IntsRef();
-      ByteArrayDataInput input = new ByteArrayDataInput();
-      while (reader.read(scratch)) {
-        suggestion.bytes = scratch.bytes;
-        suggestion.offset = scratch.offset;
-        suggestion.length = scratch.length - 5; // int + separator
-
-        input.reset(scratch.bytes);
-        input.skipBytes(suggestion.length + 1); // suggestion + separator
-        long cost = input.readInt();
-   
-        if (previous == null) {
-          previous = new BytesRef();
-        } else if (suggestion.equals(previous)) {
-          continue; // for duplicate suggestions, the best weight is actually added
-        }
-        Util.toIntsRef(suggestion, scratchInts);
-        builder.add(scratchInts, cost);
-        previous.copyBytes(suggestion);
-      }
-      fst = builder.finish();
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(reader, writer);
-      } else {
-        IOUtils.closeWhileHandlingException(reader, writer);
+      if (previous == null) {
+        previous = new BytesRef();
+      } else if (scratch.equals(previous)) {
+        continue; // for duplicate suggestions, the best weight is actually
+                  // added
       }
-      
-      tempInput.delete();
-      tempSorted.delete();
+      Util.toIntsRef(scratch, scratchInts);
+      builder.add(scratchInts, cost);
+      previous.copyBytes(scratch);
     }
+    fst = builder.finish();
   }
 
   @Override
@@ -270,16 +227,10 @@ public class WFSTCompletionLookup extend
     return output;
   }
   
-  @Override
-  public boolean add(CharSequence key, Object value) {
-    return false; // Not supported.
-  }
-
   /**
    * Returns the weight associated with an input string,
    * or null if it does not exist.
    */
-  @Override
   public Object get(CharSequence key) {
     Arc<Long> arc = new Arc<Long>();
     Long result = null;
@@ -289,23 +240,51 @@ public class WFSTCompletionLookup extend
     if (result == null || !arc.isFinal()) {
       return null;
     } else {
-      return decodeWeight(result + arc.nextFinalOutput);
+      return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput));
     }
   }
   
   /** cost -> weight */
-  private static float decodeWeight(long encoded) {
-    return Integer.MAX_VALUE - encoded;
+  private static int decodeWeight(long encoded) {
+    return (int)(Integer.MAX_VALUE - encoded);
   }
   
   /** weight -> cost */
-  private static long encodeWeight(float value) {
-    if (Float.isNaN(value) || Float.isInfinite(value) || value < 0 || value > Integer.MAX_VALUE) {
+  private static int encodeWeight(long value) {
+    if (value < 0 || value > Integer.MAX_VALUE) {
       throw new UnsupportedOperationException("cannot encode value: " + value);
     }
     return Integer.MAX_VALUE - (int)value;
   }
   
+  private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper {
+
+    WFSTTermFreqIteratorWrapper(TermFreqIterator source,
+        Comparator<BytesRef> comparator) throws IOException {
+      super(source, comparator, true);
+    }
+
+    @Override
+    protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+      if (spare.length + 5 >= buffer.length) {
+        buffer = ArrayUtil.grow(buffer, spare.length + 5);
+      }
+      output.reset(buffer);
+      output.writeBytes(spare.bytes, spare.offset, spare.length);
+      output.writeByte((byte)0); // separator: not used, just for sort order
+      output.writeInt(encodeWeight(weight));
+      writer.write(buffer, 0, output.getPosition());
+    }
+    
+    @Override
+    protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+      tmpInput.reset(scratch.bytes);
+      tmpInput.skipBytes(scratch.length - 4); // suggestion + separator
+      scratch.length -= 5; // sep + long
+      return tmpInput.readInt();
+    }
+  }
+  
   static final Comparator<Long> weightComparator = new Comparator<Long> () {
     public int compare(Long left, Long right) {
       return left.compareTo(right);

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java Fri Mar  2 15:59:55 2012
@@ -55,24 +55,22 @@ public class JaspellLookup extends Looku
     final CharsRef charsSpare = new CharsRef();
 
     while ((spare = tfit.next()) != null) {
-      float freq = tfit.weight();
+      final long weight = tfit.weight();
       if (spare.length == 0) {
         continue;
       }
       charsSpare.grow(spare.length);
       UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
-      trie.put(charsSpare.toString(), new Float(freq));
+      trie.put(charsSpare.toString(), Long.valueOf(weight));
     }
   }
 
-  @Override
   public boolean add(CharSequence key, Object value) {
     trie.put(key, value);
     // XXX
     return false;
   }
 
-  @Override
   public Object get(CharSequence key) {
     return trie.get(key);
   }
@@ -95,7 +93,7 @@ public class JaspellLookup extends Looku
     if (onlyMorePopular) {
       LookupPriorityQueue queue = new LookupPriorityQueue(num);
       for (String s : list) {
-        float freq = (Float)trie.get(s);
+        long freq = ((Number)trie.get(s)).longValue();
         queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
       }
       for (LookupResult lr : queue.getResults()) {
@@ -104,7 +102,7 @@ public class JaspellLookup extends Looku
     } else {
       for (int i = 0; i < maxCnt; i++) {
         String s = list.get(i);
-        float freq = (Float)trie.get(s);
+        long freq = ((Number)trie.get(s)).longValue();
         res.add(new LookupResult(new CharsRef(s), freq));
       }      
     }
@@ -131,7 +129,7 @@ public class JaspellLookup extends Looku
     node.splitchar = in.readChar();
     byte mask = in.readByte();
     if ((mask & HAS_VALUE) != 0) {
-      node.data = new Float(in.readFloat());
+      node.data = Long.valueOf(in.readLong());
     }
     if ((mask & LO_KID) != 0) {
       TSTNode kid = trie.new TSTNode('\0', node);
@@ -171,7 +169,7 @@ public class JaspellLookup extends Looku
     if (node.data != null) mask |= HAS_VALUE;
     out.writeByte(mask);
     if (node.data != null) {
-      out.writeFloat((Float)node.data);
+      out.writeLong(((Number)node.data).longValue());
     }
     writeRecursively(out, node.relatives[TSTNode.LOKID]);
     writeRecursively(out, node.relatives[TSTNode.EQKID]);

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java Fri Mar  2 15:59:55 2012
@@ -50,26 +50,24 @@ public class TSTLookup extends Lookup {
     }
 
     ArrayList<String> tokens = new ArrayList<String>();
-    ArrayList<Float> vals = new ArrayList<Float>();
+    ArrayList<Number> vals = new ArrayList<Number>();
     BytesRef spare;
     CharsRef charsSpare = new CharsRef();
     while ((spare = tfit.next()) != null) {
       charsSpare.grow(spare.length);
       UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
       tokens.add(charsSpare.toString());
-      vals.add(new Float(tfit.weight()));
+      vals.add(Long.valueOf(tfit.weight()));
     }
     autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
   }
 
-  @Override
   public boolean add(CharSequence key, Object value) {
     autocomplete.insert(root, key, value, 0);
     // XXX we don't know if a new node was created
     return true;
   }
 
-  @Override
   public Object get(CharSequence key) {
     List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
     if (list == null || list.isEmpty()) {
@@ -107,7 +105,7 @@ public class TSTLookup extends Lookup {
     if (onlyMorePopular) {
       LookupPriorityQueue queue = new LookupPriorityQueue(num);
       for (TernaryTreeNode ttn : list) {
-        queue.insertWithOverflow(new LookupResult(ttn.token, (Float)ttn.val));
+        queue.insertWithOverflow(new LookupResult(ttn.token, ((Number)ttn.val).longValue()));
       }
       for (LookupResult lr : queue.getResults()) {
         res.add(lr);
@@ -115,7 +113,7 @@ public class TSTLookup extends Lookup {
     } else {
       for (int i = 0; i < maxCnt; i++) {
         TernaryTreeNode ttn = list.get(i);
-        res.add(new LookupResult(ttn.token, (Float)ttn.val));
+        res.add(new LookupResult(ttn.token, ((Number)ttn.val).longValue()));
       }
     }
     return res;
@@ -146,7 +144,7 @@ public class TSTLookup extends Lookup {
       node.token = in.readUTF();
     }
     if ((mask & HAS_VALUE) != 0) {
-      node.val = new Float(in.readFloat());
+      node.val = Long.valueOf(in.readLong());
     }
     if ((mask & LO_KID) != 0) {
       node.loKid = new TernaryTreeNode();
@@ -184,7 +182,7 @@ public class TSTLookup extends Lookup {
     if (node.val != null) mask |= HAS_VALUE;
     out.writeByte(mask);
     if (node.token != null) out.writeUTF(node.token);
-    if (node.val != null) out.writeFloat((Float)node.val);
+    if (node.val != null) out.writeLong(((Number)node.val).longValue());
     // recurse and write kids
     if (node.loKid != null) {
       writeRecursively(out, node.loKid);

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java Fri Mar  2 15:59:55 2012
@@ -17,8 +17,10 @@
 package org.apache.lucene.search.suggest;
 
 import java.io.File;
+import java.util.List;
 
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
 import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
@@ -74,16 +76,18 @@ public class PersistenceTest extends Luc
     lookup.load(storeDir);
 
     // Assert validity.
-    float previous = Float.NEGATIVE_INFINITY;
+    long previous = Long.MIN_VALUE;
     for (TermFreq k : keys) {
-      Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
-      assertNotNull(k.term.utf8ToString(), val);
+      List<LookupResult> list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
+      assertEquals(1, list.size());
+      LookupResult lookupResult = list.get(0);
+      assertNotNull(k.term.utf8ToString(), lookupResult.key);
 
       if (supportsExactWeights) { 
-        assertEquals(k.term.utf8ToString(), Float.valueOf(k.v), val);
+        assertEquals(k.term.utf8ToString(), k.v, lookupResult.value);
       } else {
-        assertTrue(val + ">=" + previous, val >= previous);
-        previous = val.floatValue();
+        assertTrue(lookupResult.value + ">=" + previous, lookupResult.value >= previous);
+        previous = lookupResult.value;
       }
     }
   }

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java Fri Mar  2 15:59:55 2012
@@ -29,59 +29,79 @@ import org.apache.lucene.util.LuceneTest
 import org.apache.lucene.util._TestUtil;
 
 public class TestBytesRefList extends LuceneTestCase {
-
+  
   public void testAppend() throws IOException {
     BytesRefList list = new BytesRefList();
     List<String> stringList = new ArrayList<String>();
-    int entries = atLeast(500);
-    BytesRef spare = new BytesRef();
-    for (int i = 0; i < entries; i++) {
-      String randomRealisticUnicodeString = _TestUtil
-          .randomRealisticUnicodeString(random);
-      spare.copyChars(randomRealisticUnicodeString);
-      list.append(spare);
-      stringList.add(randomRealisticUnicodeString);
-    }
-    for (int i = 0; i < entries; i++) {
-      assertNotNull(list.get(spare, i));
-      assertEquals("entry " + i + " doesn't match", stringList.get(i),
-          spare.utf8ToString());
-    }
-
-    // check random
-    for (int i = 0; i < entries; i++) {
-      int e = random.nextInt(entries);
-      assertNotNull(list.get(spare, e));
-      assertEquals("entry " + i + " doesn't match", stringList.get(e),
-          spare.utf8ToString());
-    }
-    for (int i = 0; i < 2; i++) {
-
-      BytesRefIterator iterator = list.iterator();
-      for (String string : stringList) {
-        assertEquals(string, iterator.next().utf8ToString());
+    for (int j = 0; j < 2; j++) {
+      if (j > 0 && random.nextBoolean()) {
+        list.clear();
+        stringList.clear();
+      }
+      int entries = atLeast(500);
+      BytesRef spare = new BytesRef();
+      for (int i = 0; i < entries; i++) {
+        String randomRealisticUnicodeString = _TestUtil
+            .randomRealisticUnicodeString(random);
+        spare.copyChars(randomRealisticUnicodeString);
+        list.append(spare);
+        stringList.add(randomRealisticUnicodeString);
+      }
+      for (int i = 0; i < entries; i++) {
+        assertNotNull(list.get(spare, i));
+        assertEquals("entry " + i + " doesn't match", stringList.get(i),
+            spare.utf8ToString());
+      }
+      
+      // check random
+      for (int i = 0; i < entries; i++) {
+        int e = random.nextInt(entries);
+        assertNotNull(list.get(spare, e));
+        assertEquals("entry " + i + " doesn't match", stringList.get(e),
+            spare.utf8ToString());
+      }
+      for (int i = 0; i < 2; i++) {
+        
+        BytesRefIterator iterator = list.iterator();
+        for (String string : stringList) {
+          assertEquals(string, iterator.next().utf8ToString());
+        }
       }
     }
   }
-
-  public void testSort() {
+  
+  public void testSort() throws IOException {
     BytesRefList list = new BytesRefList();
     List<String> stringList = new ArrayList<String>();
-    int entries = atLeast(500);
-    BytesRef spare = new BytesRef();
-    for (int i = 0; i < entries; i++) {
-      String randomRealisticUnicodeString = _TestUtil.randomRealisticUnicodeString(random);
-      spare.copyChars(randomRealisticUnicodeString);
-      list.append(spare);
-      stringList.add(randomRealisticUnicodeString);
-    }
-    Collections.sort(stringList);
-    int[] sortedOrds = list.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
-    for (int i = 0; i < entries; i++) {
-      assertNotNull(list.get(spare, sortedOrds[i]));
-      assertEquals("entry " + i + " doesn't match", stringList.get(i),
-          spare.utf8ToString());
+
+    for (int j = 0; j < 2; j++) {
+      if (j > 0 && random.nextBoolean()) {
+        list.clear();
+        stringList.clear();
+      }
+      int entries = atLeast(500);
+      BytesRef spare = new BytesRef();
+      for (int i = 0; i < entries; i++) {
+        String randomRealisticUnicodeString = _TestUtil
+            .randomRealisticUnicodeString(random);
+        spare.copyChars(randomRealisticUnicodeString);
+        list.append(spare);
+        stringList.add(randomRealisticUnicodeString);
+      }
+      
+      Collections.sort(stringList);
+      BytesRefIterator iter = list.iterator(BytesRef
+          .getUTF8SortedAsUTF16Comparator());
+      int i = 0;
+      while ((spare = iter.next()) != null) {
+        assertEquals("entry " + i + " doesn't match", stringList.get(i),
+            spare.utf8ToString());
+        i++;
+      }
+      assertNull(iter.next());
+      assertEquals(i, stringList.size());
     }
     
   }
+  
 }

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java Fri Mar  2 15:59:55 2012
@@ -17,12 +17,16 @@ package org.apache.lucene.search.suggest
  * the License.
  */
 
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeMap;
 
 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
@@ -38,7 +42,8 @@ public class TestTermFreqIterator extend
   public void testTerms() throws Exception {
     int num = atLeast(10000);
     
-    TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>();
+    Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
+    TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>(comparator);
     TermFreq[] unsorted = new TermFreq[num];
 
     for (int i = 0; i < num; i++) {
@@ -52,13 +57,13 @@ public class TestTermFreqIterator extend
     }
     
     // test the sorted iterator wrapper
-    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
+    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator);
     Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
     while (expected.hasNext()) {
       Map.Entry<BytesRef,Long> entry = expected.next();
       
       assertEquals(entry.getKey(), wrapper.next());
-      assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
+      assertEquals(entry.getValue().longValue(), wrapper.weight());
     }
     assertNull(wrapper.next());
     
@@ -72,4 +77,57 @@ public class TestTermFreqIterator extend
     }
     assertEquals(sorted, actual);
   }
+  
+  
+  public void testRaw() throws Exception {
+    int num = atLeast(10000);
+    
+    Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+    BytesRefHash sorted = new BytesRefHash();
+    TermFreq[] unsorted = new TermFreq[num];
+    byte[] buffer = new byte[0];
+    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+    for (int i = 0; i < num; i++) {
+      BytesRef spare;
+      long weight;
+      do {
+        spare = new BytesRef(_TestUtil.randomUnicodeString(random));
+        if (spare.length + 8 >= buffer.length) {
+          buffer = ArrayUtil.grow(buffer, spare.length + 8);
+        }
+        output.reset(buffer);
+        output.writeBytes(spare.bytes, spare.offset, spare.length);
+        weight = random.nextLong();
+        output.writeLong(weight);
+        
+      } while (sorted.add(new BytesRef(buffer, 0, output.getPosition())) < 0);
+      unsorted[i] = new TermFreq(spare, weight);
+    }
+    
+    // test the sorted iterator wrapper
+    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator, true);
+    int[] sort = sorted.sort(comparator);
+    int size = sorted.size();
+    BytesRef spare = new BytesRef();
+    for (int i = 0; i < size; i++) {
+      sorted.get(sort[i], spare);
+      spare.length -= 8; // sub the long value
+      assertEquals(spare, wrapper.next());
+      spare.offset = spare.offset + spare.length;
+      spare.length = 8;
+      assertEquals(asLong(spare), wrapper.weight());
+    }
+    assertNull(wrapper.next());
+  }
+  
+  public static long asLong(BytesRef b) {
+    return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
+        b.offset + 4) & 0xFFFFFFFFL);
+  }
+
+  private static int asIntInternal(BytesRef b, int pos) {
+    return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
+        | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
+  }
 }

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java Fri Mar  2 15:59:55 2012
@@ -17,9 +17,8 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
-import java.util.Iterator;
-
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;
 
@@ -31,7 +30,7 @@ public class BytesRefSortersTest extends
 
   @Test
   public void testInMemorySorter() throws Exception {
-    check(new InMemorySorter());
+    check(new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()));
   }
 
   private void check(BytesRefSorter sorter) throws Exception {
@@ -42,8 +41,8 @@ public class BytesRefSortersTest extends
     }
 
     // Create two iterators and check that they're aligned with each other.
-    Iterator<BytesRef> i1 = sorter.iterator();
-    Iterator<BytesRef> i2 = sorter.iterator();
+    BytesRefIterator i1 = sorter.iterator();
+    BytesRefIterator i2 = sorter.iterator();
     
     // Verify sorter contract.
     try {
@@ -52,10 +51,12 @@ public class BytesRefSortersTest extends
     } catch (IllegalStateException e) {
       // Expected.
     }
-
-    while (i1.hasNext() && i2.hasNext()) {
-      assertEquals(i1.next(), i2.next());
+    BytesRef spare1;
+    BytesRef spare2;
+    while ((spare1 = i1.next()) != null && (spare2 = i2.next()) != null) {
+      assertEquals(spare1, spare2);
     }
-    assertEquals(i1.hasNext(), i2.hasNext());
+    assertNull(i1.next());
+    assertNull(i2.next());
   }  
 }

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java Fri Mar  2 15:59:55 2012
@@ -165,9 +165,9 @@ public class FSTCompletionTest extends L
 
     // All the weights were constant, so all returned buckets must be constant, whatever they
     // are.
-    Float previous = null; 
+    Long previous = null; 
     for (TermFreq tf : keys) {
-      Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
+      Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random))).longValue();
       if (previous != null) {
         assertEquals(previous, current);
       }
@@ -181,7 +181,7 @@ public class FSTCompletionTest extends L
     FSTCompletionLookup lookup = new FSTCompletionLookup();
     lookup.build(new TermFreqArrayIterator(input));
     for (TermFreq tf : input) {
-      assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
+      assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)));
       assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
     }
 

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java Fri Mar  2 15:59:55 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest
 import java.io.*;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 
 import org.apache.lucene.search.suggest.fst.Sort.BufferSize;
 import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
@@ -61,7 +62,7 @@ public class TestSort extends LuceneTest
   @Test
   public void testIntermediateMerges() throws Exception {
     // Sort 20 mb worth of data with 1mb buffer, binary merging.
-    SortInfo info = checkSort(new Sort(BufferSize.megabytes(1), Sort.defaultTempDir(), 2), 
+    SortInfo info = checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(1), Sort.defaultTempDir(), 2), 
         generateRandom(Sort.MB * 20));
     assertTrue(info.mergeRounds > 10);
   }
@@ -69,7 +70,7 @@ public class TestSort extends LuceneTest
   @Test
   public void testSmallRandom() throws Exception {
     // Sort 20 mb worth of data with 1mb buffer.
-    SortInfo sortInfo = checkSort(new Sort(BufferSize.megabytes(1), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
+    SortInfo sortInfo = checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(1), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
         generateRandom(Sort.MB * 20));
     assertEquals(1, sortInfo.mergeRounds);
   }
@@ -77,7 +78,7 @@ public class TestSort extends LuceneTest
   @Test @Nightly
   public void testLargerRandom() throws Exception {
     // Sort 100MB worth of data with 15mb buffer.
-    checkSort(new Sort(BufferSize.megabytes(16), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
+    checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(16), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
         generateRandom(Sort.MB * 100));
   }
 
@@ -92,14 +93,25 @@ public class TestSort extends LuceneTest
     byte [][] bytes = data.toArray(new byte[data.size()][]);
     return bytes;
   }
-
+  
+  static final Comparator<byte[]> unsignedByteOrderComparator = new Comparator<byte[]>() {
+    public int compare(byte[] left, byte[] right) {
+      final int max = Math.min(left.length, right.length);
+      for (int i = 0, j = 0; i < max; i++, j++) {
+        int diff = (left[i]  & 0xff) - (right[j] & 0xff); 
+        if (diff != 0) 
+          return diff;
+      }
+      return left.length - right.length;
+    }
+  };
   /**
    * Check sorting data on an instance of {@link Sort}.
    */
   private SortInfo checkSort(Sort sort, byte[][] data) throws IOException {
     File unsorted = writeAll("unsorted", data);
 
-    Arrays.sort(data, Sort.unsignedByteOrderComparator);
+    Arrays.sort(data, unsignedByteOrderComparator);
     File golden = writeAll("golden", data);
 
     File sorted = new File(tempDir, "sorted");

Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java Fri Mar  2 15:59:55 2012
@@ -117,7 +117,7 @@ public class WFSTCompletionTest extends 
       // TODO: could be faster... but its slowCompletor for a reason
       for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
         if (e.getKey().startsWith(prefix)) {
-          matches.add(new LookupResult(e.getKey(), (float)e.getValue().longValue()));
+          matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
         }
       }
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1296268&r1=1296267&r2=1296268&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java Fri Mar  2 15:59:55 2012
@@ -153,11 +153,6 @@ public class Suggester extends SolrSpell
     build(core, searcher);
   }
 
-  public void add(CharsRef query, int numHits) {
-    LOG.info("add " + query + ", " + numHits);
-    lookup.add(query, new Integer(numHits));
-  }
-  
   static SpellingResult EMPTY_RESULT = new SpellingResult();
 
   @Override
@@ -182,7 +177,7 @@ public class Suggester extends SolrSpell
         Collections.sort(suggestions);
       }
       for (LookupResult lr : suggestions) {
-        res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
+        res.add(t, lr.key.toString(), (int)lr.value);
       }
     }
     return res;