You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2011/03/15 22:35:35 UTC
svn commit: r1081952 [7/17] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/demo/ dev-tools/idea/luce...

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java Tue Mar 15 21:35:17 2011
@@ -72,8 +72,8 @@ public abstract class MergePolicy implem
     long mergeGen;                  // used by IndexWriter
     boolean isExternal;             // used by IndexWriter
     int maxNumSegmentsOptimize;     // used by IndexWriter
-    SegmentReader[] readers;        // used by IndexWriter
-    SegmentReader[] readersClone;   // used by IndexWriter
+    List<SegmentReader> readers;        // used by IndexWriter
+    List<SegmentReader> readerClones;   // used by IndexWriter
     public final SegmentInfos segments;
     boolean aborted;
     Throwable error;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java Tue Mar 15 21:35:17 2011
@@ -129,7 +129,7 @@ public final  class MultiFieldsEnum exte
 
   private final static class FieldMergeQueue extends PriorityQueue<FieldsEnumWithSlice> {
     FieldMergeQueue(int size) {
-      initialize(size);
+      super(size);
     }
 
     @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Tue Mar 15 21:35:17 2011
@@ -469,7 +469,7 @@ public final class MultiTermsEnum extend
   private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
     Comparator<BytesRef> termComp;
     TermMergeQueue(int size) {
-      initialize(size);
+      super(size);
     }
 
     @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/OrdTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/OrdTermState.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/OrdTermState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/OrdTermState.java Tue Mar 15 21:35:17 2011
@@ -30,4 +30,9 @@ public class OrdTermState extends TermSt
     assert other instanceof OrdTermState : "can not copy from " + other.getClass().getName();
     this.ord = ((OrdTermState) other).ord;
   }
+
+  @Override
+  public String toString() {
+    return "OrdTermState ord=" + ord;
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Tue Mar 15 21:35:17 2011
@@ -571,6 +571,11 @@ final class SegmentMerger {
     return mergeState.delCounts;
   }
   
+  public boolean getAnyNonBulkMerges() {
+    assert matchedCount <= readers.size();
+    return matchedCount != readers.size();
+  }
+
   private void mergeNorms() throws IOException {
     IndexOutput output = null;
     try {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java Tue Mar 15 21:35:17 2011
@@ -702,7 +702,7 @@ public class SegmentReader extends Index
     }
   }
 
-  private void commitChanges(Map<String,String> commitUserData) throws IOException {
+  private synchronized void commitChanges(Map<String,String> commitUserData) throws IOException {
     if (deletedDocsDirty) {               // re-write deleted
       si.advanceDelGen();
 
@@ -923,26 +923,21 @@ public class SegmentReader extends Index
     return fieldSet;
   }
 
-
   @Override
-  public synchronized boolean hasNorms(String field) {
+  public boolean hasNorms(String field) {
     ensureOpen();
     return norms.containsKey(field);
   }
 
-  // can return null if norms aren't stored
-  protected synchronized byte[] getNorms(String field) throws IOException {
-    Norm norm = norms.get(field);
-    if (norm == null) return null;  // not indexed, or norms not stored
-    return norm.bytes();
-  }
-
-  // returns fake norms if norms aren't available
   @Override
-  public synchronized byte[] norms(String field) throws IOException {
+  public byte[] norms(String field) throws IOException {
     ensureOpen();
-    byte[] bytes = getNorms(field);
-    return bytes;
+    final Norm norm = norms.get(field);
+    if (norm == null) {
+      // not indexed, or norms not stored
+      return null;  
+    }
+    return norm.bytes();
   }
 
   @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermState.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermState.java Tue Mar 15 21:35:17 2011
@@ -44,4 +44,9 @@ public abstract class TermState implemen
       throw new RuntimeException(cnse);
     }
   } 
-}
\ No newline at end of file
+
+  @Override
+  public String toString() {
+    return "TermState";
+  }
+}

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java Tue Mar 15 21:35:17 2011
@@ -51,6 +51,6 @@ public class BlockTermState extends OrdT
 
   @Override
   public String toString() {
-    return super.toString() + "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
+    return "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Tue Mar 15 21:35:17 2011
@@ -67,9 +67,6 @@ public class BlockTermsReader extends Fi
 
   private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
 
-  // Comparator that orders our terms
-  private final Comparator<BytesRef> termComp;
-
   // Caches the most recently looked-up field + terms:
   private final DoubleBarrelLRUCache<FieldAndTerm,BlockTermState> termsCache;
 
@@ -112,13 +109,12 @@ public class BlockTermsReader extends Fi
   //private String segment;
   
   public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize,
-                          Comparator<BytesRef> termComp, int termsCacheSize, String codecId)
+                          int termsCacheSize, String codecId)
     throws IOException {
     
     this.postingsReader = postingsReader;
     termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
 
-    this.termComp = termComp;
     //this.segment = segment;
     in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION),
                        readBufferSize);
@@ -261,7 +257,7 @@ public class BlockTermsReader extends Fi
 
     @Override
     public Comparator<BytesRef> getComparator() {
-      return termComp;
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 
     @Override
@@ -343,23 +339,29 @@ public class BlockTermsReader extends Fi
 
       @Override
       public Comparator<BytesRef> getComparator() {
-        return termComp;
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
       }
 
+      // TODO: we may want an alternate mode here which is
+      // "if you are about to return NOT_FOUND I won't use
+      // the terms data from that"; eg FuzzyTermsEnum will
+      // (usually) just immediately call seek again if we
+      // return NOT_FOUND so it's a waste for us to fill in
+      // the term that was actually NOT_FOUND
       @Override
       public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException {
 
         if (indexEnum == null) {
           throw new IllegalStateException("terms index was not loaded");
         }
-        
-        //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
+   
         /*
+        System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
         if (didIndexNext) {
           if (nextIndexTerm == null) {
-            //System.out.println("  nextIndexTerm=null");
+            System.out.println("  nextIndexTerm=null");
           } else {
-            //System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
+            System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
           }
         }
         */
@@ -387,7 +389,7 @@ public class BlockTermsReader extends Fi
         // is after current term but before next index term:
         if (indexIsCurrent) {
 
-          final int cmp = termComp.compare(term, target);
+          final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term, target);
 
           if (cmp == 0) {
             // Already at the requested term
@@ -405,7 +407,7 @@ public class BlockTermsReader extends Fi
               didIndexNext = true;
             }
 
-            if (nextIndexTerm == null || termComp.compare(target, nextIndexTerm) < 0) {
+            if (nextIndexTerm == null || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
               // Optimization: requested term is within the
               // same term block we are now in; skip seeking
               // (but do scanning):
@@ -435,48 +437,175 @@ public class BlockTermsReader extends Fi
             state.ord = indexEnum.ord()-1;
           }
 
-          // NOTE: the first _next() after an index seek is
-          // a bit wasteful, since it redundantly reads some
-          // suffix bytes into the buffer.  We could avoid storing
-          // those bytes in the primary file, but then when
-          // next()ing over an index term we'd have to
-          // special case it:
           term.copy(indexEnum.term());
           //System.out.println("  seek: term=" + term.utf8ToString());
         } else {
-          ////System.out.println("  skip seek");
+          //System.out.println("  skip seek");
+          if (state.termCount == state.blockTermCount && !nextBlock()) {
+            indexIsCurrent = false;
+            return SeekStatus.END;
+          }
         }
 
         seekPending = false;
 
-        // Now scan:
-        while (_next() != null) {
-          final int cmp = termComp.compare(term, target);
-          if (cmp == 0) {
-            // Match!
-            if (useCache) {
-              // Store in cache
-              decodeMetaData();
-              termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
+        int common = 0;
+
+        // Scan within block.  We could do this by calling
+        // _next() and testing the resulting term, but this
+        // is wasteful.  Instead, we first confirm the
+        // target matches the common prefix of this block,
+        // and then we scan the term bytes directly from the
+        // termSuffixesreader's byte[], saving a copy into
+        // the BytesRef term per term.  Only when we return
+        // do we then copy the bytes into the term.
+
+        while(true) {
+
+          // First, see if target term matches common prefix
+          // in this block:
+          if (common < termBlockPrefix) {
+            final int cmp = (term.bytes[common]&0xFF) - (target.bytes[target.offset + common]&0xFF);
+            if (cmp < 0) {
+
+              // TODO: maybe we should store common prefix
+              // in block header?  (instead of relying on
+              // last term of previous block)
+
+              // Target's prefix is after the common block
+              // prefix, so term cannot be in this block
+              // but it could be in next block.  We
+              // must scan to end-of-block to set common
+              // prefix for next block:
+              if (state.termCount < state.blockTermCount) {
+                while(state.termCount < state.blockTermCount-1) {
+                  state.termCount++;
+                  state.ord++;
+                  termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
+                }
+                final int suffix = termSuffixesReader.readVInt();
+                term.length = termBlockPrefix + suffix;
+                if (term.bytes.length < term.length) {
+                  term.grow(term.length);
+                }
+                termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              }
+              state.ord++;
+              
+              if (!nextBlock()) {
+                indexIsCurrent = false;
+                return SeekStatus.END;
+              }
+              common = 0;
+
+            } else if (cmp > 0) {
+              // Target's prefix is before the common prefix
+              // of this block, so we position to start of
+              // block and return NOT_FOUND:
+              assert state.termCount == 0;
+
+              final int suffix = termSuffixesReader.readVInt();
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              return SeekStatus.NOT_FOUND;
+            } else {
+              common++;
+            }
+
+            continue;
+          }
+
+          // Test every term in this block
+          while (true) {
+            state.termCount++;
+            state.ord++;
+
+            final int suffix = termSuffixesReader.readVInt();
+            
+            // We know the prefix matches, so just compare the new suffix:
+            final int termLen = termBlockPrefix + suffix;
+            int bytePos = termSuffixesReader.getPosition();
+
+            boolean next = false;
+            final int limit = target.offset + (termLen < target.length ? termLen : target.length);
+            int targetPos = target.offset + termBlockPrefix;
+            while(targetPos < limit) {
+              final int cmp = (termSuffixes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+              if (cmp < 0) {
+                // Current term is still before the target;
+                // keep scanning
+                next = true;
+                break;
+              } else if (cmp > 0) {
+                // Done!  Current term is after target. Stop
+                // here, fill in real term, return NOT_FOUND.
+                term.length = termBlockPrefix + suffix;
+                if (term.bytes.length < term.length) {
+                  term.grow(term.length);
+                }
+                termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+                //System.out.println("  NOT_FOUND");
+                return SeekStatus.NOT_FOUND;
+              }
+            }
+
+            if (!next && target.length <= termLen) {
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+
+              if (target.length == termLen) {
+                // Done!  Exact match.  Stop here, fill in
+                // real term, return FOUND.
+                //System.out.println("  FOUND");
+
+                if (useCache) {
+                  // Store in cache
+                  decodeMetaData();
+                  //System.out.println("  cache! state=" + state);
+                  termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
+                }
+
+                return SeekStatus.FOUND;
+              } else {
+                //System.out.println("  NOT_FOUND");
+                return SeekStatus.NOT_FOUND;
+              }
+            }
+
+            if (state.termCount == state.blockTermCount) {
+              // Must pre-fill term for next block's common prefix
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              break;
+            } else {
+              termSuffixesReader.skipBytes(suffix);
             }
-            //System.out.println("  FOUND");
-            return SeekStatus.FOUND;
-          } else if (cmp > 0) {
-            //System.out.println("  NOT_FOUND term=" + term.utf8ToString());
-            return SeekStatus.NOT_FOUND;
           }
-          
+
           // The purpose of the terms dict index is to seek
           // the enum to the closest index term before the
           // term we are looking for.  So, we should never
           // cross another index term (besides the first
           // one) while we are scanning:
+
           assert indexIsCurrent;
-        }
 
-        indexIsCurrent = false;
-        //System.out.println("  END");
-        return SeekStatus.END;
+          if (!nextBlock()) {
+            //System.out.println("  END");
+            indexIsCurrent = false;
+            return SeekStatus.END;
+          }
+          common = 0;
+        }
       }
 
       @Override
@@ -516,12 +645,10 @@ public class BlockTermsReader extends Fi
          decode all metadata up to the current term. */
       private BytesRef _next() throws IOException {
         //System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")");
-        if (state.termCount == state.blockTermCount) {
-          if (!nextBlock()) {
-            //System.out.println("  eof");
-            indexIsCurrent = false;
-            return null;
-          }
+        if (state.termCount == state.blockTermCount && !nextBlock()) {
+          //System.out.println("  eof");
+          indexIsCurrent = false;
+          return null;
         }
 
         // TODO: cutover to something better for these ints!  simple64?
@@ -697,7 +824,7 @@ public class BlockTermsReader extends Fi
         }
         //System.out.println("  termSuffixes len=" + len);
         in.readBytes(termSuffixes, 0, len);
-        termSuffixesReader.reset(termSuffixes);
+        termSuffixesReader.reset(termSuffixes, 0, len);
 
         // docFreq, totalTermFreq
         len = in.readVInt();
@@ -706,7 +833,7 @@ public class BlockTermsReader extends Fi
         }
         //System.out.println("  freq bytes len=" + len);
         in.readBytes(docFreqBytes, 0, len);
-        freqReader.reset(docFreqBytes);
+        freqReader.reset(docFreqBytes, 0, len);
         metaDataUpto = 0;
 
         state.termCount = 0;
@@ -725,23 +852,32 @@ public class BlockTermsReader extends Fi
         if (!seekPending) {
           // lazily catch up on metadata decode:
           final int limit = state.termCount;
+          // We must set/incr state.termCount because
+          // postings impl can look at this
           state.termCount = metaDataUpto;
+          // TODO: better API would be "jump straight to term=N"???
           while (metaDataUpto < limit) {
-            //System.out.println("  decode");
+            //System.out.println("  decode mdUpto=" + metaDataUpto);
             // TODO: we could make "tiers" of metadata, ie,
             // decode docFreq/totalTF but don't decode postings
             // metadata; this way caller could get
             // docFreq/totalTF w/o paying decode cost for
             // postings
+
+            // TODO: if docFreq were bulk decoded we could
+            // just skipN here:
             state.docFreq = freqReader.readVInt();
+            //System.out.println("    dF=" + state.docFreq);
             if (!fieldInfo.omitTermFreqAndPositions) {
               state.totalTermFreq = state.docFreq + freqReader.readVLong();
+              //System.out.println("    totTF=" + state.totalTermFreq);
             }
+
             postingsReader.nextTerm(fieldInfo, state);
             metaDataUpto++;
             state.termCount++;
           }
-        } else {
+        //} else {
           //System.out.println("  skip! seekPending");
         }
       }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java Tue Mar 15 21:35:17 2011
@@ -63,24 +63,23 @@ public class BlockTermsWriter extends Fi
   FieldInfo currentField;
   private final TermsIndexWriterBase termsIndexWriter;
   private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
-  private final Comparator<BytesRef> termComp;
-  private final String segment;
+
+  //private final String segment;
 
   public BlockTermsWriter(
       TermsIndexWriterBase termsIndexWriter,
       SegmentWriteState state,
-      PostingsWriterBase postingsWriter,
-      Comparator<BytesRef> termComp) throws IOException
+      PostingsWriterBase postingsWriter)
+    throws IOException
   {
     final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
     this.termsIndexWriter = termsIndexWriter;
-    this.termComp = termComp;
     out = state.directory.createOutput(termsFileName);
     fieldInfos = state.fieldInfos;
     writeHeader(out);
     currentField = null;
     this.postingsWriter = postingsWriter;
-    segment = state.segmentName;
+    //segment = state.segmentName;
 
     //System.out.println("BTW.init seg=" + state.segmentName);
 
@@ -161,7 +160,6 @@ public class BlockTermsWriter extends Fi
     private long numTerms;
     private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
     long sumTotalTermFreq;
-    private final BytesRef lastTerm = new BytesRef();
 
     private TermEntry[] pendingTerms;
 
@@ -185,12 +183,12 @@ public class BlockTermsWriter extends Fi
     
     @Override
     public Comparator<BytesRef> getComparator() {
-      return termComp;
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 
     @Override
     public PostingsConsumer startTerm(BytesRef text) throws IOException {
-      //System.out.println("BTW.startTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text);
+      //System.out.println("BTW.startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
       postingsWriter.startTerm();
       return postingsWriter;
     }
@@ -201,7 +199,7 @@ public class BlockTermsWriter extends Fi
     public void finishTerm(BytesRef text, TermStats stats) throws IOException {
 
       assert stats.docFreq > 0;
-      //System.out.println("BTW.finishTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " df=" + stats.docFreq);
+      //System.out.println("BTW.finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
 
       final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
 
@@ -213,6 +211,7 @@ public class BlockTermsWriter extends Fi
           flushBlock();
         }
         fieldIndexWriter.add(text, stats, out.getFilePointer());
+        //System.out.println("  index term!");
       }
 
       if (pendingTerms.length == pendingCount) {
@@ -265,7 +264,7 @@ public class BlockTermsWriter extends Fi
     private final RAMOutputStream bytesWriter = new RAMOutputStream();
 
     private void flushBlock() throws IOException {
-      //System.out.println("BTW.flushBlock pendingCount=" + pendingCount);
+      //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
 
       // First pass: compute common prefix for all terms
       // in the block, against term before first term in

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java Tue Mar 15 21:35:17 2011
@@ -167,7 +167,7 @@ public class BulkVIntCodec extends Codec
 
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
       success = true;
       return ret;
     } finally {
@@ -211,7 +211,6 @@ public class BulkVIntCodec extends Codec
                                                 state.segmentInfo.name,
                                                 postingsReader,
                                                 state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                 StandardCodec.TERMS_CACHE_SIZE,
                                                 state.codecId);
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java Tue Mar 15 21:35:17 2011
@@ -63,7 +63,7 @@ public class FrameOfRefCodec extends Cod
 
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
       success = true;
       return ret;
     } finally {
@@ -108,7 +108,6 @@ public class FrameOfRefCodec extends Cod
                                                       state.segmentInfo.name,
                                                       postingsReader,
                                                       state.readBufferSize,
-                                                      BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                       StandardCodec.TERMS_CACHE_SIZE,
                                                       state.codecId);
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java Tue Mar 15 21:35:17 2011
@@ -63,7 +63,7 @@ public class PatchedFrameOfRefCodec exte
 
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
       success = true;
       return ret;
     } finally {
@@ -108,7 +108,6 @@ public class PatchedFrameOfRefCodec exte
                                                       state.segmentInfo.name,
                                                       postingsReader,
                                                       state.readBufferSize,
-                                                      BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                       StandardCodec.TERMS_CACHE_SIZE,
                                                       state.codecId);
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java Tue Mar 15 21:35:17 2011
@@ -183,7 +183,7 @@ public class PForDeltaFixedIntBlockCodec
 
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
       success = true;
       return ret;
     } finally {
@@ -227,7 +227,6 @@ public class PForDeltaFixedIntBlockCodec
                                                        state.segmentInfo.name,
                                                        postingsReader,
                                                        state.readBufferSize,
-                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                        StandardCodec.TERMS_CACHE_SIZE,
                                                        state.codecId);
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Tue Mar 15 21:35:17 2011
@@ -30,15 +30,14 @@ import org.apache.lucene.index.codecs.Po
 import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
 import org.apache.lucene.index.codecs.BlockTermsReader;
 import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
 
 /** This codec "inlines" the postings for terms that have
  *  low docFreq.  It wraps another codec, which is used for
@@ -78,7 +77,7 @@ public class PulsingCodec extends Codec 
     TermsIndexWriterBase indexWriter;
     boolean success = false;
     try {
-      indexWriter = new FixedGapTermsIndexWriter(state);
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
       success = true;
     } finally {
       if (!success) {
@@ -89,7 +88,7 @@ public class PulsingCodec extends Codec 
     // Terms dict
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter);
       success = true;
       return ret;
     } finally {
@@ -116,12 +115,11 @@ public class PulsingCodec extends Codec 
 
     boolean success = false;
     try {
-      indexReader = new FixedGapTermsIndexReader(state.dir,
-                                                       state.fieldInfos,
-                                                       state.segmentInfo.name,
-                                                       state.termsIndexDivisor,
-                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
-                                                       state.codecId);
+      indexReader = new VariableGapTermsIndexReader(state.dir,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.termsIndexDivisor,
+                                                    state.codecId);
       success = true;
     } finally {
       if (!success) {
@@ -136,7 +134,6 @@ public class PulsingCodec extends Codec 
                                                 state.dir, state.fieldInfos, state.segmentInfo.name,
                                                 pulsingReader,
                                                 state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                 StandardCodec.TERMS_CACHE_SIZE,
                                                 state.codecId);
       success = true;
@@ -156,7 +153,7 @@ public class PulsingCodec extends Codec 
   public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, id, files);
     BlockTermsReader.files(dir, segmentInfo, id, files);
-    FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
+    VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
   }
 
   @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Tue Mar 15 21:35:17 2011
@@ -145,7 +145,7 @@ public class PulsingPostingsReaderImpl e
     //System.out.println("  count=" + count + " threshold=" + maxPositions);
 
     if (count <= maxPositions) {
-      //System.out.println("  inlined");
+      //System.out.println("  inlined pos=" + termState.inlinedBytesReader.getPosition());
 
       // Inlined into terms dict -- just read the byte[] blob in,
       // but don't decode it now (we only decode when a DocsEnum

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java Tue Mar 15 21:35:17 2011
@@ -150,7 +150,7 @@ public class Simple64Codec extends Codec
 
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
       success = true;
       return ret;
     } finally {
@@ -194,7 +194,6 @@ public class Simple64Codec extends Codec
                                                        state.segmentInfo.name,
                                                        postingsReader,
                                                        state.readBufferSize,
-                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                        StandardCodec.TERMS_CACHE_SIZE,
                                                        state.codecId);
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Tue Mar 15 21:35:17 2011
@@ -23,7 +23,6 @@ import java.util.Set;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
@@ -66,7 +65,7 @@ public class StandardCodec extends Codec
 
     success = false;
     try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
       success = true;
       return ret;
     } finally {
@@ -109,7 +108,6 @@ public class StandardCodec extends Codec
                                                 state.segmentInfo.name,
                                                 postings,
                                                 state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                 TERMS_CACHE_SIZE,
                                                 state.codecId);
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/package.html?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/package.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/package.html Tue Mar 15 21:35:17 2011
@@ -45,7 +45,7 @@ Features:
 <p>
 Lazy loading of Message Strings
 
-<pre>
+<pre class="prettyprint">
 	public class MessagesTestBundle extends NLS {
 	
 	  private static final String BUNDLE_NAME = MessagesTestBundle.class.getName();
@@ -85,7 +85,7 @@ Lazy loading of Message Strings
 <p>
 Normal loading of Message Strings
 
-<pre>
+<pre class="prettyprint">
 	String message1 = NLS.getLocalizedMessage(MessagesTestBundle.Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION);
 	String message2 = NLS.getLocalizedMessage(MessagesTestBundle.Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION, Locale.JAPANESE);
 </pre>

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java Tue Mar 15 21:35:17 2011
@@ -78,9 +78,9 @@ public abstract class QueryParserBase {
   // maps field names to date resolutions
   Map<String,DateTools.Resolution> fieldToDateResolution = null;
 
-  // The collator to use when determining range inclusion,
-  // for use when constructing RangeQuerys.
-  Collator rangeCollator = null;
+  //Whether or not to analyze range terms when constructing RangeQuerys
+  // (For example, analyzing terms into collation keys for locale-sensitive RangeQuery)
+  boolean analyzeRangeTerms = false;
 
   boolean autoGeneratePhraseQueries;
 
@@ -391,27 +391,21 @@ public abstract class QueryParserBase {
   }
 
   /**
-   * Sets the collator used to determine index term inclusion in ranges
-   * for RangeQuerys.
-   * <p/>
-   * <strong>WARNING:</strong> Setting the rangeCollator to a non-null
-   * collator using this method will cause every single index Term in the
-   * Field referenced by lowerTerm and/or upperTerm to be examined.
-   * Depending on the number of index Terms in this Field, the operation could
-   * be very slow.
-   *
-   *  @param rc  the collator to use when constructing RangeQuerys
+   * Set whether or not to analyze range terms when constructing RangeQuerys.
+   * For example, setting this to true can enable analyzing terms into 
+   * collation keys for locale-sensitive RangeQuery.
+   * 
+   * @param analyzeRangeTerms whether or not terms should be analyzed for RangeQuerys
    */
-  public void setRangeCollator(Collator rc) {
-    rangeCollator = rc;
+  public void setAnalyzeRangeTerms(boolean analyzeRangeTerms) {
+    this.analyzeRangeTerms = analyzeRangeTerms;
   }
 
   /**
-   * @return the collator used to determine index term inclusion in ranges
-   * for RangeQuerys.
+   * @return whether or not to analyze range terms when constructing RangeQuerys.
    */
-  public Collator getRangeCollator() {
-    return rangeCollator;
+  public boolean getAnalyzeRangeTerms() {
+    return analyzeRangeTerms;
   }
 
   protected void addClause(List<BooleanClause> clauses, int conj, int mods, Query q) {
@@ -792,6 +786,36 @@ public abstract class QueryParserBase {
     return new FuzzyQuery(term,minimumSimilarity,prefixLength);
   }
 
+  private BytesRef analyzeRangePart(String field, String part) {
+    TokenStream source;
+      
+    try {
+      source = analyzer.reusableTokenStream(field, new StringReader(part));
+      source.reset();
+    } catch (IOException e) {
+      source = analyzer.tokenStream(field, new StringReader(part));
+    }
+      
+    BytesRef result = new BytesRef();
+    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
+      
+    try {
+      if (!source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
+      termAtt.toBytesRef(result);
+      if (source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
+    } catch (IOException e) {
+      throw new RuntimeException("error analyzing range part: " + part, e);
+    }
+      
+    try {
+      source.close();
+    } catch (IOException ignored) {}
+      
+    return result;
+  }
+
   /**
    * Builds a new TermRangeQuery instance
    * @param field Field
@@ -802,7 +826,23 @@ public abstract class QueryParserBase {
    * @return new TermRangeQuery instance
    */
   protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) {
-    final TermRangeQuery query = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive, rangeCollator);
+    final BytesRef start;
+    final BytesRef end;
+     
+    if (part1 == null) {
+      start = null;
+    } else {
+      start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1);
+    }
+     
+    if (part2 == null) {
+      end = null;
+    } else {
+      end = analyzeRangeTerms ? analyzeRangePart(field, part2) : new BytesRef(part2);
+    }
+      
+    final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
+
     query.setRewriteMethod(multiTermRewriteMethod);
     return query;
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldComparator.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldComparator.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldComparator.java Tue Mar 15 21:35:17 2011
@@ -18,8 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.text.Collator;
-import java.util.Locale;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.FieldCache.DocTermsIndex;
@@ -718,85 +716,6 @@ public abstract class FieldComparator {
     }
   }
   
-  
-  /** Sorts by a field's value using the Collator for a
-   *  given Locale.
-   *
-   * <p><b>WARNING</b>: this is likely very slow; you'll
-   * get much better performance using the
-   * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */
-  public static final class StringComparatorLocale extends FieldComparator {
-
-    private final String[] values;
-    private DocTerms currentDocTerms;
-    private final String field;
-    final Collator collator;
-    private String bottom;
-    private final BytesRef tempBR = new BytesRef();
-
-    StringComparatorLocale(int numHits, String field, Locale locale) {
-      values = new String[numHits];
-      this.field = field;
-      collator = Collator.getInstance(locale);
-    }
-
-    @Override
-    public int compare(int slot1, int slot2) {
-      final String val1 = values[slot1];
-      final String val2 = values[slot2];
-      if (val1 == null) {
-        if (val2 == null) {
-          return 0;
-        }
-        return -1;
-      } else if (val2 == null) {
-        return 1;
-      }
-      return collator.compare(val1, val2);
-    }
-
-    @Override
-    public int compareBottom(int doc) {
-      final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString();
-      if (bottom == null) {
-        if (val2 == null) {
-          return 0;
-        }
-        return -1;
-      } else if (val2 == null) {
-        return 1;
-      }
-      return collator.compare(bottom, val2);
-    }
-
-    @Override
-    public void copy(int slot, int doc) {
-      final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
-      if (br == null) {
-        values[slot] = null;
-      } else {
-        values[slot] = br.utf8ToString();
-      }
-    }
-
-    @Override
-    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
-      currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field);
-      return this;
-    }
-    
-    @Override
-    public void setBottom(final int bottom) {
-      this.bottom = values[bottom];
-    }
-
-    @Override
-    public Comparable<?> value(int slot) {
-      final String s = values[slot];
-      return s == null ? null : new BytesRef(values[slot]);
-    }
-  }
-
   /** Sorts by field's natural Term sort order, using
    *  ordinals.  This is functionally equivalent to {@link
    *  TermValComparator}, but it first resolves the string

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java Tue Mar 15 21:35:17 2011
@@ -56,15 +56,13 @@ public abstract class FieldValueHitQueue
     
     public OneComparatorFieldValueHitQueue(SortField[] fields, int size)
         throws IOException {
-      super(fields);
+      super(fields, size);
 
       SortField field = fields[0];
       setComparator(0,field.getComparator(size, 0));
       oneReverseMul = field.reverse ? -1 : 1;
 
       reverseMul[0] = oneReverseMul;
-      
-      initialize(size);
     }
 
     /**
@@ -98,7 +96,7 @@ public abstract class FieldValueHitQueue
 
     public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size)
         throws IOException {
-      super(fields);
+      super(fields, size);
 
       int numComparators = comparators.length;
       for (int i = 0; i < numComparators; ++i) {
@@ -107,8 +105,6 @@ public abstract class FieldValueHitQueue
         reverseMul[i] = field.reverse ? -1 : 1;
         setComparator(i, field.getComparator(size, i));
       }
-
-      initialize(size);
     }
   
     @Override
@@ -133,7 +129,8 @@ public abstract class FieldValueHitQueue
   }
   
   // prevent instantiation and extension.
-  private FieldValueHitQueue(SortField[] fields) {
+  private FieldValueHitQueue(SortField[] fields, int size) {
+    super(size);
     // When we get here, fields.length is guaranteed to be > 0, therefore no
     // need to check it again.
     

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/HitQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/HitQueue.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/HitQueue.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/HitQueue.java Tue Mar 15 21:35:17 2011
@@ -63,17 +63,15 @@ final class HitQueue extends PriorityQue
    * @see #getSentinelObject()
    */
   HitQueue(int size, boolean prePopulate) {
-    this.prePopulate = prePopulate;
-    initialize(size);
+    super(size, prePopulate);
   }
 
-  // Returns null if prePopulate is false.
   @Override
   protected ScoreDoc getSentinelObject() {
     // Always set the doc Id to MAX_VALUE so that it won't be favored by
     // lessThan. This generally should not happen since if score is not NEG_INF,
     // TopScoreDocCollector will always add the object to the queue.
-    return !prePopulate ? null : new ScoreDoc(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY);
+    return new ScoreDoc(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY);
   }
   
   @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Tue Mar 15 21:35:17 2011
@@ -429,7 +429,7 @@ public class IndexSearcher {
    * <p>NOTE: this does not compute scores by default.  If you
    * need scores, create a {@link TopFieldCollector}
    * instance by calling {@link TopFieldCollector#create} and
-   * then pass that to {@link #search(Weight, Filter,
+   * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter,
    * Collector)}.</p>
    */
   protected TopFieldDocs search(Weight weight, Filter filter, int nDocs,
@@ -442,13 +442,17 @@ public class IndexSearcher {
       // use all leaves here!
       return search (leafContexts, weight, filter, nDocs, sort, fillFields);
     } else {
-      // TODO: make this respect fillFields
-      final FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(nDocs);
+      final TopFieldCollector topCollector = TopFieldCollector.create(sort, nDocs,
+                                                                      fillFields,
+                                                                      fieldSortDoTrackScores,
+                                                                      fieldSortDoMaxScore,
+                                                                      false);
+
       final Lock lock = new ReentrantLock();
       final ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(executor);
       for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice
         runner.submit(
-                      new SearcherCallableWithSort(lock, this, leafSlices[i], weight, filter, nDocs, hq, sort));
+                      new SearcherCallableWithSort(lock, this, leafSlices[i], weight, filter, nDocs, topCollector, sort));
       }
       int totalHits = 0;
       float maxScore = Float.NEGATIVE_INFINITY;
@@ -458,11 +462,10 @@ public class IndexSearcher {
           maxScore = Math.max(maxScore, topFieldDocs.getMaxScore());
         }
       }
-      final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-      for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
-        scoreDocs[i] = hq.pop();
 
-      return new TopFieldDocs(totalHits, scoreDocs, hq.getFields(), maxScore);
+      final TopFieldDocs topDocs = (TopFieldDocs) topCollector.topDocs();
+
+      return new TopFieldDocs(totalHits, topDocs.scoreDocs, topDocs.fields, topDocs.getMaxScore());
     }
   }
   
@@ -475,7 +478,7 @@ public class IndexSearcher {
    * <p>NOTE: this does not compute scores by default.  If you
    * need scores, create a {@link TopFieldCollector}
    * instance by calling {@link TopFieldCollector#create} and
-   * then pass that to {@link #search(Weight, Filter,
+   * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter,
    * Collector)}.</p>
    */
   protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs,
@@ -721,12 +724,12 @@ public class IndexSearcher {
     private final Weight weight;
     private final Filter filter;
     private final int nDocs;
-    private final FieldDocSortedHitQueue hq;
+    private final TopFieldCollector hq;
     private final Sort sort;
     private final LeafSlice slice;
 
     public SearcherCallableWithSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight,
-        Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort) {
+        Filter filter, int nDocs, TopFieldCollector hq, Sort sort) {
       this.lock = lock;
       this.searcher = searcher;
       this.weight = weight;
@@ -737,27 +740,58 @@ public class IndexSearcher {
       this.slice = slice;
     }
 
+    private final class FakeScorer extends Scorer {
+      float score;
+      int doc;
+
+      public FakeScorer() {
+        super(null);
+      }
+    
+      @Override
+      public int advance(int target) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int docID() {
+        return doc;
+      }
+
+      @Override
+      public float freq() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int nextDoc() {
+        throw new UnsupportedOperationException();
+      }
+    
+      @Override
+      public float score() {
+        return score;
+      }
+    }
+
+    private final FakeScorer fakeScorer = new FakeScorer();
+
     public TopFieldDocs call() throws IOException {
+      assert slice.leaves.length == 1;
       final TopFieldDocs docs = searcher.search (slice.leaves, weight, filter, nDocs, sort, true);
       lock.lock();
       try {
-        hq.setFields(docs.fields);
+        final int base = slice.leaves[0].docBase;
+        hq.setNextReader(slice.leaves[0]);
+        hq.setScorer(fakeScorer);
+        for(ScoreDoc scoreDoc : docs.scoreDocs) {
+          fakeScorer.doc = scoreDoc.doc - base;
+          fakeScorer.score = scoreDoc.score;
+          hq.collect(scoreDoc.doc-base);
+        }
       } finally {
         lock.unlock();
       }
-
-      final ScoreDoc[] scoreDocs = docs.scoreDocs;
-      for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
-        final FieldDoc fieldDoc = (FieldDoc) scoreDocs[j];
-        //it would be so nice if we had a thread-safe insert 
-        lock.lock();
-        try {
-          if (fieldDoc == hq.insertWithOverflow(fieldDoc))
-            break;
-        } finally {
-          lock.unlock();
-        }
-      }
       return docs;
     }
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Tue Mar 15 21:35:17 2011
@@ -432,7 +432,7 @@ class UnionDocsAndPositionsEnum extends 
 
   private static final class DocsQueue extends PriorityQueue<DocsAndPositionsEnum> {
     DocsQueue(List<DocsAndPositionsEnum> docsEnums) throws IOException {
-      initialize(docsEnums.size());
+      super(docsEnums.size());
 
       Iterator<DocsAndPositionsEnum> i = docsEnums.iterator();
       while (i.hasNext()) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Tue Mar 15 21:35:17 2011
@@ -224,7 +224,7 @@ public class PhraseQuery extends Query {
     public Explanation explain(AtomicReaderContext context, int doc)
       throws IOException {
 
-      Explanation result = new Explanation();
+      ComplexExplanation result = new ComplexExplanation();
       result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
 
       StringBuilder docFreqs = new StringBuilder();
@@ -303,10 +303,7 @@ public class PhraseQuery extends Query {
 
       // combine them
       result.setValue(queryExpl.getValue() * fieldExpl.getValue());
-
-      if (queryExpl.getValue() == 1.0f)
-        return fieldExpl;
-
+      result.setMatch(tfExplanation.isMatch());
       return result;
     }
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQueue.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQueue.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQueue.java Tue Mar 15 21:35:17 2011
@@ -21,7 +21,7 @@ import org.apache.lucene.util.PriorityQu
 
 final class PhraseQueue extends PriorityQueue<PhrasePositions> {
   PhraseQueue(int size) {
-    initialize(size);
+    super(size);
   }
 
   @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/SortField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/SortField.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/SortField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/SortField.java Tue Mar 15 21:35:17 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.Locale;
 
 import org.apache.lucene.search.cache.*;
 import org.apache.lucene.util.StringHelper;
@@ -90,7 +89,6 @@ public class SortField {
 
   private String field;
   private int type;  // defaults to determining type dynamically
-  private Locale locale;    // defaults to "natural order" (no Locale)
   boolean reverse = false;  // defaults to natural order
   private CachedArrayCreator<?> creator;
   public Object missingValue = null; // used for 'sortMissingFirst/Last'
@@ -213,28 +211,6 @@ public class SortField {
     }
     return this;
   }
-  
-
-  /** Creates a sort by terms in the given field sorted
-   * according to the given locale.
-   * @param field  Name of field to sort by, cannot be <code>null</code>.
-   * @param locale Locale of values in the field.
-   */
-  public SortField (String field, Locale locale) {
-    initFieldType(field, STRING);
-    this.locale = locale;
-  }
-
-  /** Creates a sort, possibly in reverse, by terms in the given field sorted
-   * according to the given locale.
-   * @param field  Name of field to sort by, cannot be <code>null</code>.
-   * @param locale Locale of values in the field.
-   */
-  public SortField (String field, Locale locale, boolean reverse) {
-    initFieldType(field, STRING);
-    this.locale = locale;
-    this.reverse = reverse;
-  }
 
   /** Creates a sort with a custom comparison function.
    * @param field Name of field to sort by; cannot be <code>null</code>.
@@ -295,14 +271,6 @@ public class SortField {
     return type;
   }
 
-  /** Returns the Locale by which term values are interpreted.
-   * May return <code>null</code> if no Locale was specified.
-   * @return Locale, or <code>null</code>.
-   */
-  public Locale getLocale() {
-    return locale;
-  }
-
   /** Returns the instance of a {@link FieldCache} parser that fits to the given sort type.
    * May return <code>null</code> if no parser was specified. Sorting is using the default parser then.
    * @return An instance of a {@link FieldCache} parser, or <code>null</code>.
@@ -384,7 +352,6 @@ public class SortField {
         break;
     }
 
-    if (locale != null) buffer.append('(').append(locale).append(')');
     if (creator != null) buffer.append('(').append(creator).append(')');
     if (reverse) buffer.append('!');
 
@@ -404,7 +371,6 @@ public class SortField {
       other.field == this.field // field is always interned
       && other.type == this.type
       && other.reverse == this.reverse
-      && (other.locale == null ? this.locale == null : other.locale.equals(this.locale))
       && (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource))
       && (other.creator == null ? this.creator == null : other.creator.equals(this.creator))
     );
@@ -419,7 +385,6 @@ public class SortField {
   public int hashCode() {
     int hash=type^0x346565dd + Boolean.valueOf(reverse).hashCode()^0xaf5998bb;
     if (field != null) hash += field.hashCode()^0xff5685dd;
-    if (locale != null) hash += locale.hashCode()^0x08150815;
     if (comparatorSource != null) hash += comparatorSource.hashCode();
     if (creator != null) hash += creator.hashCode()^0x3aaf56ff;
     return hash;
@@ -439,13 +404,6 @@ public class SortField {
    */
   public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException {
 
-    if (locale != null) {
-      // TODO: it'd be nice to allow FieldCache.getStringIndex
-      // to optionally accept a Locale so sorting could then use
-      // the faster StringComparator impls
-      return new FieldComparator.StringComparatorLocale(numHits, field, locale);
-    }
-
     switch (type) {
     case SortField.SCORE:
       return new FieldComparator.RelevanceComparator(numHits);

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java Tue Mar 15 21:35:17 2011
@@ -1,5 +1,7 @@
 package org.apache.lucene.search;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,15 +19,13 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.text.Collator;
-
 /**
  * A Filter that restricts search results to a range of term
  * values in a given field.
  *
  * <p>This filter matches the documents looking for terms that fall into the
  * supplied range according to {@link
- * String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
+ * Byte#compareTo(Byte)},  It is not intended
  * for numerical ranges; use {@link NumericRangeFilter} instead.
  *
  * <p>If you construct a large number of range filters with different ranges but on the 
@@ -44,39 +44,25 @@ public class TermRangeFilter extends Mul
    *  lowerTerm is null and includeLower is true (similar for upperTerm
    *  and includeUpper)
    */
-  public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
+  public TermRangeFilter(String fieldName, BytesRef lowerTerm, BytesRef upperTerm,
                      boolean includeLower, boolean includeUpper) {
       super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
   }
 
   /**
-   * <strong>WARNING:</strong> Using this constructor and supplying a non-null
-   * value in the <code>collator</code> parameter will cause every single 
-   * index Term in the Field referenced by lowerTerm and/or upperTerm to be
-   * examined.  Depending on the number of index Terms in this Field, the 
-   * operation could be very slow.
-   *
-   * @param lowerTerm The lower bound on this range
-   * @param upperTerm The upper bound on this range
-   * @param includeLower Does this range include the lower bound?
-   * @param includeUpper Does this range include the upper bound?
-   * @param collator The collator to use when determining range inclusion; set
-   *  to null to use Unicode code point ordering instead of collation.
-   * @throws IllegalArgumentException if both terms are null or if
-   *  lowerTerm is null and includeLower is true (similar for upperTerm
-   *  and includeUpper)
+   * Factory that creates a new TermRangeFilter using Strings for term text.
    */
-  public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
-                     boolean includeLower, boolean includeUpper,
-                     Collator collator) {
-      super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
+  public static TermRangeFilter newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+    BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
+    BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
+    return new TermRangeFilter(field, lower, upper, includeLower, includeUpper);
   }
-
+  
   /**
    * Constructs a filter for field <code>fieldName</code> matching
    * less than or equal to <code>upperTerm</code>.
    */
-  public static TermRangeFilter Less(String fieldName, String upperTerm) {
+  public static TermRangeFilter Less(String fieldName, BytesRef upperTerm) {
       return new TermRangeFilter(fieldName, null, upperTerm, false, true);
   }
 
@@ -84,22 +70,19 @@ public class TermRangeFilter extends Mul
    * Constructs a filter for field <code>fieldName</code> matching
    * greater than or equal to <code>lowerTerm</code>.
    */
-  public static TermRangeFilter More(String fieldName, String lowerTerm) {
+  public static TermRangeFilter More(String fieldName, BytesRef lowerTerm) {
       return new TermRangeFilter(fieldName, lowerTerm, null, true, false);
   }
   
   /** Returns the lower value of this range filter */
-  public String getLowerTerm() { return query.getLowerTerm(); }
+  public BytesRef getLowerTerm() { return query.getLowerTerm(); }
 
   /** Returns the upper value of this range filter */
-  public String getUpperTerm() { return query.getUpperTerm(); }
+  public BytesRef getUpperTerm() { return query.getUpperTerm(); }
   
   /** Returns <code>true</code> if the lower endpoint is inclusive */
   public boolean includesLower() { return query.includesLower(); }
   
   /** Returns <code>true</code> if the upper endpoint is inclusive */
   public boolean includesUpper() { return query.includesUpper(); }
-
-  /** Returns the collator used to determine range inclusion, if any. */
-  public Collator getCollator() { return query.getCollator(); }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java Tue Mar 15 21:35:17 2011
@@ -18,11 +18,11 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.text.Collator;
 
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ToStringUtils;
 
 /**
@@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUt
  *
  * <p>This query matches the documents looking for terms that fall into the
  * supplied range according to {@link
- * String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
+ * Byte#compareTo(Byte)}. It is not intended
  * for numerical ranges; use {@link NumericRangeQuery} instead.
  *
  * <p>This query uses the {@link
@@ -40,9 +40,8 @@ import org.apache.lucene.util.ToStringUt
  */
 
 public class TermRangeQuery extends MultiTermQuery {
-  private String lowerTerm;
-  private String upperTerm;
-  private Collator collator;
+  private BytesRef lowerTerm;
+  private BytesRef upperTerm;
   private boolean includeLower;
   private boolean includeUpper;
 
@@ -69,78 +68,48 @@ public class TermRangeQuery extends Mult
    *          If true, the <code>upperTerm</code> is
    *          included in the range.
    */
-  public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
-    this(field, lowerTerm, upperTerm, includeLower, includeUpper, null);
-  }
-
-  /** Constructs a query selecting all terms greater/equal than
-   * <code>lowerTerm</code> but less/equal than <code>upperTerm</code>.
-   * <p>
-   * If an endpoint is null, it is said 
-   * to be "open". Either or both endpoints may be open.  Open endpoints may not 
-   * be exclusive (you can't select all but the first or last term without 
-   * explicitly specifying the term to exclude.)
-   * <p>
-   * If <code>collator</code> is not null, it will be used to decide whether
-   * index terms are within the given range, rather than using the Unicode code
-   * point order in which index terms are stored.
-   * <p>
-   * <strong>WARNING:</strong> Using this constructor and supplying a non-null
-   * value in the <code>collator</code> parameter will cause every single 
-   * index Term in the Field referenced by lowerTerm and/or upperTerm to be
-   * examined.  Depending on the number of index Terms in this Field, the 
-   * operation could be very slow.
-   *
-   * @param lowerTerm The Term text at the lower end of the range
-   * @param upperTerm The Term text at the upper end of the range
-   * @param includeLower
-   *          If true, the <code>lowerTerm</code> is
-   *          included in the range.
-   * @param includeUpper
-   *          If true, the <code>upperTerm</code> is
-   *          included in the range.
-   * @param collator The collator to use to collate index Terms, to determine
-   *  their membership in the range bounded by <code>lowerTerm</code> and
-   *  <code>upperTerm</code>.
-   */
-  public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
-                    Collator collator) {
+  public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
     super(field);
     this.lowerTerm = lowerTerm;
     this.upperTerm = upperTerm;
     this.includeLower = includeLower;
     this.includeUpper = includeUpper;
-    this.collator = collator;
+  }
+
+  /**
+   * Factory that creates a new TermRangeQuery using Strings for term text.
+   */
+  public static TermRangeQuery newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+    BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
+    BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
+    return new TermRangeQuery(field, lower, upper, includeLower, includeUpper);
   }
 
   /** Returns the lower value of this range query */
-  public String getLowerTerm() { return lowerTerm; }
+  public BytesRef getLowerTerm() { return lowerTerm; }
 
   /** Returns the upper value of this range query */
-  public String getUpperTerm() { return upperTerm; }
+  public BytesRef getUpperTerm() { return upperTerm; }
   
   /** Returns <code>true</code> if the lower endpoint is inclusive */
   public boolean includesLower() { return includeLower; }
   
   /** Returns <code>true</code> if the upper endpoint is inclusive */
   public boolean includesUpper() { return includeUpper; }
-
-  /** Returns the collator used to determine range inclusion, if any. */
-  public Collator getCollator() { return collator; }
   
   @Override
   protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
-    if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
+    if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
       return TermsEnum.EMPTY;
     }
     
     TermsEnum tenum = terms.iterator();
     
-    if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) {
+    if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
       return tenum;
     }
     return new TermRangeTermsEnum(tenum,
-        lowerTerm, upperTerm, includeLower, includeUpper, collator);
+        lowerTerm, upperTerm, includeLower, includeUpper);
   }
 
   /** Prints a user-readable version of this query. */
@@ -152,9 +121,10 @@ public class TermRangeQuery extends Mult
           buffer.append(":");
       }
       buffer.append(includeLower ? '[' : '{');
-      buffer.append(lowerTerm != null ? ("*".equals(lowerTerm) ? "\\*" : lowerTerm)  : "*");
+      // TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
+      buffer.append(lowerTerm != null ? ("*".equals(lowerTerm.utf8ToString()) ? "\\*" : lowerTerm.utf8ToString())  : "*");
       buffer.append(" TO ");
-      buffer.append(upperTerm != null ? ("*".equals(upperTerm) ? "\\*" : upperTerm) : "*");
+      buffer.append(upperTerm != null ? ("*".equals(upperTerm.utf8ToString()) ? "\\*" : upperTerm.utf8ToString()) : "*");
       buffer.append(includeUpper ? ']' : '}');
       buffer.append(ToStringUtils.boost(getBoost()));
       return buffer.toString();
@@ -164,7 +134,6 @@ public class TermRangeQuery extends Mult
   public int hashCode() {
     final int prime = 31;
     int result = super.hashCode();
-    result = prime * result + ((collator == null) ? 0 : collator.hashCode());
     result = prime * result + (includeLower ? 1231 : 1237);
     result = prime * result + (includeUpper ? 1231 : 1237);
     result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
@@ -181,11 +150,6 @@ public class TermRangeQuery extends Mult
     if (getClass() != obj.getClass())
       return false;
     TermRangeQuery other = (TermRangeQuery) obj;
-    if (collator == null) {
-      if (other.collator != null)
-        return false;
-    } else if (!collator.equals(other.collator))
-      return false;
     if (includeLower != other.includeLower)
       return false;
     if (includeUpper != other.includeUpper)

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java Tue Mar 15 21:35:17 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.text.Collator;
 import java.util.Comparator;
 
 import org.apache.lucene.index.TermsEnum;
@@ -33,11 +32,8 @@ import org.apache.lucene.util.BytesRef;
  */
 public class TermRangeTermsEnum extends FilteredTermsEnum {
 
-  private Collator collator;
-  private String upperTermText;
-  private String lowerTermText;
-  private boolean includeLower;
-  private boolean includeUpper;
+  final private boolean includeLower;
+  final private boolean includeUpper;
   final private BytesRef lowerBytesRef;
   final private BytesRef upperBytesRef;
   private final Comparator<BytesRef> termComp;
@@ -53,79 +49,61 @@ public class TermRangeTermsEnum extends 
    * 
    * @param tenum
    *          TermsEnum to filter
-   * @param lowerTermText
+   * @param lowerTerm
    *          The term text at the lower end of the range
-   * @param upperTermText
+   * @param upperTerm
    *          The term text at the upper end of the range
    * @param includeLower
    *          If true, the <code>lowerTerm</code> is included in the range.
    * @param includeUpper
    *          If true, the <code>upperTerm</code> is included in the range.
-   * @param collator
-   *          The collator to use to collate index Terms, to determine their
-   *          membership in the range bounded by <code>lowerTerm</code> and
-   *          <code>upperTerm</code>.
    * 
    * @throws IOException
    */
-  public TermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText, 
-    boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+  public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm, 
+    boolean includeLower, boolean includeUpper) throws IOException {
     super(tenum);
-    this.collator = collator;
-    this.upperTermText = upperTermText;
-    this.lowerTermText = lowerTermText;
-    this.includeLower = includeLower;
-    this.includeUpper = includeUpper;
 
     // do a little bit of normalization...
     // open ended range queries should always be inclusive.
-    if (this.lowerTermText == null) {
-      this.lowerTermText = "";
+    if (lowerTerm == null) {
+      this.lowerBytesRef = new BytesRef();
       this.includeLower = true;
+    } else {
+      this.lowerBytesRef = lowerTerm;
+      this.includeLower = includeLower;
     }
-    lowerBytesRef = new BytesRef(this.lowerTermText);
 
-    if (this.upperTermText == null) {
+    if (upperTerm == null) {
       this.includeUpper = true;
       upperBytesRef = null;
     } else {
-      upperBytesRef = new BytesRef(upperTermText);
+      this.includeUpper = includeUpper;
+      upperBytesRef = upperTerm;
     }
 
-    BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef("");
-    setInitialSeekTerm(startBytesRef);
+    setInitialSeekTerm(lowerBytesRef);
     termComp = getComparator();
   }
 
   @Override
   protected AcceptStatus accept(BytesRef term) {
-    if (collator == null) {
-      if (!this.includeLower && term.equals(lowerBytesRef))
-        return AcceptStatus.NO;
-      // Use this field's default sort ordering
-      if (upperBytesRef != null) {
-        final int cmp = termComp.compare(upperBytesRef, term);
-        /*
-         * if beyond the upper term, or is exclusive and this is equal to
-         * the upper term, break out
-         */
-        if ((cmp < 0) ||
-            (!includeUpper && cmp==0)) {
-          return AcceptStatus.END;
-        }
-      }
-      return AcceptStatus.YES;
-    } else {
-      if ((includeLower
-           ? collator.compare(term.utf8ToString(), lowerTermText) >= 0
-           : collator.compare(term.utf8ToString(), lowerTermText) > 0)
-          && (upperTermText == null
-              || (includeUpper
-                  ? collator.compare(term.utf8ToString(), upperTermText) <= 0
-                  : collator.compare(term.utf8ToString(), upperTermText) < 0))) {
-        return AcceptStatus.YES;
-      }
+    if (!this.includeLower && term.equals(lowerBytesRef))
       return AcceptStatus.NO;
+    
+    // Use this field's default sort ordering
+    if (upperBytesRef != null) {
+      final int cmp = termComp.compare(upperBytesRef, term);
+      /*
+       * if beyond the upper term, or is exclusive and this is equal to
+       * the upper term, break out
+       */
+      if ((cmp < 0) ||
+          (!includeUpper && cmp==0)) {
+        return AcceptStatus.END;
+      }
     }
+
+    return AcceptStatus.YES;
   }
 }