You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/13 13:18:25 UTC

svn commit: r1102677 [3/6] - in /lucene/dev/branches/docvalues: ./ dev-tools/eclipse/ dev-tools/maven/ dev-tools/maven/solr/contrib/dataimporthandler/src/extras/ dev-tools/maven/solr/src/ dev-tools/maven/solr/src/solrj/ dev-tools/scripts/ lucene/ lucen...

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Fri May 13 11:18:19 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Comparator;
 import java.util.List;
 import java.util.Set;
 
@@ -595,7 +594,7 @@ public abstract class LogMergePolicy ext
         } else if (!anyTooLarge) {
           if (spec == null)
             spec = new MergeSpecification();
-          final SegmentInfos mergeInfos = new SegmentInfos();
+          final List<SegmentInfo> mergeInfos = new ArrayList<SegmentInfo>();
           for(int i=start;i<end;i++) {
             mergeInfos.add(levels.get(i).info);
             assert infos.contains(levels.get(i).info);

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java Fri May 13 11:18:19 2011
@@ -32,7 +32,7 @@ final class MergeDocIDRemapper {
 
   public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) {
     this.docMaps = docMaps;
-    SegmentInfo firstSegment = merge.segments.info(0);
+    SegmentInfo firstSegment = merge.segments.get(0);
     int i = 0;
     while(true) {
       SegmentInfo info = infos.info(i);
@@ -45,7 +45,7 @@ final class MergeDocIDRemapper {
     int numDocs = 0;
     for(int j=0;j<docMaps.length;i++,j++) {
       numDocs += infos.info(i).docCount;
-      assert infos.info(i).equals(merge.segments.info(j));
+      assert infos.info(i).equals(merge.segments.get(j));
     }
     maxDocID = minDocID + numDocs;
 
@@ -55,7 +55,7 @@ final class MergeDocIDRemapper {
     starts[0] = minDocID;
     newStarts[0] = minDocID;
     for(i=1;i<docMaps.length;i++) {
-      final int lastDocCount = merge.segments.info(i-1).docCount;
+      final int lastDocCount = merge.segments.get(i-1).docCount;
       starts[i] = starts[i-1] + lastDocCount;
       newStarts[i] = newStarts[i-1] + lastDocCount - delCounts[i-1];
     }
@@ -69,7 +69,7 @@ final class MergeDocIDRemapper {
     // assert docShift > 0;
 
     // Make sure it all adds up:
-    assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.info(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
+    assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.get(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
   }
 
   public int remap(int oldDocID) {

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Fri May 13 11:18:19 2011
@@ -75,15 +75,21 @@ public abstract class MergePolicy implem
     long estimatedMergeBytes;       // used by IndexWriter
     List<SegmentReader> readers;        // used by IndexWriter
     List<SegmentReader> readerClones;   // used by IndexWriter
-    public final SegmentInfos segments;
+    public final List<SegmentInfo> segments;
+    public final int totalDocCount;
     boolean aborted;
     Throwable error;
     boolean paused;
 
-    public OneMerge(SegmentInfos segments) {
+    public OneMerge(List<SegmentInfo> segments) {
       if (0 == segments.size())
         throw new RuntimeException("segments must include at least one segment");
       this.segments = segments;
+      int count = 0;
+      for(SegmentInfo info : segments) {
+        count += info.docCount;
+      }
+      totalDocCount = count;
     }
 
     /** Record that an exception occurred while executing
@@ -147,7 +153,7 @@ public abstract class MergePolicy implem
       final int numSegments = segments.size();
       for(int i=0;i<numSegments;i++) {
         if (i > 0) b.append(' ');
-        b.append(segments.info(i).toString(dir, 0));
+        b.append(segments.get(i).toString(dir, 0));
       }
       if (info != null)
         b.append(" into ").append(info.name);

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Fri May 13 11:18:19 2011
@@ -43,7 +43,8 @@ import org.apache.lucene.util.Constants;
  * @lucene.experimental
  */
 public final class SegmentInfo {
-
+  // TODO: remove with hasVector and hasProx
+  private static final int CHECK_FIELDINFO = -2;
   static final int NO = -1;          // e.g. no norms; no deletes;
   static final int YES = 1;          // e.g. have norms; have deletes;
   static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it.
@@ -85,10 +86,12 @@ public final class SegmentInfo {
   private boolean docStoreIsCompoundFile;         // whether doc store files are stored in compound file (*.cfx)
 
   private int delCount;                           // How many deleted docs in this segment
+  
+  //TODO: remove when we don't have to support old indexes anymore that had this field
+  private int hasVectors = CHECK_FIELDINFO;
+  //TODO: remove when we don't have to support old indexes anymore that had this field
+  private int hasProx = CHECK_FIELDINFO;     // True if this segment has any fields with omitTermFreqAndPositions==false
 
-  private boolean hasProx;                        // True if this segment has any fields with omitTermFreqAndPositions==false
-
-  private boolean hasVectors;                     // True if this segment wrote term vectors
   
   private FieldInfos fieldInfos;
 
@@ -106,9 +109,12 @@ public final class SegmentInfo {
   // NOTE: only used in-RAM by IW to track buffered deletes;
   // this is never written to/read from the Directory
   private long bufferedDeletesGen;
-
+  
+  // holds the fieldInfos Version to refresh files() cache if FI has changed
+  private long fieldInfosVersion;
+  
   public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
-                     boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
+                     SegmentCodecs segmentCodecs, FieldInfos fieldInfos) {
     this.name = name;
     this.docCount = docCount;
     this.dir = dir;
@@ -116,9 +122,7 @@ public final class SegmentInfo {
     this.isCompoundFile = isCompoundFile;
     this.docStoreOffset = -1;
     this.docStoreSegment = name;
-    this.hasProx = hasProx;
     this.segmentCodecs = segmentCodecs;
-    this.hasVectors = hasVectors;
     delCount = 0;
     version = Constants.LUCENE_MAIN_VERSION;
     this.fieldInfos = fieldInfos;
@@ -213,7 +217,7 @@ public final class SegmentInfo {
     delCount = input.readInt();
     assert delCount <= docCount;
 
-    hasProx = input.readByte() == YES;
+    hasProx = input.readByte();
 
     // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
     if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
@@ -226,7 +230,7 @@ public final class SegmentInfo {
     diagnostics = input.readStringStringMap();
 
     if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
-      hasVectors = input.readByte() == 1;
+      hasVectors = input.readByte();
     } else {
       final String storesSegment;
       final String ext;
@@ -247,7 +251,7 @@ public final class SegmentInfo {
         dirToTest = dir;
       }
       try {
-        hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+        hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
       } finally {
         if (isCompoundFile) {
           dirToTest.close();
@@ -311,14 +315,9 @@ public final class SegmentInfo {
   }
 
   public boolean getHasVectors() throws IOException {
-    return hasVectors;
-  }
-
-  public void setHasVectors(boolean v) {
-    hasVectors = v;
-    clearFilesCache();
+    return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES;
   }
-
+  
   public FieldInfos getFieldInfos() throws IOException {
     loadFieldInfos(dir, true);
     return fieldInfos;
@@ -349,7 +348,7 @@ public final class SegmentInfo {
 
   @Override
   public Object clone() {
-    final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
+    final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, segmentCodecs,
         fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
     si.docStoreOffset = docStoreOffset;
     si.docStoreSegment = docStoreSegment;
@@ -364,6 +363,8 @@ public final class SegmentInfo {
       }
     }
     si.version = version;
+    si.hasProx = hasProx;
+    si.hasVectors = hasVectors;
     return si;
   }
 
@@ -569,19 +570,14 @@ public final class SegmentInfo {
 
     output.writeByte((byte) (isCompoundFile ? YES : NO));
     output.writeInt(delCount);
-    output.writeByte((byte) (hasProx ? 1:0));
+    output.writeByte((byte) (hasProx));
     segmentCodecs.write(output);
     output.writeStringStringMap(diagnostics);
-    output.writeByte((byte) (hasVectors ? 1 : 0));
-  }
-
-  void setHasProx(boolean hasProx) {
-    this.hasProx = hasProx;
-    clearFilesCache();
+    output.writeByte((byte) (hasVectors));
   }
 
-  public boolean getHasProx() {
-    return hasProx;
+  public boolean getHasProx() throws IOException {
+    return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES;
   }
 
   /** Can only be called once. */
@@ -609,13 +605,14 @@ public final class SegmentInfo {
    */
 
   public List<String> files() throws IOException {
-
-    if (files != null) {
+    final long fisVersion = fieldInfosVersion;
+    if (fisVersion != (fieldInfosVersion = getFieldInfos().getVersion())) {
+      clearFilesCache(); // FIS has modifications - need to recompute
+    } else if (files != null) {
       // Already cached:
       return files;
     }
-
-    Set<String> fileSet = new HashSet<String>();
+    final Set<String> fileSet = new HashSet<String>();
 
     boolean useCompoundFile = getUseCompoundFile();
 
@@ -637,7 +634,7 @@ public final class SegmentInfo {
       } else {
         fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
         fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
-        if (hasVectors) {
+        if (getHasVectors()) {
           fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
           fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
           fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -646,7 +643,7 @@ public final class SegmentInfo {
     } else if (!useCompoundFile) {
       fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
       fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
-      if (hasVectors) {
+      if (getHasVectors()) {
         fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
         fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
         fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -709,8 +706,12 @@ public final class SegmentInfo {
     if (this.dir != dir) {
       s.append('x');
     }
-    if (hasVectors) {
-      s.append('v');
+    try {
+      if (getHasVectors()) {
+        s.append('v');
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
     s.append(docCount);
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Fri May 13 11:18:19 2011
@@ -72,7 +72,7 @@ final class SegmentMerger {
 
   private PayloadProcessorProvider payloadProcessorProvider;
 
-  SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
+  SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
     this.payloadProcessorProvider = payloadProcessorProvider;
     directory = dir;
     segment = name;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Fri May 13 11:18:19 2011
@@ -32,7 +32,6 @@ public class SegmentWriteState {
   public final String segmentName;
   public final FieldInfos fieldInfos;
   public final int numDocs;
-  public boolean hasVectors;
 
   // Deletes to apply while we are flushing the segment.  A
   // Term is enrolled in here if it was deleted at one

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Fri May 13 11:18:19 2011
@@ -63,7 +63,6 @@ final class TermVectorsTermsWriter exten
       }
 
       lastDocID = 0;
-      state.hasVectors = hasVectors;
       hasVectors = false;
     }
 
@@ -121,8 +120,7 @@ final class TermVectorsTermsWriter exten
     fill(docState.docID);
 
     // Append term vectors to the real outputs:
-    long pointer = tvd.getFilePointer();
-    tvx.writeLong(pointer);
+    tvx.writeLong(tvd.getFilePointer());
     tvx.writeLong(tvf.getFilePointer());
     tvd.writeVInt(numVectorFields);
     if (numVectorFields > 0) {
@@ -136,6 +134,8 @@ final class TermVectorsTermsWriter exten
         tvd.writeVLong(pos-lastPos);
         lastPos = pos;
         perFields[i].finishDocument();
+        // commit the termVectors once successful success - FI will otherwise reset them
+        perFields[i].fieldInfo.commitVectors();
       }
     }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Fri May 13 11:18:19 2011
@@ -23,6 +23,8 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Comparator;
+import java.util.List;
+import java.util.ArrayList;
 
 /**
  *  Merges segments of approximately equal size, subject to
@@ -249,7 +251,7 @@ public class TieredMergePolicy extends M
     final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
     final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
 
-    final SegmentInfos infosSorted = new SegmentInfos();
+    final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
     infosSorted.addAll(infos);
 
     Collections.sort(infosSorted, segmentByteSizeDescending);
@@ -277,7 +279,7 @@ public class TieredMergePolicy extends M
     // If we have too-large segments, grace them out
     // of the maxSegmentCount:
     int tooBigCount = 0;
-    while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
+    while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
       totIndexBytes -= size(infosSorted.get(tooBigCount));
       tooBigCount++;
     }
@@ -310,7 +312,7 @@ public class TieredMergePolicy extends M
       // Gather eligible segments for merging, ie segments
       // not already being merged and not already picked (by
       // prior iteration of this loop) for merging:
-      final SegmentInfos eligible = new SegmentInfos();
+      final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
       for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
         final SegmentInfo info = infosSorted.get(idx);
         if (merging.contains(info)) {
@@ -332,7 +334,7 @@ public class TieredMergePolicy extends M
 
         // OK we are over budget -- find best merge!
         MergeScore bestScore = null;
-        SegmentInfos best = null;
+        List<SegmentInfo> best = null;
         boolean bestTooLarge = false;
         long bestMergeBytes = 0;
 
@@ -341,10 +343,10 @@ public class TieredMergePolicy extends M
 
           long totAfterMergeBytes = 0;
 
-          final SegmentInfos candidate = new SegmentInfos();
+          final List<SegmentInfo> candidate = new ArrayList<SegmentInfo>();
           boolean hitTooLarge = false;
           for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
-            final SegmentInfo info = eligible.info(idx);
+            final SegmentInfo info = eligible.get(idx);
             final long segBytes = size(info);
 
             if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
@@ -398,7 +400,7 @@ public class TieredMergePolicy extends M
   }
 
   /** Expert: scores one merge; subclasses can override. */
-  protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
+  protected MergeScore score(List<SegmentInfo> candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
     long totBeforeMergeBytes = 0;
     long totAfterMergeBytes = 0;
     long totAfterMergeBytesFloored = 0;
@@ -420,7 +422,7 @@ public class TieredMergePolicy extends M
       // over time:
       skew = 1.0/maxMergeAtOnce;
     } else {
-      skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored;
+      skew = ((double) floorSize(size(candidate.get(0))))/totAfterMergeBytesFloored;
     }
 
     // Strongly favor merges with less skew (smaller
@@ -458,7 +460,8 @@ public class TieredMergePolicy extends M
     if (verbose()) {
       message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
     }
-    SegmentInfos eligible = new SegmentInfos();
+
+    List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
     boolean optimizeMergeRunning = false;
     final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
     for(SegmentInfo info : infos) {
@@ -499,7 +502,7 @@ public class TieredMergePolicy extends M
       if (spec == null) {
         spec = new MergeSpecification();
       }
-      final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
+      final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
       if (verbose()) {
         message("add merge=" + writer.get().segString(merge.segments));
       }
@@ -510,7 +513,7 @@ public class TieredMergePolicy extends M
     if (spec == null && !optimizeMergeRunning) {
       // Do final merge
       final int numToMerge = end - maxSegmentCount + 1;
-      final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
+      final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
       if (verbose()) {
         message("add final merge=" + merge.segString(writer.get().getDirectory()));
       }
@@ -527,7 +530,7 @@ public class TieredMergePolicy extends M
     if (verbose()) {
       message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
     }
-    final SegmentInfos eligible = new SegmentInfos();
+    final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
     final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
     for(SegmentInfo info : infos) {
       double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
@@ -580,7 +583,7 @@ public class TieredMergePolicy extends M
         spec = new MergeSpecification();
       }
 
-      final OneMerge merge = new OneMerge(eligible.range(start, upto));
+      final OneMerge merge = new OneMerge(eligible.subList(start, upto));
       if (verbose()) {
         message("add merge=" + writer.get().segString(merge.segments));
       }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Fri May 13 11:18:19 2011
@@ -72,6 +72,11 @@ public class CodecProvider {
       }
     }
   }
+  
+  /** @lucene.internal */
+  public synchronized Set<String> listAll() {
+    return codecs.keySet();
+  }
 
   public Collection<String> getAllExtensions() {
     return knownExtensions;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Fri May 13 11:18:19 2011
@@ -68,15 +68,8 @@ public class PulsingPostingsReaderImpl e
 
     @Override
     public Object clone() {
-      PulsingTermState clone;
-      clone = (PulsingTermState) super.clone();
-      if (postingsSize != -1) {
-        clone.postings = new byte[postingsSize];
-        System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
-      } else {
-        assert wrappedTermState != null;
-        clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
-      }
+      PulsingTermState clone = new PulsingTermState();
+      clone.copyFrom(this);
       return clone;
     }
 
@@ -90,8 +83,10 @@ public class PulsingPostingsReaderImpl e
           postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
         }
         System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
-      } else {
+      } else if (wrappedTermState != null) {
         wrappedTermState.copyFrom(other.wrappedTermState);
+      } else {
+        wrappedTermState = (BlockTermState) other.wrappedTermState.clone();
       }
 
       // NOTE: we do not copy the

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri May 13 11:18:19 2011
@@ -85,7 +85,7 @@ public class SepPostingsReaderImpl exten
     }
   }
 
-  public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) {
+  public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) throws IOException {
     files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
     files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
 
@@ -151,14 +151,8 @@ public class SepPostingsReaderImpl exten
 
     @Override
     public Object clone() {
-      SepTermState other = (SepTermState) super.clone();
-      other.docIndex = (IntIndexInput.Index) docIndex.clone();
-      if (freqIndex != null) {
-        other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
-      }
-      if (posIndex != null) {
-        other.posIndex = (IntIndexInput.Index) posIndex.clone();
-      }
+      SepTermState other = new SepTermState();
+      other.copyFrom(this);
       return other;
     }
 
@@ -166,12 +160,28 @@ public class SepPostingsReaderImpl exten
     public void copyFrom(TermState _other) {
       super.copyFrom(_other);
       SepTermState other = (SepTermState) _other;
-      docIndex.set(other.docIndex);
-      if (freqIndex != null && other.freqIndex != null) {
-        freqIndex.set(other.freqIndex);
+      if (docIndex == null) {
+        docIndex = (IntIndexInput.Index) other.docIndex.clone();
+      } else {
+        docIndex.set(other.docIndex);
+      }
+      if (other.freqIndex != null) {
+        if (freqIndex == null) {
+          freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
+        } else {
+          freqIndex.set(other.freqIndex);
+        }
+      } else {
+        freqIndex = null;
       }
-      if (posIndex != null && other.posIndex != null) {
-        posIndex.set(other.posIndex);
+      if (other.posIndex != null) {
+        if (posIndex == null) {
+          posIndex = (IntIndexInput.Index) other.posIndex.clone();
+        } else {
+          posIndex.set(other.posIndex);
+        }
+      } else {
+        posIndex = null;
       }
       payloadFP = other.payloadFP;
       skipFP = other.skipFP;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java Fri May 13 11:18:19 2011
@@ -806,6 +806,7 @@ public abstract class QueryParserBase {
     }
       
     try {
+      source.end();
       source.close();
     } catch (IOException ignored) {}
     

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java Fri May 13 11:18:19 2011
@@ -21,8 +21,6 @@ import org.apache.lucene.util.PriorityQu
 
 final class HitQueue extends PriorityQueue<ScoreDoc> {
 
-  private boolean prePopulate;
-
   /**
    * Creates a new instance with <code>size</code> elements. If
    * <code>prePopulate</code> is set to true, the queue will pre-populate itself

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Fri May 13 11:18:19 2011
@@ -46,8 +46,18 @@ import org.apache.lucene.util.ThreadInte
  *
  * <p>Applications usually need only call the inherited
  * {@link #search(Query,int)}
- * or {@link #search(Query,Filter,int)} methods. For performance reasons it is 
- * recommended to open only one IndexSearcher and use it for all of your searches.
+ * or {@link #search(Query,Filter,int)} methods. For
+ * performance reasons, if your index is unchanging, you
+ * should share a single IndexSearcher instance across
+ * multiple searches instead of creating a new one
+ * per-search.  If your index has changed and you wish to
+ * see the changes reflected in searching, you should
+ * use {@link IndexReader#reopen} to obtain a new reader and
+ * then create a new IndexSearcher from that.  Also, for
+ * low-latency turnaround it's best to use a near-real-time
+ * reader ({@link IndexReader#open(IndexWriter,boolean)}).
+ * Once you have a new {@link IndexReader}, it's relatively
+ * cheap to create a new IndexSearcher from it.
  * 
  * <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link
  * IndexSearcher}</code> instances are completely

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Fri May 13 11:18:19 2011
@@ -214,12 +214,12 @@ public class MultiPhraseQuery extends Qu
           docFreq = reader.docFreq(term.field(), term.bytes());
         }
 
-        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
+        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
       }
 
       // sort by increasing docFreq order
       if (slop == 0) {
-        ArrayUtil.quickSort(postingsFreqs);
+        ArrayUtil.mergeSort(postingsFreqs);
       }
 
       if (slop == 0) {

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java Fri May 13 11:18:19 2011
@@ -28,13 +28,15 @@ final class PhrasePositions {
   int position;					  // position in doc
   int count;					  // remaining pos in this doc
   int offset;					  // position in phrase
+  final int ord;                                  // unique across all PhrasePositions instances
   final DocsAndPositionsEnum postings;  	  // stream of docs & positions
   PhrasePositions next;	                          // used to make lists
   boolean repeats;       // there's other pp for same term (e.g. query="1st word 2nd word"~1) 
 
-  PhrasePositions(DocsAndPositionsEnum postings, int o) {
+  PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
     this.postings = postings;
     offset = o;
+    this.ord = ord;
   }
 
   final boolean next() throws IOException {	  // increments to next doc

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Fri May 13 11:18:19 2011
@@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
     final DocsAndPositionsEnum postings;
     final int docFreq;
     final int position;
+    final Term term;
 
-    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
+    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
       this.postings = postings;
       this.docFreq = docFreq;
       this.position = position;
+      this.term = term;
     }
 
     public int compareTo(PostingsAndFreq other) {
+      if (docFreq == other.docFreq) {
+        if (position == other.position) {
+          return term.compareTo(other.term);
+        }
+        return position - other.position;
+      }
       return docFreq - other.docFreq;
     }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + docFreq;
+      result = prime * result + position;
+      result = prime * result + ((term == null) ? 0 : term.hashCode());
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) return true;
+      if (obj == null) return false;
+      if (getClass() != obj.getClass()) return false;
+      PostingsAndFreq other = (PostingsAndFreq) obj;
+      if (docFreq != other.docFreq) return false;
+      if (position != other.position) return false;
+      if (term == null) {
+        if (other.term != null) return false;
+      } else if (!term.equals(other.term)) return false;
+      return true;
+    }
   }
 
   private class PhraseWeight extends Weight {
@@ -197,12 +229,12 @@ public class PhraseQuery extends Query {
             return null;
           }
         }
-        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
+        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
       }
 
       // sort by increasing docFreq order
       if (slop == 0) {
-        ArrayUtil.quickSort(postingsFreqs);
+        ArrayUtil.mergeSort(postingsFreqs);
       }
 
       if (slop == 0) {				  // optimize exact case

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java Fri May 13 11:18:19 2011
@@ -30,10 +30,16 @@ final class PhraseQueue extends Priority
       if (pp1.position == pp2.position)
         // same doc and pp.position, so decide by actual term positions. 
         // rely on: pp.position == tp.position - offset. 
-        return pp1.offset < pp2.offset;
-      else
+        if (pp1.offset == pp2.offset) {
+          return pp1.ord < pp2.ord;
+        } else {
+          return pp1.offset < pp2.offset;
+        }
+      else {
         return pp1.position < pp2.position;
-    else
+      }
+    else {
       return pp1.doc < pp2.doc;
+    }
   }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java Fri May 13 11:18:19 2011
@@ -55,7 +55,7 @@ abstract class PhraseScorer extends Scor
     // this allows to easily identify a matching (exact) phrase 
     // when all PhrasePositions have exactly the same position.
     for (int i = 0; i < postings.length; i++) {
-      PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position);
+      PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
       if (last != null) {			  // add next to end of list
         last.next = pp;
       } else {

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Fri May 13 11:18:19 2011
@@ -134,7 +134,7 @@ public abstract class TopTermsRewrite<Q 
     final Term placeholderTerm = new Term(query.field);
     final Q q = getTopLevelQuery();
     final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
-    ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
+    ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
     for (final ScoreTerm st : scoreTerms) {
       final Term term = placeholderTerm.createTerm(st.bytes);
       assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Fri May 13 11:18:19 2011
@@ -190,7 +190,7 @@ public class NearSpansOrdered extends Sp
 
   /** Advance the subSpans to the same document */
   private boolean toSameDoc() throws IOException {
-    ArrayUtil.quickSort(subSpansByDoc, spanDocComparator);
+    ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
     int firstIndex = 0;
     int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
     while (subSpansByDoc[firstIndex].doc() != maxDoc) {

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java Fri May 13 11:18:19 2011
@@ -62,13 +62,26 @@ public abstract class SorterTemplate {
 
   /** Sorts via in-place, but unstable, QuickSort algorithm.
    * For small collections falls back to {@link #insertionSort(int,int)}. */
-  public final void quickSort(int lo, int hi) {
+  public final void quickSort(final int lo, final int hi) {
+    if (hi <= lo) return;
+    // from Integer's Javadocs: ceil(log2(x)) = 32 - numberOfLeadingZeros(x - 1)
+    quickSort(lo, hi, (Integer.SIZE - Integer.numberOfLeadingZeros(hi - lo)) << 1);
+  }
+  
+  private void quickSort(int lo, int hi, int maxDepth) {
+    // fall back to insertion when array has short length
     final int diff = hi - lo;
     if (diff <= QUICKSORT_THRESHOLD) {
       insertionSort(lo, hi);
       return;
     }
     
+    // fall back to merge sort when recursion depth gets too big
+    if (--maxDepth == 0) {
+      mergeSort(lo, hi);
+      return;
+    }
+    
     final int mid = lo + (diff >>> 1);
     
     if (compare(lo, mid) > 0) {
@@ -101,8 +114,8 @@ public abstract class SorterTemplate {
       }
     }
 
-    quickSort(lo, left);
-    quickSort(left + 1, hi);
+    quickSort(lo, left, maxDepth);
+    quickSort(left + 1, hi, maxDepth);
   }
   
   /** Sorts via stable in-place MergeSort algorithm

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java Fri May 13 11:18:19 2011
@@ -261,9 +261,12 @@ public class Builder<T> {
     add(scratchIntsRef, output);
   }
 
+  /** It's OK to add the same input twice in a row with
+   *  different outputs, as long as outputs impls the merge
+   *  method. */
   public void add(IntsRef input, T output) throws IOException {
     //System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
-    assert lastInput.length == 0 || input.compareTo(lastInput) > 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
+    assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
     assert validOutput(output);
 
     //System.out.println("\nadd: " + input);
@@ -347,8 +350,15 @@ public class Builder<T> {
       assert validOutput(output);
     }
 
-    // push remaining output:
-    frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+    if (lastInput.length == input.length && prefixLenPlus1 == 1+input.length) {
+      // same input more than 1 time in a row, mapping to
+      // multiple outputs
+      lastNode.output = fst.outputs.merge(lastNode.output, output);
+    } else {
+      // this new arc is private to this new input; set its
+      // arc output to the leftover output:
+      frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+    }
 
     // save last input
     lastInput.copy(input);

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Fri May 13 11:18:19 2011
@@ -231,10 +231,13 @@ public class FST<T> {
   }
 
   void setEmptyOutput(T v) throws IOException {
-    if (emptyOutput != null && !emptyOutput.equals(v)) {
-      throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v));
+    if (emptyOutput != null) {
+      if (!emptyOutput.equals(v)) {
+        emptyOutput = outputs.merge(emptyOutput, v);
+      }
+    } else {
+      emptyOutput = v;
     }
-    emptyOutput = v;
 
     // TODO: this is messy -- replace with sillyBytesWriter; maybe make
     // bytes private
@@ -446,25 +449,17 @@ public class FST<T> {
     // reverse bytes in-place; we do this so that the
     // "BIT_TARGET_NEXT" opto can work, ie, it reads the
     // node just before the current one
-    final int endAddress = writer.posWrite;
-    final int stopAt = (endAddress - startAddress)/2;
-    int upto = 0;
-    while (upto < stopAt) {
-      final byte b = bytes[startAddress+upto];
-      bytes[startAddress+upto] = bytes[endAddress-upto-1];
-      bytes[endAddress-upto-1] = b;
-      upto++;
-    }
+    final int endAddress = lastFrozenNode = writer.posWrite - 1;
 
-    lastFrozenNode = endAddress - 1;
-    /*
-    System.out.println("  return node addr=" + (endAddress-1));
-    for(int i=endAddress-1;i>=startAddress;i--) {
-      System.out.println("    bytes[" + i + "]=" + bytes[i]);
+    int left = startAddress;
+    int right = endAddress;
+    while (left < right) {
+      final byte b = bytes[left];
+      bytes[left++] = bytes[right];
+      bytes[right--] = b;
     }
-    */
 
-    return endAddress-1;
+    return endAddress;
   }
 
   /** Fills virtual 'start' arc, ie, an empty incoming arc to

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java Fri May 13 11:18:19 2011
@@ -140,7 +140,7 @@ abstract class FSTEnum<T> {
         // Arcs are fixed array -- use binary search to find
         // the target.
 
-        final FST.BytesReader in = fst.getBytesReader(0);
+        final FST<T>.BytesReader in = fst.getBytesReader(0);
         int low = arc.arcIdx;
         int high = arc.numArcs-1;
         int mid = 0;
@@ -278,7 +278,7 @@ abstract class FSTEnum<T> {
         // Arcs are fixed array -- use binary search to find
         // the target.
 
-        final FST.BytesReader in = fst.getBytesReader(0);
+        final FST<T>.BytesReader in = fst.getBytesReader(0);
         int low = arc.arcIdx;
         int high = arc.numArcs-1;
         int mid = 0;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java Fri May 13 11:18:19 2011
@@ -40,7 +40,7 @@ final class NodeHash<T> {
       return false;
     }
     for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
-      final Builder.Arc arc = node.arcs[arcUpto];
+      final Builder.Arc<T> arc = node.arcs[arcUpto];
       if (arc.label != scratchArc.label ||
           !arc.output.equals(scratchArc.output) ||
           ((Builder.CompiledNode) arc.target).address != scratchArc.target ||

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java Fri May 13 11:18:19 2011
@@ -54,4 +54,8 @@ public abstract class Outputs<T> {
   public abstract T getNoOutput();
 
   public abstract String outputToString(T output);
+
+  public T merge(T first, T second) {
+    throw new UnsupportedOperationException();
+  }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java Fri May 13 11:18:19 2011
@@ -43,7 +43,7 @@ public class PairOutputs<A,B> extends Ou
       this.output2 = output2;
     }
 
-    @Override @SuppressWarnings("unchecked")
+    @Override @SuppressWarnings("rawtypes")
     public boolean equals(Object other) {
       if (other == this) {
         return true;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java Fri May 13 11:18:19 2011
@@ -22,14 +22,11 @@ import java.io.IOException;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 
-// TODO: make a sharing and non-sharing variant; eg if you
-// output docFreq per term the FST will be smaller if you
-// don't share since they are not "well shared"
-
 /**
  * Output is a long, for each input term.  NOTE: the
  * resulting FST is not guaranteed to be minimal!  See
- * {@link Builder}.
+ * {@link Builder}.  You cannot store 0 output with this
+ * (that's reserved to mean "no output")!
  * @lucene.experimental
  */
 

Modified: lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml Fri May 13 11:18:19 2011
@@ -90,6 +90,14 @@
         <p>
             In version 3.1, segments records the code version
             that created them. See LUCENE-2720 for details.
+            
+            Additionally segments track explicitly whether or
+            not they have term vectors. See LUCENE-2811 for details.
+           </p>
+        <p>
+            In version 3.2, numeric fields are written as natively
+            to stored fields file, previously they were stored in
+            text format only.
            </p>
         </section>
 
@@ -935,7 +943,7 @@
                     <b>3.1</b>
                     Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
                     NormGen<sup>NumField</sup>,
-                    IsCompoundFile, DeletionCount, HasProx, Diagnostics&gt;<sup>SegCount</sup>, CommitUserData, Checksum
+                    IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors&gt;<sup>SegCount</sup>, CommitUserData, Checksum
                 </p>
 
                 <p>
@@ -957,7 +965,7 @@
 
                 <p>
                     IsCompoundFile, HasSingleNormFile,
-                    DocStoreIsCompoundFile, HasProx --&gt; Int8
+                    DocStoreIsCompoundFile, HasProx, HasVectors --&gt; Int8
                 </p>
 
 		<p>
@@ -1083,6 +1091,10 @@
 		    Lucene version, OS, Java version, why the segment
 		    was created (merge, flush, addIndexes), etc.
                 </p>
+         
+        <p> HasVectors is 1 if this segment stores term vectors,
+            else it's 0.
+                </p>
 
             </section>
 
@@ -1293,10 +1305,18 @@
                                 <li>third bit is one for fields with compression option enabled
                                     (if compression is enabled, the algorithm used is ZLIB),
                                     only available for indexes until Lucene version 2.9.x</li>
+                                <li>4th to 6th bits (mask: 0x7&lt;&lt;3) define the type of a
+                                numeric field: <ul>
+                                  <li>all bits in mask are cleared if no numeric field at all</li>
+                                  <li>1&lt;&lt;3: Value is Int</li>
+                                  <li>2&lt;&lt;3: Value is Long</li>
+                                  <li>3&lt;&lt;3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+                                  <li>4&lt;&lt;3: Value is Long as Double (as of Double.longBitsToDouble)</li>
+                                </ul></li>
                             </ul>
                         </p>
                         <p>Value --&gt;
-                            String | BinaryValue (depending on Bits)
+                            String | BinaryValue | Int | Long (depending on Bits)
                         </p>
                         <p>BinaryValue --&gt;
                             ValueSize, &lt;Byte&gt;^ValueSize

Modified: lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml (original)
+++ lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml Fri May 13 11:18:19 2011
@@ -28,11 +28,11 @@ may wish to skip sections.
 
 <ul>
 	<li><a href="demo.html">About the command-line Lucene demo and its usage</a>.  This section
-	is intended for anyone who wants to use the command-line Lucene demo.</li> <p/>
+	is intended for anyone who wants to use the command-line Lucene demo.</li>
 
 	<li><a href="demo2.html">About the sources and implementation for the command-line Lucene
 	demo</a>.  This section walks through the implementation details (sources) of the
-	command-line Lucene demo.  This section is intended for developers.</li> <p/>
+	command-line Lucene demo.  This section is intended for developers.</li>
 </ul>
 </section>
 

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Fri May 13 11:18:19 2011
@@ -262,6 +262,7 @@ public abstract class BaseTokenStreamTes
         tokens.add(termAtt.toString());
         // TODO: we could collect offsets etc here for better checking that reset() really works.
       }
+      ts.end();
       ts.close();
       // verify reusing is "reproducable" and also get the normal tokenstream sanity checks
       if (!tokens.isEmpty())

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java Fri May 13 11:18:19 2011
@@ -36,6 +36,7 @@ public final class MockAnalyzer extends 
   private int positionIncrementGap;
   private final Random random;
   private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
+  private boolean enableChecks = true;
 
   /**
    * Creates a new MockAnalyzer.
@@ -75,6 +76,7 @@ public final class MockAnalyzer extends 
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
     MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+    tokenizer.setEnableChecks(enableChecks);
     TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
     filt = maybePayload(filt, fieldName);
     return filt;
@@ -98,13 +100,13 @@ public final class MockAnalyzer extends 
     if (saved == null) {
       saved = new SavedStreams();
       saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+      saved.tokenizer.setEnableChecks(enableChecks);
       saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
       saved.filter = maybePayload(saved.filter, fieldName);
       map.put(fieldName, saved);
       return saved.filter;
     } else {
       saved.tokenizer.reset(reader);
-      saved.filter.reset();
       return saved.filter;
     }
   }
@@ -139,4 +141,12 @@ public final class MockAnalyzer extends 
   public int getPositionIncrementGap(String fieldName){
     return positionIncrementGap;
   }
+  
+  /** 
+   * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+   * should leave this enabled.
+   */
+  public void setEnableChecks(boolean enableChecks) {
+    this.enableChecks = enableChecks;
+  }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java Fri May 13 11:18:19 2011
@@ -86,6 +86,7 @@ final class MockPayloadFilter extends To
 
   @Override
   public void reset() throws IOException {
+    super.reset();
     i = 0;
     pos = 0;
   }

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Fri May 13 11:18:19 2011
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Automaton-based tokenizer for testing. Optionally lowercases.
  */
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
   /** Acts Similar to WhitespaceTokenizer */
   public static final CharacterRunAutomaton WHITESPACE = 
     new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,88 @@ public class MockTokenizer extends CharT
   private final boolean lowerCase;
   private int state;
 
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  int off = 0;
+
+  // TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
+  // currently, we can only check that the lifecycle is correct if someone is reusing,
+  // but not for "one-offs".
+  private static enum State { 
+    SETREADER,       // consumer set a reader input either via ctor or via reset(Reader)
+    RESET,           // consumer has called reset()
+    INCREMENT,       // consumer is consuming, has called incrementToken() == true
+    INCREMENT_FALSE, // consumer has called incrementToken() which returned false
+    END,             // consumer has called end() to perform end of stream operations
+    CLOSE            // consumer has called close() to release any resources
+  };
+  
+  private State streamState = State.CLOSE;
+  private boolean enableChecks = true;
+  
   public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+    super(factory, input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
+    this.streamState = State.SETREADER;
   }
 
   public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+    super(input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
+    this.streamState = State.SETREADER;
   }
   
   @Override
+  public final boolean incrementToken() throws IOException {
+    assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT) 
+                            : "incrementToken() called while in wrong state: " + streamState;
+    clearAttributes();
+    for (;;) {
+      int startOffset = off;
+      int cp = readCodePoint();
+      if (cp < 0) {
+        break;
+      } else if (isTokenChar(cp)) {
+        int endOffset;
+        do {
+          char chars[] = Character.toChars(normalize(cp));
+          for (int i = 0; i < chars.length; i++)
+            termAtt.append(chars[i]);
+          endOffset = off;
+          cp = readCodePoint();
+        } while (cp >= 0 && isTokenChar(cp));
+        offsetAtt.setOffset(startOffset, endOffset);
+        streamState = State.INCREMENT;
+        return true;
+      }
+    }
+    streamState = State.INCREMENT_FALSE;
+    return false;
+  }
+
+  protected int readCodePoint() throws IOException {
+    int ch = input.read();
+    if (ch < 0) {
+      return ch;
+    } else {
+      assert !Character.isLowSurrogate((char) ch);
+      off++;
+      if (Character.isHighSurrogate((char) ch)) {
+        int ch2 = input.read();
+        if (ch2 >= 0) {
+          off++;
+          assert Character.isLowSurrogate((char) ch2);
+          return Character.toCodePoint((char) ch, (char) ch2);
+        }
+      }
+      return ch;
+    }
+  }
+
   protected boolean isTokenChar(int c) {
     state = runAutomaton.step(state, c);
     if (state < 0) {
@@ -70,7 +138,6 @@ public class MockTokenizer extends CharT
     }
   }
   
-  @Override
   protected int normalize(int c) {
     return lowerCase ? Character.toLowerCase(c) : c;
   }
@@ -79,5 +146,43 @@ public class MockTokenizer extends CharT
   public void reset() throws IOException {
     super.reset();
     state = runAutomaton.getInitialState();
+    off = 0;
+    assert !enableChecks || streamState != State.RESET : "double reset()";
+    streamState = State.RESET;
+  }
+  
+  @Override
+  public void close() throws IOException {
+    super.close();
+    // in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close()
+    // these tests should disable this check, by default we check the normal workflow.
+    // TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this
+    assert !enableChecks || streamState == State.END || streamState == State.CLOSE : "close() called in wrong state: " + streamState;
+    streamState = State.CLOSE;
+  }
+
+  @Override
+  public void reset(Reader input) throws IOException {
+    super.reset(input);
+    assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
+    streamState = State.SETREADER;
+  }
+
+  @Override
+  public void end() throws IOException {
+    int finalOffset = correctOffset(off);
+    offsetAtt.setOffset(finalOffset, finalOffset);
+    // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
+    // these tests should disable this check (in general you should consume the entire stream)
+    assert !enableChecks || streamState == State.INCREMENT_FALSE : "end() called before incrementToken() returned false!";
+    streamState = State.END;
+  }
+
+  /** 
+   * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+   * should leave this enabled.
+   */
+  public void setEnableChecks(boolean enableChecks) {
+    this.enableChecks = enableChecks;
   }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java Fri May 13 11:18:19 2011
@@ -18,7 +18,9 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
 import java.util.Random;
 import java.util.Set;
 
@@ -58,21 +60,36 @@ public class MockRandomMergePolicy exten
       SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
     throws CorruptIndexException, IOException {
 
-    //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos);
+    final List<SegmentInfo> eligibleSegments = new ArrayList<SegmentInfo>();
+    for(SegmentInfo info : segmentInfos) {
+      if (segmentsToOptimize.contains(info)) {
+        eligibleSegments.add(info);
+      }
+    }
+
+    //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos + " eligible=" + eligibleSegments);
     MergeSpecification mergeSpec = null;
-    if (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && segmentInfos.info(0).hasDeletions())) {
+    if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && eligibleSegments.get(0).hasDeletions())) {
       mergeSpec = new MergeSpecification();
-      SegmentInfos segmentInfos2 = new SegmentInfos();
-      segmentInfos2.addAll(segmentInfos);
-      Collections.shuffle(segmentInfos2, random);
+      // Already shuffled having come out of a set but
+      // shuffle again for good measure:
+      Collections.shuffle(eligibleSegments, random);
       int upto = 0;
-      while(upto < segmentInfos.size()) {
-        int max = Math.min(10, segmentInfos.size()-upto);
+      while(upto < eligibleSegments.size()) {
+        int max = Math.min(10, eligibleSegments.size()-upto);
         int inc = max <= 2 ? max : _TestUtil.nextInt(random, 2, max);
-        mergeSpec.add(new OneMerge(segmentInfos2.range(upto, upto+inc)));
+        mergeSpec.add(new OneMerge(eligibleSegments.subList(upto, upto+inc)));
         upto += inc;
       }
     }
+
+    if (mergeSpec != null) {
+      for(OneMerge merge : mergeSpec.merges) {
+        for(SegmentInfo info : merge.segments) {
+          assert segmentsToOptimize.contains(info);
+        }
+      }
+    }
     return mergeSpec;
   }
 

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Fri May 13 11:18:19 2011
@@ -146,6 +146,9 @@ public class MockRandomCodec extends Cod
     out.close();
 
     final Random random = new Random(seed);
+    
+    random.nextInt(); // consume a random for buffersize
+    
     PostingsWriterBase postingsWriter;
 
     if (random.nextBoolean()) {
@@ -244,16 +247,22 @@ public class MockRandomCodec extends Cod
     in.close();
 
     final Random random = new Random(seed);
+    
+    int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
+    }
+
     PostingsReaderBase postingsReader;
 
     if (random.nextBoolean()) {
       postingsReader = new SepPostingsReaderImpl(state.dir, state.segmentInfo,
-                                                 state.readBufferSize, new MockIntStreamFactory(random), state.codecId);
+                                                 readBufferSize, new MockIntStreamFactory(random), state.codecId);
     } else {
       if (LuceneTestCase.VERBOSE) {
         System.out.println("MockRandomCodec: reading Standard postings");
       }
-      postingsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.readBufferSize, state.codecId);
+      postingsReader = new StandardPostingsReader(state.dir, state.segmentInfo, readBufferSize, state.codecId);
     }
 
     if (random.nextBoolean()) {
@@ -318,7 +327,7 @@ public class MockRandomCodec extends Cod
                                                 state.fieldInfos,
                                                 state.segmentInfo.name,
                                                 postingsReader,
-                                                state.readBufferSize,
+                                                readBufferSize,
                                                 termsCacheSize,
                                                 state.codecId);
       success = true;

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java Fri May 13 11:18:19 2011
@@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentIn
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.DocValuesConsumer;
 import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java Fri May 13 11:18:19 2011
@@ -71,6 +71,7 @@ public class MockDirectoryWrapper extend
   Set<String> openFilesForWrite = new HashSet<String>();
   volatile boolean crashed;
   private ThrottledIndexOutput throttledOutput;
+  private Throttling throttling = Throttling.SOMETIMES;
 
   // use this for tracking files for crash.
   // additionally: provides debugging information in case you leave one open
@@ -104,6 +105,8 @@ public class MockDirectoryWrapper extend
     // called from different threads; else test failures may
     // not be reproducible from the original seed
     this.randomState = new Random(random.nextInt());
+    this.throttledOutput = new ThrottledIndexOutput(ThrottledIndexOutput
+        .mBitsToBytes(40 + randomState.nextInt(10)), 5 + randomState.nextInt(5), null);
     init();
   }
 
@@ -117,8 +120,17 @@ public class MockDirectoryWrapper extend
     preventDoubleWrite = value;
   }
   
-  public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) {
-    this.throttledOutput = throttledOutput;
+  public static enum Throttling {
+    /** always emulate a slow hard disk. could be very slow! */
+    ALWAYS,
+    /** sometimes (2% of the time) emulate a slow hard disk. */
+    SOMETIMES,
+    /** never throttle output */
+    NEVER
+  };
+  
+  public void setThrottling(Throttling throttling) {
+    this.throttling = throttling;
   }
 
   @Override
@@ -354,7 +366,17 @@ public class MockDirectoryWrapper extend
     IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
     openFileHandles.put(io, new RuntimeException("unclosed IndexOutput"));
     openFilesForWrite.add(name);
-    return throttledOutput == null ? io : throttledOutput.newFromDelegate(io);
+    
+    // throttling REALLY slows down tests, so don't do it very often for SOMETIMES.
+    if (throttling == Throttling.ALWAYS || 
+        (throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0)) {
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("MockDirectoryWrapper: throttling indexOutput");
+      }
+      return throttledOutput.newFromDelegate(io);
+    } else {
+      return io;
+    }
   }
 
   @Override

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Fri May 13 11:18:19 2011
@@ -137,6 +137,8 @@ public abstract class LuceneTestCase ext
   // tests)
   /** Gets the codec to run tests with. */
   public static final String TEST_CODEC = System.getProperty("tests.codec", "randomPerField");
+  /** Gets the codecprovider to run tests with */
+  public static final String TEST_CODECPROVIDER = System.getProperty("tests.codecprovider", "random");
   /** Gets the locale to run tests with */
   public static final String TEST_LOCALE = System.getProperty("tests.locale", "random");
   /** Gets the timezone to run tests with */
@@ -329,15 +331,38 @@ public abstract class LuceneTestCase ext
     tempDirs.clear();
     stores = Collections.synchronizedMap(new IdentityHashMap<MockDirectoryWrapper,StackTraceElement[]>());
     savedCodecProvider = CodecProvider.getDefault();
-    if ("randomPerField".equals(TEST_CODEC)) {
-      if (random.nextInt(4) == 0) { // preflex-only setup
-        codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
-      } else { // per-field setup
-        CodecProvider.setDefault(new RandomCodecProvider(random));
+    if ("random".equals(TEST_CODECPROVIDER)) {
+      if ("randomPerField".equals(TEST_CODEC)) {
+        if (random.nextInt(4) == 0) { // preflex-only setup
+          codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
+        } else { // per-field setup
+          CodecProvider.setDefault(new RandomCodecProvider(random));
+          codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+        }
+      } else { // ordinary setup
         codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
       }
-    } else { // ordinary setup
-      codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+    } else {
+      // someone specified their own codecprovider by class
+      try {
+        Class<? extends CodecProvider> cpClazz = Class.forName(TEST_CODECPROVIDER).asSubclass(CodecProvider.class);
+        CodecProvider cp = cpClazz.newInstance();
+        String codecName;
+        if (TEST_CODEC.startsWith("random")) { // TODO: somehow do random per-field?!
+          Set<String> codecSet = cp.listAll();
+          String availableCodecs[] = codecSet.toArray(new String[codecSet.size()]);
+          codecName = availableCodecs[random.nextInt(availableCodecs.length)];
+        } else {
+          codecName = TEST_CODEC;
+        }
+        
+        codec = cp.lookup(codecName);
+        cp.setDefaultFieldCodec(codecName);
+        CodecProvider.setDefault(cp);
+      } catch (Exception e) {
+        System.err.println("Could not instantiate CodecProvider: " + TEST_CODECPROVIDER);
+        throw new RuntimeException(e);
+      }
     }
     savedLocale = Locale.getDefault();
     locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE);
@@ -360,16 +385,13 @@ public abstract class LuceneTestCase ext
     String codecDescription;
     CodecProvider cp = CodecProvider.getDefault();
 
-    if ("randomPerField".equals(TEST_CODEC)) {
-      if (cp instanceof RandomCodecProvider)
-        codecDescription = cp.toString();
-      else
-        codecDescription = "PreFlex";
+    if ("randomPerField".equals(TEST_CODEC) && cp instanceof RandomCodecProvider) {
+      codecDescription = cp.toString();
     } else {
       codecDescription = codec.toString();
     }
 
-    if (CodecProvider.getDefault() == savedCodecProvider)
+    if ("random".equals(TEST_CODECPROVIDER) && CodecProvider.getDefault() == savedCodecProvider)
       removeTestCodecs(codec, CodecProvider.getDefault());
     CodecProvider.setDefault(savedCodecProvider);
     Locale.setDefault(savedLocale);

Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Fri May 13 11:18:19 2011
@@ -107,6 +107,7 @@ public class TestMockAnalyzer extends Ba
       // consume
     }
     stream.end();
+    stream.close();
     
     assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
   }

Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java Fri May 13 11:18:19 2011
@@ -153,7 +153,8 @@ public class Test2BTerms extends LuceneT
 
     List<BytesRef> savedTerms = null;
 
-    Directory dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
+    MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
+    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
     //Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
 
     if (true) {