Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/18 18:24:34 UTC

svn commit: r1124321 [2/5] - in /lucene/dev/branches/docvalues: ./ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contrib...

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Wed May 18 16:24:27 2011
@@ -733,8 +733,7 @@ class DirectoryReader extends IndexReade
       // case we have to roll back:
       startCommit();
 
-      final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
-      rollbackSegmentInfos.addAll(segmentInfos);
+      final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);
 
       boolean success = false;
       try {
@@ -766,8 +765,7 @@ class DirectoryReader extends IndexReade
           deleter.refresh();
 
           // Restore all SegmentInfos (in case we pruned some)
-          segmentInfos.clear();
-          segmentInfos.addAll(rollbackSegmentInfos);
+          segmentInfos.rollbackSegmentInfos(rollbackSegments);
         }
       }
 

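The hunks above replace cloning a whole SegmentInfos with a plain backup
list that can be restored if the commit fails. A minimal standalone sketch
of that backup/rollback pattern (the RollbackExample type is illustrative,
not part of Lucene):

    import java.util.ArrayList;
    import java.util.List;

    class RollbackExample<T> {
      private final List<T> live = new ArrayList<T>();

      // Snapshot the current state before a risky operation such as a commit.
      List<T> createBackup() {
        return new ArrayList<T>(live); // shallow copy: elements are shared
      }

      // Restore the snapshot if the operation failed and must be rolled back.
      void rollback(List<T> backup) {
        live.clear();
        live.addAll(backup);
      }
    }
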
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Wed May 18 16:24:27 2011
@@ -126,7 +126,6 @@ final class DocumentsWriter {
   final DocumentsWriterPerThreadPool perThreadPool;
   final FlushPolicy flushPolicy;
   final DocumentsWriterFlushControl flushControl;
-  final Healthiness healthiness;
   DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
       BufferedDeletesStream bufferedDeletesStream) throws IOException {
     this.directory = directory;
@@ -142,10 +141,7 @@ final class DocumentsWriter {
       flushPolicy = configuredPolicy;
     }
     flushPolicy.init(this);
-    
-    healthiness = new Healthiness();
-    final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
-    flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
+    flushControl = new DocumentsWriterFlushControl(this, config);
   }
 
   synchronized void deleteQueries(final Query... queries) throws IOException {
@@ -283,31 +279,28 @@ final class DocumentsWriter {
     ensureOpen();
     boolean maybeMerge = false;
     final boolean isUpdate = delTerm != null;
-    if (healthiness.anyStalledThreads()) {
-
-      // Help out flushing any pending DWPTs so we can un-stall:
+    if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
+      // Help out flushing any queued DWPTs so we can un-stall:
       if (infoStream != null) {
-        message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)");
+        message("DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
       }
-
-      // Try pick up pending threads here if possible
-      DocumentsWriterPerThread flushingDWPT;
-      while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
-        // Don't push the delete here since the update could fail!
-        maybeMerge = doFlush(flushingDWPT);
-        if (!healthiness.anyStalledThreads()) {
-          break;
+      do {
+        // Try pick up pending threads here if possible
+        DocumentsWriterPerThread flushingDWPT;
+        while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
+          // Don't push the delete here since the update could fail!
+          maybeMerge |= doFlush(flushingDWPT);
         }
-      }
-
-      if (infoStream != null && healthiness.anyStalledThreads()) {
-        message("WARNING DocumentsWriter still has stalled threads; waiting");
-      }
-
-      healthiness.waitIfStalled(); // block if stalled
+  
+        if (infoStream != null && flushControl.anyStalledThreads()) {
+          message("WARNING DocumentsWriter has stalled threads; waiting");
+        }
+        
+        flushControl.waitIfStalled(); // block if stalled
+      } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs; try to help flushing
 
-      if (infoStream != null && healthiness.anyStalledThreads()) {
-        message("WARNING DocumentsWriter done waiting");
+      if (infoStream != null) {
+        message("continue indexing after helpling out flushing DocumentsWriter is healthy");
       }
     }
 
@@ -353,7 +346,6 @@ final class DocumentsWriter {
       maybeMerge = true;
       boolean success = false;
       FlushTicket ticket = null;
-      
       try {
         assert currentFullFlushDelQueue == null
             || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
@@ -511,9 +503,7 @@ final class DocumentsWriter {
         anythingFlushed |= doFlush(flushingDWPT);
       }
       // If a concurrent flush is still in flight wait for it
-      while (flushControl.anyFlushing()) {
-        flushControl.waitForFlush();  
-      }
+      flushControl.waitForFlush();  
       if (!anythingFlushed) { // apply deletes if we did not flush any document
         synchronized (ticketQueue) {
           ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));

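The rewritten update path above makes indexing threads help out for as long
as flushes remain queued, instead of stopping as soon as no thread is
stalled. A condensed sketch of that control flow, assuming a doFlush(...)
helper as in DocumentsWriter (illustrative, not the exact private method):

    boolean helpFlush(DocumentsWriterFlushControl flushControl) throws IOException {
      boolean maybeMerge = false;
      if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
        do {
          DocumentsWriterPerThread flushingDWPT;
          // Drain every queued flush; the indexing thread does the work itself.
          while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
            maybeMerge |= doFlush(flushingDWPT);
          }
          flushControl.waitIfStalled(); // block while memory exceeds the stall limit
        } while (flushControl.numQueuedFlushes() != 0); // more DWPTs may have queued up
      }
      return maybeMerge;
    }
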
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Wed May 18 16:24:27 2011
@@ -44,30 +44,32 @@ public final class DocumentsWriterFlushC
   private long activeBytes = 0;
   private long flushBytes = 0;
   private volatile int numPending = 0;
-  private volatile int numFlushing = 0;
   final AtomicBoolean flushDeletes = new AtomicBoolean(false);
   private boolean fullFlush = false;
-  private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
+  private final Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
   // only for safety reasons if a DWPT is close to the RAM limit
-  private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>();
-
+  private final Queue<BlockedFlush> blockedFlushes = new LinkedList<BlockedFlush>();
 
+  double maxConfiguredRamBuffer = 0;
   long peakActiveBytes = 0;// only with assert
   long peakFlushBytes = 0;// only with assert
   long peakNetBytes = 0;// only with assert
-  private final Healthiness healthiness;
+  long peakDelta = 0; // only with assert
+  final DocumentsWriterStallControl stallControl;
   private final DocumentsWriterPerThreadPool perThreadPool;
   private final FlushPolicy flushPolicy;
   private boolean closed = false;
   private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
   private final DocumentsWriter documentsWriter;
+  private final IndexWriterConfig config;
 
   DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
-      Healthiness healthiness, long hardMaxBytesPerDWPT) {
-    this.healthiness = healthiness;
+      IndexWriterConfig config) {
+    this.stallControl = new DocumentsWriterStallControl();
     this.perThreadPool = documentsWriter.perThreadPool;
     this.flushPolicy = documentsWriter.flushPolicy;
-    this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT;
+    this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
+    this.config = config;
     this.documentsWriter = documentsWriter;
   }
 
@@ -82,6 +84,24 @@ public final class DocumentsWriterFlushC
   public synchronized long netBytes() {
     return flushBytes + activeBytes;
   }
+  
+  long stallLimitBytes() {
+    final double maxRamMB = config.getRAMBufferSizeMB();
+    return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
+  }
+  
+  private boolean assertMemory() {
+    final double maxRamMB = config.getRAMBufferSizeMB();
+    if (maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
+      // for this assert we must be tolerant to ram buffer changes!
+      maxConfiguredRamBuffer = Math.max(maxRamMB, maxConfiguredRamBuffer);
+      final long ram = flushBytes + activeBytes;
+      // take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta 
+      final long expected = (long)(2 * (maxConfiguredRamBuffer * 1024 * 1024)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
+      assert ram <= expected  : "ram was " + ram + " expected: " + expected + " flush mem: " + flushBytes + " active: " + activeBytes ;   
+    }
+    return true;
+  }
 
   private void commitPerThreadBytes(ThreadState perThread) {
     final long delta = perThread.perThread.bytesUsed()
@@ -105,53 +125,62 @@ public final class DocumentsWriterFlushC
     peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
     peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
     peakNetBytes = Math.max(peakNetBytes, netBytes());
+    peakDelta = Math.max(peakDelta, delta);
+    
     return true;
   }
 
   synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
       boolean isUpdate) {
-    commitPerThreadBytes(perThread);
-    if (!perThread.flushPending) {
-      if (isUpdate) {
-        flushPolicy.onUpdate(this, perThread);
-      } else {
-        flushPolicy.onInsert(this, perThread);
+    try {
+      commitPerThreadBytes(perThread);
+      if (!perThread.flushPending) {
+        if (isUpdate) {
+          flushPolicy.onUpdate(this, perThread);
+        } else {
+          flushPolicy.onInsert(this, perThread);
+        }
+        if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
+          // Safety check to prevent a single DWPT exceeding its RAM limit. This
+          // is super important since we can not address more than 2048 MB per DWPT
+          setFlushPending(perThread);
+        }
       }
-      if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
-        // Safety check to prevent a single DWPT exceeding its RAM limit. This
-        // is super important since we can not address more than 2048 MB per DWPT
-        setFlushPending(perThread);
-        if (fullFlush) {
-          DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
-          assert toBlock != null;
-          blockedFlushes.add(toBlock);
+      final DocumentsWriterPerThread flushingDWPT;
+      if (fullFlush) {
+        if (perThread.flushPending) {
+          checkoutAndBlock(perThread);
+          flushingDWPT = nextPendingFlush();
+        } else {
+          flushingDWPT = null;
         }
+      } else {
+        flushingDWPT = tryCheckoutForFlush(perThread);
       }
+      return flushingDWPT;
+    } finally {
+      stallControl.updateStalled(this);
+      assert assertMemory();
     }
-    final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
-    healthiness.updateStalled(this);
-    return flushingDWPT;
+    
+    
   }
 
   synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
     assert flushingWriters.containsKey(dwpt);
     try {
-      numFlushing--;
       Long bytes = flushingWriters.remove(dwpt);
       flushBytes -= bytes.longValue();
       perThreadPool.recycle(dwpt);
-      healthiness.updateStalled(this);
+      stallControl.updateStalled(this);
+      assert assertMemory();
     } finally {
       notifyAll();
     }
   }
   
-  public synchronized boolean anyFlushing() {
-    return numFlushing != 0;
-  }
-  
   public synchronized void waitForFlush() {
-    if (numFlushing != 0) {
+    while (flushingWriters.size() != 0) {
       try {
         this.wait();
       } catch (InterruptedException e) {
@@ -173,32 +202,51 @@ public final class DocumentsWriterFlushC
       flushBytes += bytes;
       activeBytes -= bytes;
       numPending++; // write access synced
+      assert assertMemory();
     } // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing
     
   }
 
   synchronized void doOnAbort(ThreadState state) {
-    if (state.flushPending) {
-      flushBytes -= state.bytesUsed;
-    } else {
-      activeBytes -= state.bytesUsed;
+    try {
+      if (state.flushPending) {
+        flushBytes -= state.bytesUsed;
+      } else {
+        activeBytes -= state.bytesUsed;
+      }
+      assert assertMemory();
+      // Take it out of the loop this DWPT is stale
+      perThreadPool.replaceForFlush(state, closed);
+    } finally {
+      stallControl.updateStalled(this);
     }
-    // Take it out of the loop this DWPT is stale
-    perThreadPool.replaceForFlush(state, closed);
-    healthiness.updateStalled(this);
   }
 
   synchronized DocumentsWriterPerThread tryCheckoutForFlush(
       ThreadState perThread) {
-    if (fullFlush) {
-      return null;
+    return perThread.flushPending ? internalTryCheckOutForFlush(perThread) : null;
+  }
+  
+  private void checkoutAndBlock(ThreadState perThread) {
+    perThread.lock();
+    try {
+      assert perThread.flushPending : "can not block non-pending threadstate";
+      assert fullFlush : "can not block if fullFlush == false";
+      final DocumentsWriterPerThread dwpt;
+      final long bytes = perThread.bytesUsed;
+      dwpt = perThreadPool.replaceForFlush(perThread, closed);
+      numPending--;
+      blockedFlushes.add(new BlockedFlush(dwpt, bytes));
+    } finally {
+      perThread.unlock();
     }
-    return internalTryCheckOutForFlush(perThread);
   }
 
   private DocumentsWriterPerThread internalTryCheckOutForFlush(
       ThreadState perThread) {
-    if (perThread.flushPending) {
+    assert Thread.holdsLock(this);
+    assert perThread.flushPending;
+    try {
       // We are pending so all memory is already moved to flushBytes
       if (perThread.tryLock()) {
         try {
@@ -212,15 +260,16 @@ public final class DocumentsWriterFlushC
             // Record the flushing DWPT to reduce flushBytes in doAfterFlush
             flushingWriters.put(dwpt, Long.valueOf(bytes));
             numPending--; // write access synced
-            numFlushing++;
             return dwpt;
           }
         } finally {
           perThread.unlock();
         }
       }
+      return null;
+    } finally {
+      stallControl.updateStalled(this);
     }
-    return null;
   }
 
   @Override
@@ -231,12 +280,13 @@ public final class DocumentsWriterFlushC
 
   DocumentsWriterPerThread nextPendingFlush() {
     synchronized (this) {
-      DocumentsWriterPerThread poll = flushQueue.poll();
-      if (poll != null) {
+      final DocumentsWriterPerThread poll;
+      if ((poll = flushQueue.poll()) != null) {
+        stallControl.updateStalled(this);
         return poll;
-      }  
+      }
     }
-    if (numPending > 0) {
+    if (numPending > 0 && !fullFlush) { // don't check if we are doing a full flush
       final Iterator<ThreadState> allActiveThreads = perThreadPool
           .getActivePerThreadsIterator();
       while (allActiveThreads.hasNext() && numPending > 0) {
@@ -276,8 +326,8 @@ public final class DocumentsWriterFlushC
     return documentsWriter.deleteQueue.numGlobalTermDeletes();
   }
 
-  int numFlushingDWPT() {
-    return numFlushing;
+  synchronized int numFlushingDWPT() {
+    return flushingWriters.size();
   }
   
   public boolean doApplyAllDeletes() {	
@@ -289,7 +339,7 @@ public final class DocumentsWriterFlushC
   }
   
   int numActiveDWPT() {
-    return this.perThreadPool.getMaxThreadStates();
+    return this.perThreadPool.getActiveThreadState();
   }
   
   void markForFullFlush() {
@@ -331,11 +381,11 @@ public final class DocumentsWriterFlushC
             if (!next.flushPending) {
               setFlushPending(next);
             }
+            final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
+            assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
+            assert dwpt == flushingDWPT : "flushControl returned different DWPT";
+            toFlush.add(flushingDWPT);
           }
-          final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
-          assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
-          assert dwpt == flushingDWPT : "flushControl returned different DWPT";
-          toFlush.add(flushingDWPT);
         } else {
           // get the new delete queue from DW
           next.perThread.initialize();
@@ -345,31 +395,54 @@ public final class DocumentsWriterFlushC
       }
     }
     synchronized (this) {
-      assert assertBlockedFlushes(flushingQueue);
-      flushQueue.addAll(blockedFlushes);
-      blockedFlushes.clear();
+      /* Make sure we move all DWPTs that were concurrently marked as
+       * pending and moved to blocked over to the flushQueue. There is
+       * a chance that this happens since we mark DWPTs for a full flush
+       * without blocking indexing. */
+      pruneBlockedQueue(flushingQueue);   
+      assert assertBlockedFlushes(documentsWriter.deleteQueue);
       flushQueue.addAll(toFlush);
+      stallControl.updateStalled(this);
+    }
+  }
+  
+  /**
+   * Prunes the blockedQueue by removing all DWPT that are associated with the given flush queue. 
+   */
+  private void pruneBlockedQueue(final DocumentsWriterDeleteQueue flushingQueue) {
+    Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
+    while (iterator.hasNext()) {
+      BlockedFlush blockedFlush = iterator.next();
+      if (blockedFlush.dwpt.deleteQueue == flushingQueue) {
+        iterator.remove();
+        assert !flushingWriters.containsKey(blockedFlush.dwpt) : "DWPT is already flushing";
+        // Record the flushing DWPT to reduce flushBytes in doAfterFlush
+        flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
+        // don't decr pending here - it's already done when the DWPT is blocked
+        flushQueue.add(blockedFlush.dwpt);
+      }
     }
   }
   
   synchronized void finishFullFlush() {
     assert fullFlush;
     assert flushQueue.isEmpty();
+    assert flushingWriters.isEmpty();
     try {
       if (!blockedFlushes.isEmpty()) {
         assert assertBlockedFlushes(documentsWriter.deleteQueue);
-        flushQueue.addAll(blockedFlushes);
-        blockedFlushes.clear();
+        pruneBlockedQueue(documentsWriter.deleteQueue);
+        assert blockedFlushes.isEmpty();
       }
     } finally {
       fullFlush = false;
+      stallControl.updateStalled(this);
     }
   }
   
   boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
-    Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes;
-    for (DocumentsWriterPerThread documentsWriterPerThread : flushes) {
-      assert documentsWriterPerThread.deleteQueue == flushingQueue;
+    for (BlockedFlush blockedFlush : blockedFlushes) {
+      assert blockedFlush.dwpt.deleteQueue == flushingQueue;
     }
     return true;
   }
@@ -379,18 +452,65 @@ public final class DocumentsWriterFlushC
       for (DocumentsWriterPerThread dwpt : flushQueue) {
         doAfterFlush(dwpt);
       }
-      for (DocumentsWriterPerThread dwpt : blockedFlushes) {
-        doAfterFlush(dwpt);
+      for (BlockedFlush blockedFlush : blockedFlushes) {
+        flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
+        doAfterFlush(blockedFlush.dwpt);
       }
-      
     } finally {
       fullFlush = false;
       flushQueue.clear();
       blockedFlushes.clear();
+      stallControl.updateStalled(this);
     }
   }
   
-  synchronized boolean isFullFlush() {
+  /**
+   * Returns <code>true</code> if a full flush is currently running
+   */
+  synchronized boolean isFullFlush() { // used by assert
     return fullFlush;
   }
+
+  /**
+   * Returns the number of flushes that are already checked out but not yet
+   * actively flushing
+   */
+  synchronized int numQueuedFlushes() {
+    return flushQueue.size();
+  }
+
+  /**
+   * Returns the number of flushes that are checked out but not yet available
+   * for flushing. This only applies during a full flush if a DWPT needs
+   * flushing but must not be flushed until the full flush has finished.
+   */
+  synchronized int numBlockedFlushes() {
+    return blockedFlushes.size();
+  }
+  
+  private static class BlockedFlush {
+    final DocumentsWriterPerThread dwpt;
+    final long bytes;
+    BlockedFlush(DocumentsWriterPerThread dwpt, long bytes) {
+      super();
+      this.dwpt = dwpt;
+      this.bytes = bytes;
+    }
+  }
+
+  /**
+   * This method will block if too many DWPT are currently flushing and no
+   * checked out DWPT are available
+   */
+  void waitIfStalled() {
+      stallControl.waitIfStalled();
+  }
+
+  /**
+   * Returns <code>true</code> iff stalled
+   */
+  boolean anyStalledThreads() {
+    return stallControl.anyStalledThreads();
+  }
+ 
 }
\ No newline at end of file

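The stall limit introduced above (stallLimitBytes) is twice the configured
RAM buffer; the actual comparison lives in DocumentsWriterStallControl,
which is not part of this hunk, so the following is a sketch under that
assumption:

    class StallControlSketch {
      static long stallLimitBytes(double maxRamMB) {
        // -1 is IndexWriterConfig.DISABLE_AUTO_FLUSH: never stall on RAM usage
        return maxRamMB != -1 ? (long) (2 * maxRamMB * 1024 * 1024) : Long.MAX_VALUE;
      }

      static boolean shouldStall(long activeBytes, long flushBytes, double maxRamMB) {
        // Stall once net memory exceeds twice the RAM buffer, i.e. flushing
        // can no longer keep up with concurrent indexing.
        return activeBytes + flushBytes > stallLimitBytes(maxRamMB);
      }
    }
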
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java Wed May 18 16:24:27 2011
@@ -165,6 +165,13 @@ public abstract class DocumentsWriterPer
   public int getMaxThreadStates() {
     return perThreads.length;
   }
+  
+  /**
+   * Returns the number of active {@link ThreadState} instances.
+   */
+  public int getActiveThreadState() {
+    return numThreadStatesActive;
+  }
 
   /**
    * Returns a new {@link ThreadState} iff any new state is available otherwise

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java Wed May 18 16:24:27 2011
@@ -40,7 +40,13 @@ import java.util.Collection;
   * refuses to run by default. Specify {@code -delete-prior-commits}
   * to override this, allowing the tool to delete all but the last commit.
   * From Java code this can be enabled by passing {@code true} to
-  * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
+  * {@link #IndexUpgrader(Directory,Version,PrintStream,boolean)}.
+  * <p><b>Warning:</b> This tool may reorder documents if the index was partially
+  * upgraded before execution (e.g., documents were added). If your application relies
+  * on &quot;monotonicity&quot; of doc IDs (which means that the order in which the documents
+  * were added to the index is preserved), do a full optimize instead.
+  * The {@link MergePolicy} set by {@link IndexWriterConfig} may also reorder
+  * documents.
   */
 public final class IndexUpgrader {
 
@@ -52,9 +58,11 @@ public final class IndexUpgrader {
     System.err.println("reason, if the incoming index has more than one commit, the tool");
     System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
     System.err.println("this, allowing the tool to delete all but the last commit.");
+    System.err.println("WARNING: This tool may reorder document IDs!");
     System.exit(1);
   }
 
+  @SuppressWarnings("deprecation")
   public static void main(String[] args) throws IOException {
     String dir = null;
     boolean deletePriorCommits = false;
@@ -74,7 +82,7 @@ public final class IndexUpgrader {
       printUsage();
     }
     
-    new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
+    new IndexUpgrader(FSDirectory.open(new File(dir)), Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
   }
   
   private final Directory dir;
@@ -82,16 +90,22 @@ public final class IndexUpgrader {
   private final IndexWriterConfig iwc;
   private final boolean deletePriorCommits;
   
-  @SuppressWarnings("deprecation")
-  public IndexUpgrader(Directory dir) {
-    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
-  }
-  
-  @SuppressWarnings("deprecation")
-  public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
-    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
-  }
-  
+  /** Creates index upgrader on the given directory, using an {@link IndexWriter} with the given
+   * {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points. */
+  public IndexUpgrader(Directory dir, Version matchVersion) {
+    this(dir, new IndexWriterConfig(matchVersion, null), null, false);
+  }
+  
+  /** Creates index upgrader on the given directory, using an {@link IndexWriter} with the given
+   * {@code matchVersion}. Indexes with multiple commit points can be upgraded by removing
+   * all older commits. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
+  public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
+    this(dir, new IndexWriterConfig(matchVersion, null), infoStream, deletePriorCommits);
+  }
+  
+  /** Creates index upgrader on the given directory, using an {@link IndexWriter} with the given
+   * config. Indexes with multiple commit points can be upgraded by removing
+   * all older commits. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
   public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
     this.dir = dir;
     this.iwc = iwc;

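With the new constructors above, a typical programmatic upgrade looks like
the following (the index path is a placeholder):

    import java.io.File;

    import org.apache.lucene.index.IndexUpgrader;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class UpgradeExample {
      public static void main(String[] args) throws Exception {
        new IndexUpgrader(
            FSDirectory.open(new File("/path/to/index")),
            Version.LUCENE_CURRENT,
            System.out, // infoStream: print upgrade progress
            false)      // deletePriorCommits: refuse multi-commit indexes
          .upgrade();
      }
    }
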
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed May 18 16:24:27 2011
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -221,7 +222,7 @@ public class IndexWriter implements Clos
   private volatile long changeCount; // increments every time a change is completed
   private long lastCommitChangeCount; // last changeCount that was committed
 
-  private SegmentInfos rollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails
+  private List<SegmentInfo> rollbackSegments;      // list of SegmentInfos we will fall back to if the commit fails
 
   volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
   volatile long pendingCommitChangeCount;
@@ -440,14 +441,14 @@ public class IndexWriter implements Clos
     public synchronized boolean infoIsLive(SegmentInfo info) {
       int idx = segmentInfos.indexOf(info);
       assert idx != -1: "info=" + info + " isn't in pool";
-      assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
+      assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
       return true;
     }
 
     public synchronized SegmentInfo mapToLive(SegmentInfo info) {
       int idx = segmentInfos.indexOf(info);
       if (idx != -1) {
-        info = segmentInfos.get(idx);
+        info = segmentInfos.info(idx);
       }
       return info;
     }
@@ -818,7 +819,7 @@ public class IndexWriter implements Clos
         }
       }
 
-      setRollbackSegmentInfos(segmentInfos);
+      rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
 
       // start with previous field numbers, but new FieldInfos
       globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
@@ -862,10 +863,6 @@ public class IndexWriter implements Clos
     }
   }
   
-  private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
-    rollbackSegmentInfos = (SegmentInfos) infos.clone();
-  }
-
   /**
    * Returns the private {@link IndexWriterConfig}, cloned
    * from the {@link IndexWriterConfig} passed to
@@ -1126,8 +1123,7 @@ public class IndexWriter implements Clos
     else
       count = 0;
 
-    for (int i = 0; i < segmentInfos.size(); i++)
-      count += segmentInfos.info(i).docCount;
+    count += segmentInfos.totalDocCount();
     return count;
   }
 
@@ -1144,8 +1140,7 @@ public class IndexWriter implements Clos
     else
       count = 0;
 
-    for (int i = 0; i < segmentInfos.size(); i++) {
-      final SegmentInfo info = segmentInfos.info(i);
+    for (final SegmentInfo info : segmentInfos) {
       count += info.docCount - numDeletedDocs(info);
     }
     return count;
@@ -1159,9 +1154,11 @@ public class IndexWriter implements Clos
     if (docWriter.anyDeletions()) {
       return true;
     }
-    for (int i = 0; i < segmentInfos.size(); i++)
-      if (segmentInfos.info(i).hasDeletions())
+    for (final SegmentInfo info : segmentInfos) {
+      if (info.hasDeletions()) {
         return true;
+      }
+    }
     return false;
   }
 
@@ -1554,7 +1551,8 @@ public class IndexWriter implements Clos
 
     synchronized(this) {
       resetMergeExceptions();
-      segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos);
+      segmentsToOptimize.clear();
+      segmentsToOptimize.addAll(segmentInfos.asSet());
       optimizeMaxNumSegments = maxNumSegments;
 
       // Now mark all pending & running merges as optimize
@@ -1778,7 +1776,7 @@ public class IndexWriter implements Clos
 
     final MergePolicy.MergeSpecification spec;
     if (optimize) {
-      spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
+      spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
 
       if (spec != null) {
         final int numMerges = spec.merges.size();
@@ -1889,8 +1887,7 @@ public class IndexWriter implements Clos
         // attempt to commit using this instance of IndexWriter
         // will always write to a new generation ("write
         // once").
-        segmentInfos.clear();
-        segmentInfos.addAll(rollbackSegmentInfos);
+        segmentInfos.rollbackSegmentInfos(rollbackSegments);
 
         docWriter.abort();
 
@@ -2555,7 +2552,7 @@ public class IndexWriter implements Clos
         lastCommitChangeCount = pendingCommitChangeCount;
         segmentInfos.updateGeneration(pendingCommit);
         segmentInfos.setUserData(pendingCommit.getUserData());
-        setRollbackSegmentInfos(pendingCommit);
+        rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
         deleter.checkpoint(pendingCommit, true);
       } finally {
         // Matches the incRef done in startCommit:
@@ -2660,7 +2657,7 @@ public class IndexWriter implements Clos
   final synchronized void applyAllDeletes() throws IOException {
     flushDeletesCount.incrementAndGet();
     final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
-      .applyDeletes(readerPool, segmentInfos);
+      .applyDeletes(readerPool, segmentInfos.asList());
     if (result.anyDeletes) {
       checkpoint();
     }
@@ -2709,7 +2706,7 @@ public class IndexWriter implements Clos
 
   private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
     for(SegmentInfo info : merge.segments) {
-      if (segmentInfos.indexOf(info) == -1) {
+      if (!segmentInfos.contains(info)) {
         throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
       }
     }
@@ -2847,39 +2844,13 @@ public class IndexWriter implements Clos
       message("merged segment " + merge.info + " is 100% deleted" +  (keepFullyDeletedSegments ? "" : "; skipping insert"));
     }
 
-    final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
-    int segIdx = 0;
-    int newSegIdx = 0;
-    boolean inserted = false;
-    final int curSegCount = segmentInfos.size();
-    while(segIdx < curSegCount) {
-      final SegmentInfo info = segmentInfos.info(segIdx++);
-      if (mergedAway.contains(info)) {
-        if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
-          segmentInfos.set(segIdx-1, merge.info);
-          inserted = true;
-          newSegIdx++;
-        }
-      } else {
-        segmentInfos.set(newSegIdx++, info);
-      }
-    }
-
-    // Either we found place to insert segment, or, we did
-    // not, but only because all segments we merged became
-    // deleted while we are merging, in which case it should
-    // be the case that the new segment is also all deleted:
-    if (!inserted) {
-      assert allDeleted;
-      if (keepFullyDeletedSegments) {
-        segmentInfos.add(0, merge.info);
-      } else {
-        readerPool.drop(merge.info);
-      }
+    final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
+    segmentInfos.applyMergeChanges(merge, dropSegment);
+    
+    if (dropSegment) {
+      readerPool.drop(merge.info);
     }
-
-    segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
-
+    
     if (infoStream != null) {
       message("after commit: " + segString());
     }
@@ -3014,7 +2985,7 @@ public class IndexWriter implements Clos
       if (mergingSegments.contains(info)) {
         return false;
       }
-      if (segmentInfos.indexOf(info) == -1) {
+      if (!segmentInfos.contains(info)) {
         return false;
       }
       if (info.dir != directory) {
@@ -3462,7 +3433,7 @@ public class IndexWriter implements Clos
   }
 
   // utility routines for tests
-  SegmentInfo newestSegment() {
+  synchronized SegmentInfo newestSegment() {
     return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
   }
 
@@ -3472,19 +3443,18 @@ public class IndexWriter implements Clos
   }
 
   /** @lucene.internal */
-  public synchronized String segString(List<SegmentInfo> infos) throws IOException {
-    StringBuilder buffer = new StringBuilder();
-    final int count = infos.size();
-    for(int i = 0; i < count; i++) {
-      if (i > 0) {
+  public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
+    final StringBuilder buffer = new StringBuilder();
+    for(final SegmentInfo s : infos) {
+      if (buffer.length() > 0) {
         buffer.append(' ');
       }
-      buffer.append(segString(infos.get(i)));
+      buffer.append(segString(s));
     }
-
     return buffer.toString();
   }
 
+  /** @lucene.internal */
   public synchronized String segString(SegmentInfo info) throws IOException {
     StringBuilder buffer = new StringBuilder();
     SegmentReader reader = readerPool.getIfExists(info);

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed May 18 16:24:27 2011
@@ -133,10 +133,15 @@ public final class IndexWriterConfig imp
 
   /**
   * Creates a new config with defaults that match the specified
-   * {@link Version} as well as the default {@link Analyzer}. {@link Version} is
-   * a placeholder for future changes. The default settings are relevant to 3.1
-   * and before. In the future, if different settings will apply to different
-   * versions, they will be documented here.
+   * {@link Version} as well as the default {@link
+   * Analyzer}. If matchVersion is >= {@link
+   * Version#LUCENE_32}, {@link TieredMergePolicy} is used
+   * for merging; else {@link LogByteSizeMergePolicy}.
+   * Note that {@link TieredMergePolicy} is free to select
+   * non-contiguous merges, which means docIDs may not
+   * remain monotonic over time.  If this is a problem you
+   * should switch to {@link LogByteSizeMergePolicy} or
+   * {@link LogDocMergePolicy}.
    */
   public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
     this.matchVersion = matchVersion;
@@ -154,7 +159,11 @@ public final class IndexWriterConfig imp
     indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
     mergedSegmentWarmer = null;
     codecProvider = CodecProvider.getDefault();
-    mergePolicy = new TieredMergePolicy();
+    if (matchVersion.onOrAfter(Version.LUCENE_32)) {
+      mergePolicy = new TieredMergePolicy();
+    } else {
+      mergePolicy = new LogByteSizeMergePolicy();
+    }
     readerPooling = DEFAULT_READER_POOLING;
     indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
     readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;

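Applications that rely on monotonic docIDs can keep the pre-3.2 behavior by
setting a log merge policy explicitly. A minimal fragment (the analyzer
choice is arbitrary here):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.util.Version;

    // matchVersion >= 3.2 defaults to TieredMergePolicy, which may reorder
    // docIDs; override the merge policy to preserve insertion order.
    IndexWriterConfig conf = new IndexWriterConfig(
        Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32));
    conf.setMergePolicy(new LogByteSizeMergePolicy());
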
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed May 18 16:24:27 2011
@@ -242,6 +242,7 @@ public abstract class LogMergePolicy ext
   private MergeSpecification findMergesForOptimizeSizeLimit(
       SegmentInfos infos, int maxNumSegments, int last) throws IOException {
     MergeSpecification spec = new MergeSpecification();
+    final List<SegmentInfo> segments = infos.asList();
 
     int start = last - 1;
     while (start >= 0) {
@@ -254,12 +255,12 @@ public abstract class LogMergePolicy ext
         // unless there is only 1 which is optimized.
         if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
           // there is more than 1 segment to the right of this one, or an unoptimized single segment.
-          spec.add(new OneMerge(infos.range(start + 1, last)));
+          spec.add(new OneMerge(segments.subList(start + 1, last)));
         }
         last = start;
       } else if (last - start == mergeFactor) {
         // mergeFactor eligible segments were found, add them as a merge.
-        spec.add(new OneMerge(infos.range(start, last)));
+        spec.add(new OneMerge(segments.subList(start, last)));
         last = start;
       }
       --start;
@@ -267,7 +268,7 @@ public abstract class LogMergePolicy ext
 
     // Add any left-over segments, unless there is just 1 already optimized.
     if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
-      spec.add(new OneMerge(infos.range(start, last)));
+      spec.add(new OneMerge(segments.subList(start, last)));
     }
 
     return spec.merges.size() == 0 ? null : spec;
@@ -280,11 +281,12 @@ public abstract class LogMergePolicy ext
    */
   private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
     MergeSpecification spec = new MergeSpecification();
+    final List<SegmentInfo> segments = infos.asList();
     
     // First, enroll all "full" merges (size
     // mergeFactor) to potentially be run concurrently:
     while (last - maxNumSegments + 1 >= mergeFactor) {
-      spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
+      spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
       last -= mergeFactor;
     }
 
@@ -296,7 +298,7 @@ public abstract class LogMergePolicy ext
         // Since we must optimize down to 1 segment, the
         // choice is simple:
         if (last > 1 || !isOptimized(infos.info(0))) {
-          spec.add(new OneMerge(infos.range(0, last)));
+          spec.add(new OneMerge(segments.subList(0, last)));
         }
       } else if (last > maxNumSegments) {
 
@@ -325,7 +327,7 @@ public abstract class LogMergePolicy ext
           }
         }
 
-        spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
+        spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
       }
     }
     return spec.merges.size() == 0 ? null : spec;
@@ -412,7 +414,8 @@ public abstract class LogMergePolicy ext
   @Override
   public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
       throws CorruptIndexException, IOException {
-    final int numSegments = segmentInfos.size();
+    final List<SegmentInfo> segments = segmentInfos.asList();
+    final int numSegments = segments.size();
 
     if (verbose())
       message("findMergesToExpungeDeletes: " + numSegments + " segments");
@@ -434,7 +437,7 @@ public abstract class LogMergePolicy ext
           // deletions, so force a merge now:
           if (verbose())
             message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-          spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
+          spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
           firstSegmentWithDeletions = i;
         }
       } else if (firstSegmentWithDeletions != -1) {
@@ -443,7 +446,7 @@ public abstract class LogMergePolicy ext
         // mergeFactor segments
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
+        spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = -1;
       }
     }
@@ -451,7 +454,7 @@ public abstract class LogMergePolicy ext
     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
-      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
+      spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
     }
 
     return spec;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed May 18 16:24:27 2011
@@ -72,7 +72,7 @@ public abstract class MergePolicy implem
     long mergeGen;                  // used by IndexWriter
     boolean isExternal;             // used by IndexWriter
     int maxNumSegmentsOptimize;     // used by IndexWriter
-    long estimatedMergeBytes;       // used by IndexWriter
+    public long estimatedMergeBytes;       // used by IndexWriter
     List<SegmentReader> readers;        // used by IndexWriter
     List<SegmentReader> readerClones;   // used by IndexWriter
     public final List<SegmentInfo> segments;
@@ -84,7 +84,8 @@ public abstract class MergePolicy implem
     public OneMerge(List<SegmentInfo> segments) {
       if (0 == segments.size())
         throw new RuntimeException("segments must include at least one segment");
-      this.segments = segments;
+      // clone the list, as the incoming list may be based on the original SegmentInfos and may be modified
+      this.segments = new ArrayList<SegmentInfo>(segments);
       int count = 0;
       for(SegmentInfo info : segments) {
         count += info.docCount;

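The defensive copy above matters because callers now pass live subList views
of SegmentInfos (see the LogMergePolicy hunks below), and such views track
later mutations of the backing list. A small runnable illustration of the
hazard it prevents:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class SubListHazard {
      public static void main(String[] args) {
        List<String> segments =
            new ArrayList<String>(Arrays.asList("_0", "_1", "_2"));
        List<String> view = segments.subList(0, 2); // live view, not a copy
        segments.set(0, "_3");                      // mutate the backing list...
        System.out.println(view);                   // ...prints [_3, _1]

        // The fix in OneMerge: snapshot the segments at construction time.
        List<String> snapshot = new ArrayList<String>(view);
        segments.set(1, "_4");
        System.out.println(snapshot);               // still [_3, _1]
      }
    }
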
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Wed May 18 16:24:27 2011
@@ -42,7 +42,7 @@ import org.apache.lucene.util.Constants;
  *
  * @lucene.experimental
  */
-public final class SegmentInfo {
+public final class SegmentInfo implements Cloneable {
   // TODO: remove with hasVector and hasProx
   private static final int CHECK_FIELDINFO = -2;
   static final int NO = -1;          // e.g. no norms; no deletes;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Wed May 18 16:24:27 2011
@@ -20,13 +20,16 @@ package org.apache.lucene.index;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
-import java.util.Vector;
+import java.util.Set;
 
 import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
 import org.apache.lucene.index.codecs.CodecProvider;
@@ -45,7 +48,7 @@ import org.apache.lucene.util.ThreadInte
  * 
  * @lucene.experimental
  */
-public final class SegmentInfos extends Vector<SegmentInfo> {
+public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
 
   /* 
    * The file format version, a negative number.
@@ -84,7 +87,12 @@ public final class SegmentInfos extends 
   private int format;
   
   private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
-
+  
+  private List<SegmentInfo> segments = new ArrayList<SegmentInfo>();
+  private Set<SegmentInfo> segmentSet = new HashSet<SegmentInfo>();
+  private transient List<SegmentInfo> cachedUnmodifiableList;
+  private transient Set<SegmentInfo> cachedUnmodifiableSet;  
+  
   /**
    * If non-null, information about loading segments_N files
    * will be printed here.  @see #setInfoStream.
@@ -107,8 +115,8 @@ public final class SegmentInfos extends 
     return format;
   }
 
-  public final SegmentInfo info(int i) {
-    return get(i);
+  public SegmentInfo info(int i) {
+    return segments.get(i);
   }
 
   /**
@@ -237,7 +245,7 @@ public final class SegmentInfos extends 
     boolean success = false;
 
     // Clear any previous segments:
-    clear();
+    this.clear();
 
     generation = generationFromSegmentsFileName(segmentFileName);
 
@@ -252,7 +260,7 @@ public final class SegmentInfos extends 
       if (!success) {
         // Clear any segment infos we had loaded so we
         // have a clean slate on retry:
-        clear();
+        this.clear();
       }
     }
   }
@@ -349,15 +357,14 @@ public final class SegmentInfos extends 
 
   /** Prunes any segment whose docs are all deleted. */
   public void pruneDeletedSegments() {
-    int segIdx = 0;
-    while(segIdx < size()) {
-      final SegmentInfo info = info(segIdx);
+    for(final Iterator<SegmentInfo> it = segments.iterator(); it.hasNext();) {
+      final SegmentInfo info = it.next();
       if (info.getDelCount() == info.docCount) {
-        remove(segIdx);
-      } else {
-        segIdx++;
+        it.remove();
+        segmentSet.remove(info);
       }
     }
+    assert segmentSet.size() == segments.size();
   }
 
   /**
@@ -367,14 +374,23 @@ public final class SegmentInfos extends 
   
   @Override
   public Object clone() {
-    SegmentInfos sis = (SegmentInfos) super.clone();
-    for(int i=0;i<sis.size();i++) {
-      final SegmentInfo info = sis.info(i);
-      assert info.getSegmentCodecs() != null;
-      sis.set(i, (SegmentInfo) info.clone());
+    try {
+      final SegmentInfos sis = (SegmentInfos) super.clone();
+      // deep clone, first recreate all collections:
+      sis.segments = new ArrayList<SegmentInfo>(size());
+      sis.segmentSet = new HashSet<SegmentInfo>(size());
+      sis.cachedUnmodifiableList = null;
+      sis.cachedUnmodifiableSet = null;
+      for(final SegmentInfo info : this) {
+        assert info.getSegmentCodecs() != null;
+        // don't directly access segments, use the add method!
+        sis.add((SegmentInfo) info.clone());
+      }
+      sis.userData = new HashMap<String,String>(userData);
+      return sis;
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException("should not happen", e);
     }
-    sis.userData = new HashMap<String,String>(userData);
-    return sis;
   }
 
   /**
@@ -742,18 +758,6 @@ public final class SegmentInfos extends 
     protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
   }
 
-  /**
-   * Returns a new SegmentInfos containing the SegmentInfo
-   * instances in the specified range first (inclusive) to
-   * last (exclusive), so total number of segments returned
-   * is last-first.
-   */
-  public SegmentInfos range(int first, int last) {
-    SegmentInfos infos = new SegmentInfos(codecs);
-    infos.addAll(super.subList(first, last));
-    return infos;
-  }
-
   // Carry over generation numbers from another SegmentInfos
   void updateGeneration(SegmentInfos other) {
     lastGeneration = other.lastGeneration;
@@ -831,6 +835,10 @@ public final class SegmentInfos extends 
         } catch (Throwable t) {
           // throw orig excp
         }
+      } else {
+        // we must sync here explicitly since during a commit
+        // IW will not sync the global field map. 
+        dir.sync(Collections.singleton(name));
       }
     }
     return version;
@@ -956,7 +964,7 @@ public final class SegmentInfos extends 
   }
   
 
-  public synchronized String toString(Directory directory) {
+  public String toString(Directory directory) {
     StringBuilder buffer = new StringBuilder();
     buffer.append(getCurrentSegmentFileName()).append(": ");
     final int count = size();
@@ -987,8 +995,7 @@ public final class SegmentInfos extends 
    *  remain write once.
    */
   void replace(SegmentInfos other) {
-    clear();
-    addAll(other);
+    rollbackSegmentInfos(other.asList());
     lastGeneration = other.lastGeneration;
     lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
     format = other.format;
@@ -1014,7 +1021,7 @@ public final class SegmentInfos extends 
    * Loads or returns the already loaded the global field number map for this {@link SegmentInfos}.
    * If this {@link SegmentInfos} has no global field number map the returned instance is empty
    */
-  synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
+  FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
     if (globalFieldNumberMap != null) {
       return globalFieldNumberMap;
     }
@@ -1054,4 +1061,135 @@ public final class SegmentInfos extends 
   long getLastGlobalFieldMapVersion() {
     return lastGlobalFieldMapVersion;
   }
+  
+  /** applies all changes caused by committing a merge to this SegmentInfos */
+  void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) {
+    final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
+    boolean inserted = false;
+    int newSegIdx = 0;
+    for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) {
+      assert segIdx >= newSegIdx;
+      final SegmentInfo info = segments.get(segIdx);
+      if (mergedAway.contains(info)) {
+        if (!inserted && !dropSegment) {
+          segments.set(segIdx, merge.info);
+          inserted = true;
+          newSegIdx++;
+        }
+      } else {
+        segments.set(newSegIdx, info);
+        newSegIdx++;
+      }
+    }
+
+    // Either we found place to insert segment, or, we did
+    // not, but only because all segments we merged became
+    // deleted while we are merging, in which case it should
+    // be the case that the new segment is also all deleted,
+    // we insert it at the beginning if it should not be dropped:
+    if (!inserted && !dropSegment) {
+      segments.add(0, merge.info);
+    }
+
+    // the rest of the segments in the list are duplicates, so don't remove them from the set, only from the list!
+    segments.subList(newSegIdx, segments.size()).clear();
+    
+    // update the Set
+    if (!dropSegment) {
+      segmentSet.add(merge.info);
+    }
+    segmentSet.removeAll(mergedAway);
+    
+    assert segmentSet.size() == segments.size();
+  }
+
+  List<SegmentInfo> createBackupSegmentInfos(boolean cloneChildren) {
+    if (cloneChildren) {
+      final List<SegmentInfo> list = new ArrayList<SegmentInfo>(size());
+      for(final SegmentInfo info : this) {
+        assert info.getSegmentCodecs() != null;
+        list.add((SegmentInfo) info.clone());
+      }
+      return list;
+    } else {
+      return new ArrayList<SegmentInfo>(segments);
+    }
+  }
+  
+  void rollbackSegmentInfos(List<SegmentInfo> infos) {
+    this.clear();
+    this.addAll(infos);
+  }
+  
+  /** Returns an <b>unmodifiable</b> {@link Iterator} of contained segments in order. */
+  // @Override (commented out until Java 6)
+  public Iterator<SegmentInfo> iterator() {
+    return asList().iterator();
+  }
+  
+  /** Returns all contained segments as an <b>unmodifiable</b> {@link List} view. */
+  public List<SegmentInfo> asList() {
+    if (cachedUnmodifiableList == null) {
+      cachedUnmodifiableList = Collections.unmodifiableList(segments);
+    }
+    return cachedUnmodifiableList;
+  }
+  
+  /** Returns all contained segments as an <b>unmodifiable</b> {@link Set} view.
+   * The iterator is not sorted; use the {@link List} view or {@link #iterator} to get all segments in order. */
+  public Set<SegmentInfo> asSet() {
+    if (cachedUnmodifiableSet == null) {
+      cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
+    }
+    return cachedUnmodifiableSet;
+  }
+  
+  public int size() {
+    return segments.size();
+  }
+
+  public void add(SegmentInfo si) {
+    if (segmentSet.contains(si)) {
+      throw new IllegalStateException("Cannot add the same segment two times to this SegmentInfos instance");
+    }
+    segments.add(si);
+    segmentSet.add(si);
+    assert segmentSet.size() == segments.size();
+  }
+  
+  public void addAll(Iterable<SegmentInfo> sis) {
+    for (final SegmentInfo si : sis) {
+      this.add(si);
+    }
+  }
+  
+  public void clear() {
+    segments.clear();
+    segmentSet.clear();
+  }
+  
+  public void remove(SegmentInfo si) {
+    final int index = this.indexOf(si);
+    if (index >= 0) {
+      this.remove(index);
+    }
+  }
+  
+  public void remove(int index) {
+    segmentSet.remove(segments.remove(index));
+    assert segmentSet.size() == segments.size();
+  }
+  
+  public boolean contains(SegmentInfo si) {
+    return segmentSet.contains(si);
+  }
+
+  public int indexOf(SegmentInfo si) {
+    if (segmentSet.contains(si)) {
+      return segments.indexOf(si);
+    } else {
+      return -1;
+    }
+  }
+
 }

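The SegmentInfos changes above pair the ordered segments list with a parallel
segmentSet, asserting the set and list sizes match after every mutation, so
contains()/indexOf() get O(1) membership checks without giving up ordering.
A minimal, self-contained sketch of the same dual-bookkeeping idea (generic
names, not the Lucene classes):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Ordered collection that rejects duplicates: the List preserves
    // insertion order, the Set gives O(1) contains(); the two must stay
    // in sync, mirroring the segments/segmentSet pair above.
    final class OrderedUniqueList<T> {
      private final List<T> list = new ArrayList<T>();
      private final Set<T> set = new HashSet<T>();

      void add(T item) {
        if (!set.add(item)) {
          throw new IllegalStateException("cannot add the same item twice: " + item);
        }
        list.add(item);
        assert set.size() == list.size();
      }

      void remove(T item) {
        if (set.remove(item)) {
          list.remove(item);
        }
        assert set.size() == list.size();
      }

      boolean contains(T item) {
        return set.contains(item); // O(1) instead of scanning the list
      }
    }
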
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Wed May 18 16:24:27 2011
@@ -251,9 +251,7 @@ public class TieredMergePolicy extends M
     final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
     final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
 
-    final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
-    infosSorted.addAll(infos);
-
+    final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(infos.asList());
     Collections.sort(infosSorted, segmentByteSizeDescending);
 
     // Compute total index bytes & print details about the index

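Note that asList() above returns an unmodifiable view, so it has to be copied
before sorting; sorting the view in place would throw
UnsupportedOperationException. A tiny illustration of the pattern with plain
strings (hypothetical demo class):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SortCopyDemo {
      public static void main(String[] args) {
        List<String> view = Collections.unmodifiableList(Arrays.asList("b", "a", "c"));
        List<String> copy = new ArrayList<String>(view); // mutable snapshot
        Collections.sort(copy);                          // safe: sorts only the copy
        // Collections.sort(view) would throw UnsupportedOperationException
        System.out.println(copy);                        // [a, b, c]
      }
    }
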
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java Wed May 18 16:24:27 2011
@@ -40,6 +40,11 @@ import java.util.Set;
   *  w.optimize();
   *  w.close();
   * </pre>
+  * <p><b>Warning:</b> This merge policy may reorder documents if the index was partially
+  * upgraded before calling optimize (e.g., documents were added). If your application relies
+  * on &quot;monotonicity&quot; of doc IDs (which means that the order in which the documents
+  * were added to the index is preserved), do a full optimize instead. Note that the
+  * delegate {@code MergePolicy} may also reorder documents.
   * @lucene.experimental
   * @see IndexUpgrader
   */

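A hedged sketch of the full-optimize route the new warning recommends,
mirroring the usage example in the class javadoc (dir, analyzer and
matchVersion are assumed to be defined elsewhere):

    IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
    conf.setMergePolicy(new UpgradeIndexMergePolicy(conf.getMergePolicy()));
    IndexWriter w = new IndexWriter(dir, conf);
    w.optimize();  // full optimize: every segment is rewritten in one pass
    w.close();
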
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java Wed May 18 16:24:27 2011
@@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter
   private class FSTFieldWriter extends FieldWriter {
     private final Builder<Long> fstBuilder;
     private final PositiveIntOutputs fstOutputs;
+    private final long startTermsFilePointer;
 
     final FieldInfo fieldInfo;
     int numIndexTerms;
@@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter
 
       // Always put empty string in
       fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
+      startTermsFilePointer = termsFilePointer;
     }
 
     @Override
@@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter
 
     @Override
     public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+      if (text.length == 0) {
+        // We already added empty string in ctor
+        assert termsFilePointer == startTermsFilePointer;
+        return;
+      }
       final int lengthSave = text.length;
       text.length = indexedTermPrefixLength(lastTerm, text);
       try {

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java Wed May 18 16:24:27 2011
@@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
   public abstract int nextDoc() throws IOException;
 
   /**
-   * Advances to the first beyond the current whose document number is greater
-   * than or equal to <i>target</i>. Returns the current document number or
-   * {@link #NO_MORE_DOCS} if there are no more docs in the set.
+   * Advances to the first document beyond (see NOTE below) the current one
+   * whose number is greater than or equal to <i>target</i>. Returns the
+   * current document number or {@link #NO_MORE_DOCS} if there are no more
+   * docs in the set.
    * <p>
    * Behaves as if written:
    * 
@@ -78,7 +79,7 @@ public abstract class DocIdSetIterator {
    * 
    * Some implementations are considerably more efficient than that.
    * <p>
-   * <b>NOTE:</b> when <code> target &le; current</code> implementations may opt 
+   * <b>NOTE:</b> when <code> target &le; current</code> implementations may opt
    * not to advance beyond their current {@link #docID()}.
    * <p>
    * <b>NOTE:</b> this method may be called with {@link #NO_MORE_DOCS} for

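The "Behaves as if written" contract referenced above amounts to a forward
scan; a minimal sketch of that naive equivalent:

    // Naive equivalent of advance(target): scan with nextDoc() until a doc
    // >= target turns up (or NO_MORE_DOCS, which is Integer.MAX_VALUE).
    int advance(int target) throws IOException {
      int doc;
      while ((doc = nextDoc()) < target) {
      }
      return doc;
    }
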
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java Wed May 18 16:24:27 2011
@@ -55,7 +55,12 @@ public class QueryTermVector implements 
   public QueryTermVector(String queryString, Analyzer analyzer) {    
     if (analyzer != null)
     {
-      TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
+      TokenStream stream;
+      try {
+        stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+      } catch (IOException e1) {
+        stream = null;
+      }
       if (stream != null)
       {
         List<BytesRef> terms = new ArrayList<BytesRef>();

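reusableTokenStream() can throw IOException where tokenStream() does not,
hence the new try/catch. The patch falls back to a null stream; falling back
to a fresh, non-reused stream is another option. A hedged sketch of that
alternative (analyzer and queryString as in the constructor above):

    TokenStream stream;
    try {
      // reuse the analyzer's thread-private stream, avoiding per-query allocation
      stream = analyzer.reusableTokenStream("", new StringReader(queryString));
    } catch (IOException e) {
      // fall back to a fresh stream
      stream = analyzer.tokenStream("", new StringReader(queryString));
    }
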
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Wed May 18 16:24:27 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.HashMap;
+import java.util.HashSet;
 
 final class SloppyPhraseScorer extends PhraseScorer {
     private int slop;
@@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends P
 
     /**
      * Init PhrasePositions in place.
-     * There is a one time initialization for this scorer:
+     * There is a one-time initialization for this scorer (taking place at the first doc that matches all terms):
      * <br>- Put in repeats[] each pp that has another pp with same position in the doc.
+     *       This relies on the fact that the position in PP is computed as (TP.position - offset),
+     *       so by adding the offset back we compare actual document positions and identify
+     *       that the two PPs land on the same term.
+     *       An exception is two distinct terms at the same offset in the query and the same
+     *       position in the doc. This case is detected by comparing just the (query) offsets,
+     *       and two such PPs are not considered "repeating".
      * <br>- Also mark each such pp by pp.repeats = true.
      * <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
     * In particular, this allows queries with no repetitions to be scored with no overhead from this computation.
@@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends P
         if (!checkedRepeats) {
             checkedRepeats = true;
             // check for repeats
-            HashMap<PhrasePositions, Object> m = null;
+            HashSet<PhrasePositions> m = null;
             for (PhrasePositions pp = first; pp != null; pp = pp.next) {
                 int tpPos = pp.position + pp.offset;
                 for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
+                    if (pp.offset == pp2.offset) {
+                      continue; // not a repetition: the two PPs are originally at the same offset in the query!
+                    }
                     int tpPos2 = pp2.position + pp2.offset;
                     if (tpPos2 == tpPos) { 
                         if (m == null)
-                            m = new HashMap<PhrasePositions, Object>();
+                            m = new HashSet<PhrasePositions>();
                         pp.repeats = true;
                         pp2.repeats = true;
-                        m.put(pp,null);
-                        m.put(pp2,null);
+                        m.add(pp);
+                        m.add(pp2);
                     }
                 }
             }
             if (m!=null)
-                repeats = m.keySet().toArray(new PhrasePositions[0]);
+                repeats = m.toArray(new PhrasePositions[0]);
         }
         
         // with repeats must advance some repeating pp's so they all start with differing tp's       
@@ -204,11 +213,16 @@ final class SloppyPhraseScorer extends P
         int tpPos = pp.position + pp.offset;
         for (int i = 0; i < repeats.length; i++) {
             PhrasePositions pp2 = repeats[i];
-            if (pp2 == pp)
+            if (pp2 == pp) {
                 continue;
+            }
+            if (pp.offset == pp2.offset) {
+              continue; // not a repetition: the two PPs are originally at the same offset in the query!
+            }
             int tpPos2 = pp2.position + pp2.offset;
-            if (tpPos2 == tpPos)
+            if (tpPos2 == tpPos) {
                 return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
+            }
         }
         return null; 
     }

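A small worked example of the repeat detection above, for the phrase query
"a b a" matching the document "a b a" (values are illustrative): each pp
stores TP.position - offset, so adding the offset back recovers the document
position, and the new offset check keeps distinct terms that share a query
offset from being flagged as repeats.

    public class RepeatCheckDemo {
      public static void main(String[] args) {
        int[] offset   = { 0, 1, 2 };   // query offsets of the PPs for "a", "b", "a"
        int[] position = { 0, 0, -2 };  // pp.position = TP.position - offset
        for (int i = 0; i < 3; i++) {
          for (int j = i + 1; j < 3; j++) {
            boolean sameDocPos   = position[i] + offset[i] == position[j] + offset[j];
            boolean sameQueryOff = offset[i] == offset[j];
            if (sameDocPos && !sameQueryOff) {
              // prints: repeat: pp0 / pp2 (the two "a" positions)
              System.out.println("repeat: pp" + i + " / pp" + j);
            }
          }
        }
      }
    }
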
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java Wed May 18 16:24:27 2011
@@ -172,7 +172,7 @@ public class NIOFSDirectory extends FSDi
         final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
               "OutOfMemoryError likely caused by the Sun VM Bug described in "
               + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
-              + "with a a value smaller than the current chunk size (" + chunkSize + ")");
+              + "with a value smaller than the current chunk size (" + chunkSize + ")");
         outOfMemoryError.initCause(e);
         throw outOfMemoryError;
       }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java Wed May 18 16:24:27 2011
@@ -125,7 +125,7 @@ public class SimpleFSDirectory extends F
           final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
               "OutOfMemoryError likely caused by the Sun VM Bug described in "
               + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
-              + "with a value smaller than the current chunks size (" + chunkSize + ")");
+              + "with a value smaller than the current chunk size (" + chunkSize + ")");
           outOfMemoryError.initCause(e);
           throw outOfMemoryError;
         }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java Wed May 18 16:24:27 2011
@@ -20,9 +20,6 @@ package org.apache.lucene.util;
 import java.util.Comparator;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
-import java.io.ObjectInput;
-import java.io.ObjectOutput;
-import java.io.IOException;
 
 /** Represents byte[], as a slice (offset + length) into an
  *  existing byte[].
@@ -193,6 +190,9 @@ public final class BytesRef implements C
 
   @Override
   public boolean equals(Object other) {
+    if (other == null) {
+      return false;
+    }
     return this.bytesEquals((BytesRef) other);
   }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java Wed May 18 16:24:27 2011
@@ -1,5 +1,8 @@
 package org.apache.lucene.util;
 
+import java.util.Comparator;
+import java.util.StringTokenizer;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -54,4 +57,42 @@ public abstract class StringHelper {
 
   private StringHelper() {
   }
+  
+  /**
+   * @return a Comparator over versioned strings such as X.YY.Z
+   * @lucene.internal
+   */
+  public static Comparator<String> getVersionComparator() {
+    return versionComparator;
+  }
+  
+  private static Comparator<String> versionComparator = new Comparator<String>() {
+    public int compare(String a, String b) {
+      StringTokenizer aTokens = new StringTokenizer(a, ".");
+      StringTokenizer bTokens = new StringTokenizer(b, ".");
+      
+      while (aTokens.hasMoreTokens()) {
+        int aToken = Integer.parseInt(aTokens.nextToken());
+        if (bTokens.hasMoreTokens()) {
+          int bToken = Integer.parseInt(bTokens.nextToken());
+          if (aToken != bToken) {
+            return aToken - bToken;
+          }
+        } else {
+          // a has some extra trailing tokens. if these are all zeroes, that's ok.
+          if (aToken != 0) {
+            return 1; 
+          }
+        }
+      }
+      
+      // b has some extra trailing tokens. if these are all zeroes, that's ok.
+      while (bTokens.hasMoreTokens()) {
+        if (Integer.parseInt(bTokens.nextToken()) != 0)
+          return -1;
+      }
+      
+      return 0;
+    }
+  };
 }

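A short usage sketch of the new comparator (hypothetical demo class): per the
trailing-zero handling above, "3.1" and "3.1.0" compare equal, and tokens
compare numerically rather than lexicographically:

    import java.util.Comparator;
    import org.apache.lucene.util.StringHelper;

    public class VersionCompareDemo {
      public static void main(String[] args) {
        Comparator<String> cmp = StringHelper.getVersionComparator();
        System.out.println(cmp.compare("3.1", "3.1.0")); // 0   : trailing zeros ignored
        System.out.println(cmp.compare("3.2", "3.10"));  // < 0 : 2 < 10 numerically
        System.out.println(cmp.compare("4.0", "3.9.9")); // > 0
      }
    }
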
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java Wed May 18 16:24:27 2011
@@ -143,13 +143,16 @@ public class LevenshteinAutomata {
       if (dest >= 0)
         for (int r = 0; r < numRanges; r++)
           states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));      
-      // reduce the state: this doesn't appear to help anything
-      //states[k].reduce();
     }
 
     Automaton a = new Automaton(states[0]);
     a.setDeterministic(true);
-    a.setNumberedStates(states);
+    // we create some useless unconnected states, and it's a net win overall to remove these,
+    // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
+    // so, while we could set our numberedStates here, it's actually best not to, and instead to
+    // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
+    //a.setNumberedStates(states);
+    a.reduce();
     // we need not trim transitions to dead states, as they are not created.
     //a.restoreInvariant();
     return a;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Wed May 18 16:24:27 2011
@@ -30,6 +30,8 @@
 package org.apache.lucene.util.automaton;
 
 import java.util.BitSet;
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.LinkedList;
 
 /**
@@ -72,8 +74,12 @@ final public class MinimizationOperation
     final int[] sigma = a.getStartPoints();
     final State[] states = a.getNumberedStates();
     final int sigmaLen = sigma.length, statesLen = states.length;
-    final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
-    final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
+    @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
+      (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
+    @SuppressWarnings("unchecked") final HashSet<State>[] partition =
+      (HashSet<State>[]) new HashSet[statesLen];
+    @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
+      (ArrayList<State>[]) new ArrayList[statesLen];
     final int[] block = new int[statesLen];
     final StateList[][] active = new StateList[statesLen][sigmaLen];
     final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@@ -82,8 +88,8 @@ final public class MinimizationOperation
     final BitSet split = new BitSet(statesLen), 
       refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
     for (int q = 0; q < statesLen; q++) {
-      splitblock[q] = new BitSet(statesLen);
-      partition[q] = new BitSet(statesLen);
+      splitblock[q] = new ArrayList<State>();
+      partition[q] = new HashSet<State>();
       for (int x = 0; x < sigmaLen; x++) {
         active[q][x] = new StateList();
       }
@@ -92,23 +98,22 @@ final public class MinimizationOperation
     for (int q = 0; q < statesLen; q++) {
       final State qq = states[q];
       final int j = qq.accept ? 0 : 1;
-      partition[j].set(q);
+      partition[j].add(qq);
       block[q] = j;
       for (int x = 0; x < sigmaLen; x++) {
-        final BitSet[] r =
+        final ArrayList<State>[] r =
           reverse[qq.step(sigma[x]).number];
         if (r[x] == null)
-          r[x] = new BitSet();
-        r[x].set(q);
+          r[x] = new ArrayList<State>();
+        r[x].add(qq);
       }
     }
     // initialize active sets
     for (int j = 0; j <= 1; j++) {
-      final BitSet part = partition[j];
       for (int x = 0; x < sigmaLen; x++) {
-        for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
-          if (reverse[i][x] != null)
-            active2[i][x] = active[j][x].add(states[i]);
+        for (final State qq : partition[j]) {
+          if (reverse[qq.number][x] != null)
+            active2[qq.number][x] = active[j][x].add(qq);
         }
       }
     }
@@ -121,18 +126,19 @@ final public class MinimizationOperation
     // process pending until fixed point
     int k = 2;
     while (!pending.isEmpty()) {
-      IntPair ip = pending.removeFirst();
+      final IntPair ip = pending.removeFirst();
       final int p = ip.n1;
       final int x = ip.n2;
       pending2.clear(x*statesLen + p);
       // find states that need to be split off their blocks
       for (StateListNode m = active[p][x].first; m != null; m = m.next) {
-        final BitSet r = reverse[m.q.number][x];
-        if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
+        final ArrayList<State> r = reverse[m.q.number][x];
+        if (r != null) for (final State s : r) {
+          final int i = s.number;
           if (!split.get(i)) {
             split.set(i);
             final int j = block[i];
-            splitblock[j].set(i);
+            splitblock[j].add(s);
             if (!refine2.get(j)) {
               refine2.set(j);
               refine.set(j);
@@ -142,18 +148,19 @@ final public class MinimizationOperation
       }
       // refine blocks
       for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
-        final BitSet sb = splitblock[j];
-        if (sb.cardinality() < partition[j].cardinality()) {
-          final BitSet b1 = partition[j], b2 = partition[k];
-          for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
-            b1.clear(i);
-            b2.set(i);
-            block[i] = k;
+        final ArrayList<State> sb = splitblock[j];
+        if (sb.size() < partition[j].size()) {
+          final HashSet<State> b1 = partition[j];
+          final HashSet<State> b2 = partition[k];
+          for (final State s : sb) {
+            b1.remove(s);
+            b2.add(s);
+            block[s.number] = k;
             for (int c = 0; c < sigmaLen; c++) {
-              final StateListNode sn = active2[i][c];
+              final StateListNode sn = active2[s.number][c];
               if (sn != null && sn.sl == active[j][c]) {
                 sn.remove();
-                active2[i][c] = active[k][c].add(states[i]);
+                active2[s.number][c] = active[k][c].add(s);
               }
             }
           }
@@ -173,8 +180,8 @@ final public class MinimizationOperation
           k++;
         }
         refine2.clear(j);
-        for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
-          split.clear(i);
+        for (final State s : sb)
+          split.clear(s.number);
         sb.clear();
       }
       refine.clear();
@@ -184,9 +191,7 @@ final public class MinimizationOperation
     for (int n = 0; n < newstates.length; n++) {
       final State s = new State();
       newstates[n] = s;
-      BitSet part = partition[n];
-      for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
-        final State q = states[i];
+      for (State q : partition[n]) {
         if (q == a.initial) a.initial = s;
         s.accept = q.accept;
         s.number = q.number; // select representative

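The refactor above replaces per-block BitSets (each sized to the total state
count) with ArrayList/HashSet, so the work per split scales with the block
sizes rather than with statesLen. For context, a hedged usage sketch
(assuming the public minimize entry point and the RegExp helper in this
package; not part of this patch):

    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.MinimizationOperations;
    import org.apache.lucene.util.automaton.RegExp;

    Automaton a = new RegExp("(ab|ac)*").toAutomaton();
    MinimizationOperations.minimize(a); // minimizes in place (Hopcroft)
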
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Wed May 18 16:24:27 2011
@@ -232,9 +232,7 @@ public class FST<T> {
 
   void setEmptyOutput(T v) throws IOException {
     if (emptyOutput != null) {
-      if (!emptyOutput.equals(v)) {
-        emptyOutput = outputs.merge(emptyOutput, v);
-      }
+      emptyOutput = outputs.merge(emptyOutput, v);
     } else {
       emptyOutput = v;
     }

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Wed May 18 16:24:27 2011
@@ -100,7 +100,7 @@ public class MockTokenizer extends Token
           endOffset = off;
           cp = readCodePoint();
         } while (cp >= 0 && isTokenChar(cp));
-        offsetAtt.setOffset(startOffset, endOffset);
+        offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
         streamState = State.INCREMENT;
         return true;
       }

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java Wed May 18 16:24:27 2011
@@ -42,14 +42,13 @@ public class MockRandomMergePolicy exten
 
     if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
       
-      SegmentInfos segmentInfos2 = new SegmentInfos();
-      segmentInfos2.addAll(segmentInfos);
-      Collections.shuffle(segmentInfos2, random);
+      List<SegmentInfo> segments = new ArrayList<SegmentInfo>(segmentInfos.asList());
+      Collections.shuffle(segments, random);
 
       // TODO: sometimes make more than 1 merge?
       mergeSpec = new MergeSpecification();
       final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
-      mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge)));
+      mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
     }
 
     return mergeSpec;

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Wed May 18 16:24:27 2011
@@ -171,8 +171,15 @@ public abstract class LuceneTestCase ext
   private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
 
   /** Used to track if setUp and tearDown are called correctly from subclasses */
-  private boolean setup;
+  private static State state = State.INITIAL;
 
+  private static enum State {
+    INITIAL, // no tests ran yet
+    SETUP,   // test has called setUp()
+    RANTEST, // test is running
+    TEARDOWN // test has called tearDown()
+  };
+  
   /**
    * Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader.
    * This is a utility method to help them.
@@ -326,6 +333,7 @@ public abstract class LuceneTestCase ext
 
   @BeforeClass
   public static void beforeClassLuceneTestCaseJ4() {
+    state = State.INITIAL;
     staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
     random.setSeed(staticSeed);
     tempDirs.clear();
@@ -375,6 +383,11 @@ public abstract class LuceneTestCase ext
 
   @AfterClass
   public static void afterClassLuceneTestCaseJ4() {
+    if (!testsFailed) {
+      assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!", 
+          state == State.INITIAL || state == State.TEARDOWN);
+    }
+    state = State.INITIAL;
     if (! "false".equals(TEST_CLEAN_THREADS)) {
       int rogueThreads = threadCleanup("test class");
       if (rogueThreads > 0) {
@@ -483,17 +496,22 @@ public abstract class LuceneTestCase ext
     public void starting(FrameworkMethod method) {
       // set current method name for logging
       LuceneTestCase.this.name = method.getName();
+      if (!testsFailed) {
+        assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP);
+      }
+      state = State.RANTEST;
       super.starting(method);
     }
-
   };
 
   @Before
   public void setUp() throws Exception {
     seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2;
     random.setSeed(seed);
-    assertFalse("ensure your tearDown() calls super.tearDown()!!!", setup);
-    setup = true;
+    if (!testsFailed) {
+      assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN));
+    }
+    state = State.SETUP;
     savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
     Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
       public void uncaughtException(Thread t, Throwable e) {
@@ -529,8 +547,12 @@ public abstract class LuceneTestCase ext
 
   @After
   public void tearDown() throws Exception {
-    assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
-    setup = false;
+    if (!testsFailed) {
+      // Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state)
+      // because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know...
+      assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP);
+    }
+    state = State.TEARDOWN;
     BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
     if ("perMethod".equals(TEST_CLEAN_THREADS)) {
       int rogueThreads = threadCleanup("test method: '" + getName() + "'");