Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/18 18:24:34 UTC
svn commit: r1124321 [2/5] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contrib...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Wed May 18 16:24:27 2011
@@ -733,8 +733,7 @@ class DirectoryReader extends IndexReade
// case we have to roll back:
startCommit();
- final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
- rollbackSegmentInfos.addAll(segmentInfos);
+ final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);
boolean success = false;
try {
@@ -766,8 +765,7 @@ class DirectoryReader extends IndexReade
deleter.refresh();
// Restore all SegmentInfos (in case we pruned some)
- segmentInfos.clear();
- segmentInfos.addAll(rollbackSegmentInfos);
+ segmentInfos.rollbackSegmentInfos(rollbackSegments);
}
}
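The hunk above swaps the old copy-into-a-scratch-SegmentInfos idiom for an explicit backup list. A minimal, self-contained sketch of the same backup-and-restore pattern, with SimpleInfos as a hypothetical stand-in for SegmentInfos (not the actual Lucene class):

    import java.util.ArrayList;
    import java.util.List;

    final class SimpleInfos {
      private final List<String> segments = new ArrayList<String>();

      void add(String name) { segments.add(name); }

      // Analogous to createBackupSegmentInfos(false): a shallow snapshot.
      List<String> createBackup() {
        return new ArrayList<String>(segments);
      }

      // Analogous to rollbackSegmentInfos(List): restore the snapshot.
      void rollback(List<String> backup) {
        segments.clear();
        segments.addAll(backup);
      }
    }

The caller takes the backup before the risky commit, sets a success flag at the end, and on failure restores the snapshot in the finally block, exactly as the hunk does.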
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Wed May 18 16:24:27 2011
@@ -126,7 +126,6 @@ final class DocumentsWriter {
final DocumentsWriterPerThreadPool perThreadPool;
final FlushPolicy flushPolicy;
final DocumentsWriterFlushControl flushControl;
- final Healthiness healthiness;
DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
@@ -142,10 +141,7 @@ final class DocumentsWriter {
flushPolicy = configuredPolicy;
}
flushPolicy.init(this);
-
- healthiness = new Healthiness();
- final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
- flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
+ flushControl = new DocumentsWriterFlushControl(this, config);
}
synchronized void deleteQueries(final Query... queries) throws IOException {
@@ -283,31 +279,28 @@ final class DocumentsWriter {
ensureOpen();
boolean maybeMerge = false;
final boolean isUpdate = delTerm != null;
- if (healthiness.anyStalledThreads()) {
-
- // Help out flushing any pending DWPTs so we can un-stall:
+ if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
+ // Help out flushing any queued DWPTs so we can un-stall:
if (infoStream != null) {
- message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)");
+ message("DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
}
-
- // Try pick up pending threads here if possible
- DocumentsWriterPerThread flushingDWPT;
- while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
- // Don't push the delete here since the update could fail!
- maybeMerge = doFlush(flushingDWPT);
- if (!healthiness.anyStalledThreads()) {
- break;
+ do {
+ // Try to pick up pending threads here if possible
+ DocumentsWriterPerThread flushingDWPT;
+ while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
+ // Don't push the delete here since the update could fail!
+ maybeMerge |= doFlush(flushingDWPT);
}
- }
-
- if (infoStream != null && healthiness.anyStalledThreads()) {
- message("WARNING DocumentsWriter still has stalled threads; waiting");
- }
-
- healthiness.waitIfStalled(); // block if stalled
+
+ if (infoStream != null && flushControl.anyStalledThreads()) {
+ message("WARNING DocumentsWriter has stalled threads; waiting");
+ }
+
+ flushControl.waitIfStalled(); // block if stalled
+ } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs; try to help flushing
- if (infoStream != null && healthiness.anyStalledThreads()) {
- message("WARNING DocumentsWriter done waiting");
+ if (infoStream != null) {
+ message("continue indexing after helpling out flushing DocumentsWriter is healthy");
}
}
@@ -353,7 +346,6 @@ final class DocumentsWriter {
maybeMerge = true;
boolean success = false;
FlushTicket ticket = null;
-
try {
assert currentFullFlushDelQueue == null
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
@@ -511,9 +503,7 @@ final class DocumentsWriter {
anythingFlushed |= doFlush(flushingDWPT);
}
// If a concurrent flush is still in flight wait for it
- while (flushControl.anyFlushing()) {
- flushControl.waitForFlush();
- }
+ flushControl.waitForFlush();
if (!anythingFlushed) { // apply deletes if we did not flush any document
synchronized (ticketQueue) {
ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));
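Condensing the new updateDocument control flow above into a self-contained sketch (FlushControl is a hypothetical interface standing in for DocumentsWriterFlushControl, and helpFlush for the doFlush call):

    interface FlushControl {
      boolean anyStalledThreads();
      int numQueuedFlushes();
      Object nextPendingFlush(); // null when nothing is pending
      void waitIfStalled();      // blocks while the writer is stalled
    }

    final class HelpOutLoop {
      static boolean helpFlush(Object dwpt) { return false; } // stand-in for doFlush

      static boolean preUpdate(FlushControl fc) {
        boolean maybeMerge = false;
        if (fc.anyStalledThreads() || fc.numQueuedFlushes() > 0) {
          do {
            // Drain every queued DWPT before (possibly) blocking.
            Object dwpt;
            while ((dwpt = fc.nextPendingFlush()) != null) {
              maybeMerge |= helpFlush(dwpt);
            }
            fc.waitIfStalled(); // blocks only if still stalled
          } while (fc.numQueuedFlushes() != 0); // more work may have queued up
        }
        return maybeMerge;
      }
    }

The behavioral change versus the removed code: the indexing thread keeps helping until the flush queue is empty, instead of stopping as soon as the stall condition clears.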
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Wed May 18 16:24:27 2011
@@ -44,30 +44,32 @@ public final class DocumentsWriterFlushC
private long activeBytes = 0;
private long flushBytes = 0;
private volatile int numPending = 0;
- private volatile int numFlushing = 0;
final AtomicBoolean flushDeletes = new AtomicBoolean(false);
private boolean fullFlush = false;
- private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
+ private final Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
// only for safety reasons if a DWPT is close to the RAM limit
- private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>();
-
+ private final Queue<BlockedFlush> blockedFlushes = new LinkedList<BlockedFlush>();
+ double maxConfiguredRamBuffer = 0;
long peakActiveBytes = 0;// only with assert
long peakFlushBytes = 0;// only with assert
long peakNetBytes = 0;// only with assert
- private final Healthiness healthiness;
+ long peakDelta = 0; // only with assert
+ final DocumentsWriterStallControl stallControl;
private final DocumentsWriterPerThreadPool perThreadPool;
private final FlushPolicy flushPolicy;
private boolean closed = false;
private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
private final DocumentsWriter documentsWriter;
+ private final IndexWriterConfig config;
DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
- Healthiness healthiness, long hardMaxBytesPerDWPT) {
- this.healthiness = healthiness;
+ IndexWriterConfig config) {
+ this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy;
- this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT;
+ this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
+ this.config = config;
this.documentsWriter = documentsWriter;
}
@@ -82,6 +84,24 @@ public final class DocumentsWriterFlushC
public synchronized long netBytes() {
return flushBytes + activeBytes;
}
+
+ long stallLimitBytes() {
+ final double maxRamMB = config.getRAMBufferSizeMB();
+ return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
+ }
+
+ private boolean assertMemory() {
+ final double maxRamMB = config.getRAMBufferSizeMB();
+ if (maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
+ // for this assert we must be tolerant to ram buffer changes!
+ maxConfiguredRamBuffer = Math.max(maxRamMB, maxConfiguredRamBuffer);
+ final long ram = flushBytes + activeBytes;
+ // take peakDelta into account - worst case is that all flushing, pending and blocked DWPTs had maxMem and the last doc had the peakDelta
+ final long expected = (long)(2 * (maxConfiguredRamBuffer * 1024 * 1024)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
+ assert ram <= expected : "ram was " + ram + " expected: " + expected + " flush mem: " + flushBytes + " active: " + activeBytes ;
+ }
+ return true;
+ }
private void commitPerThreadBytes(ThreadState perThread) {
final long delta = perThread.perThread.bytesUsed()
@@ -105,53 +125,62 @@ public final class DocumentsWriterFlushC
peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
peakNetBytes = Math.max(peakNetBytes, netBytes());
+ peakDelta = Math.max(peakDelta, delta);
+
return true;
}
synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
boolean isUpdate) {
- commitPerThreadBytes(perThread);
- if (!perThread.flushPending) {
- if (isUpdate) {
- flushPolicy.onUpdate(this, perThread);
- } else {
- flushPolicy.onInsert(this, perThread);
+ try {
+ commitPerThreadBytes(perThread);
+ if (!perThread.flushPending) {
+ if (isUpdate) {
+ flushPolicy.onUpdate(this, perThread);
+ } else {
+ flushPolicy.onInsert(this, perThread);
+ }
+ if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
+ // Safety check to prevent a single DWPT exceeding its RAM limit. This
+ // is super important since we cannot address more than 2048 MB per DWPT
+ setFlushPending(perThread);
+ }
}
- if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
- // Safety check to prevent a single DWPT exceeding its RAM limit. This
- // is super important since we can not address more than 2048 MB per DWPT
- setFlushPending(perThread);
- if (fullFlush) {
- DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
- assert toBlock != null;
- blockedFlushes.add(toBlock);
+ final DocumentsWriterPerThread flushingDWPT;
+ if (fullFlush) {
+ if (perThread.flushPending) {
+ checkoutAndBlock(perThread);
+ flushingDWPT = nextPendingFlush();
+ } else {
+ flushingDWPT = null;
}
+ } else {
+ flushingDWPT = tryCheckoutForFlush(perThread);
}
+ return flushingDWPT;
+ } finally {
+ stallControl.updateStalled(this);
+ assert assertMemory();
}
- final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
- healthiness.updateStalled(this);
- return flushingDWPT;
+
+
}
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
assert flushingWriters.containsKey(dwpt);
try {
- numFlushing--;
Long bytes = flushingWriters.remove(dwpt);
flushBytes -= bytes.longValue();
perThreadPool.recycle(dwpt);
- healthiness.updateStalled(this);
+ stallControl.updateStalled(this);
+ assert assertMemory();
} finally {
notifyAll();
}
}
- public synchronized boolean anyFlushing() {
- return numFlushing != 0;
- }
-
public synchronized void waitForFlush() {
- if (numFlushing != 0) {
+ while (flushingWriters.size() != 0) {
try {
this.wait();
} catch (InterruptedException e) {
@@ -173,32 +202,51 @@ public final class DocumentsWriterFlushC
flushBytes += bytes;
activeBytes -= bytes;
numPending++; // write access synced
+ assert assertMemory();
} // don't assert on numDocs since we could hit an abort exception while selecting that DWPT for flushing
}
synchronized void doOnAbort(ThreadState state) {
- if (state.flushPending) {
- flushBytes -= state.bytesUsed;
- } else {
- activeBytes -= state.bytesUsed;
+ try {
+ if (state.flushPending) {
+ flushBytes -= state.bytesUsed;
+ } else {
+ activeBytes -= state.bytesUsed;
+ }
+ assert assertMemory();
+ // Take it out of the loop; this DWPT is stale
+ perThreadPool.replaceForFlush(state, closed);
+ } finally {
+ stallControl.updateStalled(this);
}
- // Take it out of the loop this DWPT is stale
- perThreadPool.replaceForFlush(state, closed);
- healthiness.updateStalled(this);
}
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
ThreadState perThread) {
- if (fullFlush) {
- return null;
+ return perThread.flushPending ? internalTryCheckOutForFlush(perThread) : null;
+ }
+
+ private void checkoutAndBlock(ThreadState perThread) {
+ perThread.lock();
+ try {
+ assert perThread.flushPending : "cannot block a non-pending ThreadState";
+ assert fullFlush : "cannot block if fullFlush == false";
+ final DocumentsWriterPerThread dwpt;
+ final long bytes = perThread.bytesUsed;
+ dwpt = perThreadPool.replaceForFlush(perThread, closed);
+ numPending--;
+ blockedFlushes.add(new BlockedFlush(dwpt, bytes));
+ } finally {
+ perThread.unlock();
}
- return internalTryCheckOutForFlush(perThread);
}
private DocumentsWriterPerThread internalTryCheckOutForFlush(
ThreadState perThread) {
- if (perThread.flushPending) {
+ assert Thread.holdsLock(this);
+ assert perThread.flushPending;
+ try {
// We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) {
try {
@@ -212,15 +260,16 @@ public final class DocumentsWriterFlushC
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(dwpt, Long.valueOf(bytes));
numPending--; // write access synced
- numFlushing++;
return dwpt;
}
} finally {
perThread.unlock();
}
}
+ return null;
+ } finally {
+ stallControl.updateStalled(this);
}
- return null;
}
@Override
@@ -231,12 +280,13 @@ public final class DocumentsWriterFlushC
DocumentsWriterPerThread nextPendingFlush() {
synchronized (this) {
- DocumentsWriterPerThread poll = flushQueue.poll();
- if (poll != null) {
+ final DocumentsWriterPerThread poll;
+ if ((poll = flushQueue.poll()) != null) {
+ stallControl.updateStalled(this);
return poll;
- }
+ }
}
- if (numPending > 0) {
+ if (numPending > 0 && !fullFlush) { // don't check if we are doing a full flush
final Iterator<ThreadState> allActiveThreads = perThreadPool
.getActivePerThreadsIterator();
while (allActiveThreads.hasNext() && numPending > 0) {
@@ -276,8 +326,8 @@ public final class DocumentsWriterFlushC
return documentsWriter.deleteQueue.numGlobalTermDeletes();
}
- int numFlushingDWPT() {
- return numFlushing;
+ synchronized int numFlushingDWPT() {
+ return flushingWriters.size();
}
public boolean doApplyAllDeletes() {
@@ -289,7 +339,7 @@ public final class DocumentsWriterFlushC
}
int numActiveDWPT() {
- return this.perThreadPool.getMaxThreadStates();
+ return this.perThreadPool.getActiveThreadState();
}
void markForFullFlush() {
@@ -331,11 +381,11 @@ public final class DocumentsWriterFlushC
if (!next.flushPending) {
setFlushPending(next);
}
+ final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
+ assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
+ assert dwpt == flushingDWPT : "flushControl returned different DWPT";
+ toFlush.add(flushingDWPT);
}
- final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
- assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
- assert dwpt == flushingDWPT : "flushControl returned different DWPT";
- toFlush.add(flushingDWPT);
} else {
// get the new delete queue from DW
next.perThread.initialize();
@@ -345,31 +395,54 @@ public final class DocumentsWriterFlushC
}
}
synchronized (this) {
- assert assertBlockedFlushes(flushingQueue);
- flushQueue.addAll(blockedFlushes);
- blockedFlushes.clear();
+ /* Make sure that all DWPTs that were concurrently marked as pending
+ * and moved to blocked are moved over to the flushQueue. There is
+ * a chance that this happens since we mark DWPTs for full flush
+ * without blocking indexing. */
+ pruneBlockedQueue(flushingQueue);
+ assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(toFlush);
+ stallControl.updateStalled(this);
+ }
+ }
+
+ /**
+ * Prunes the blockedQueue by removing all DWPTs associated with the given flush queue and moving them to the flushQueue.
+ */
+ private void pruneBlockedQueue(final DocumentsWriterDeleteQueue flushingQueue) {
+ Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
+ while (iterator.hasNext()) {
+ BlockedFlush blockedFlush = iterator.next();
+ if (blockedFlush.dwpt.deleteQueue == flushingQueue) {
+ iterator.remove();
+ assert !flushingWriters.containsKey(blockedFlush.dwpt) : "DWPT is already flushing";
+ // Record the flushing DWPT to reduce flushBytes in doAfterFlush
+ flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
+ // don't decr pending here - it's already done when the DWPT is blocked
+ flushQueue.add(blockedFlush.dwpt);
+ }
}
}
synchronized void finishFullFlush() {
assert fullFlush;
assert flushQueue.isEmpty();
+ assert flushingWriters.isEmpty();
try {
if (!blockedFlushes.isEmpty()) {
assert assertBlockedFlushes(documentsWriter.deleteQueue);
- flushQueue.addAll(blockedFlushes);
- blockedFlushes.clear();
+ pruneBlockedQueue(documentsWriter.deleteQueue);
+ assert blockedFlushes.isEmpty();
}
} finally {
fullFlush = false;
+ stallControl.updateStalled(this);
}
}
boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
- Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes;
- for (DocumentsWriterPerThread documentsWriterPerThread : flushes) {
- assert documentsWriterPerThread.deleteQueue == flushingQueue;
+ for (BlockedFlush blockedFlush : blockedFlushes) {
+ assert blockedFlush.dwpt.deleteQueue == flushingQueue;
}
return true;
}
@@ -379,18 +452,65 @@ public final class DocumentsWriterFlushC
for (DocumentsWriterPerThread dwpt : flushQueue) {
doAfterFlush(dwpt);
}
- for (DocumentsWriterPerThread dwpt : blockedFlushes) {
- doAfterFlush(dwpt);
+ for (BlockedFlush blockedFlush : blockedFlushes) {
+ flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
+ doAfterFlush(blockedFlush.dwpt);
}
-
} finally {
fullFlush = false;
flushQueue.clear();
blockedFlushes.clear();
+ stallControl.updateStalled(this);
}
}
- synchronized boolean isFullFlush() {
+ /**
+ * Returns <code>true</code> if a full flush is currently running
+ */
+ synchronized boolean isFullFlush() { // used by assert
return fullFlush;
}
+
+ /**
+ * Returns the number of flushes that are already checked out but not yet
+ * actively flushing
+ */
+ synchronized int numQueuedFlushes() {
+ return flushQueue.size();
+ }
+
+ /**
+ * Returns the number of flushes that are checked out but not yet available
+ * for flushing. This only applies during a full flush if a DWPT needs
+ * flushing but must not be flushed until the full flush has finished.
+ */
+ synchronized int numBlockedFlushes() {
+ return blockedFlushes.size();
+ }
+
+ private static class BlockedFlush {
+ final DocumentsWriterPerThread dwpt;
+ final long bytes;
+ BlockedFlush(DocumentsWriterPerThread dwpt, long bytes) {
+ super();
+ this.dwpt = dwpt;
+ this.bytes = bytes;
+ }
+ }
+
+ /**
+ * This method will block if too many DWPTs are currently flushing and no
+ * checked-out DWPTs are available
+ */
+ void waitIfStalled() {
+ stallControl.waitIfStalled();
+ }
+
+ /**
+ * Returns <code>true</code> iff stalled
+ */
+ boolean anyStalledThreads() {
+ return stallControl.anyStalledThreads();
+ }
+
}
\ No newline at end of file
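One consequence of the new stallLimitBytes() above, as a worked example: with a RAM buffer of 16 MB the writer stalls once net RAM (flushBytes + activeBytes) exceeds 2 * 16 MB = 33,554,432 bytes, while DISABLE_AUTO_FLUSH (-1 in IndexWriterConfig) disables stalling entirely via Long.MAX_VALUE. A self-contained restatement of that arithmetic:

    final class StallMath {
      static final double DISABLE_AUTO_FLUSH = -1; // same value as IndexWriterConfig's constant

      static long stallLimitBytes(double maxRamMB) {
        return maxRamMB != DISABLE_AUTO_FLUSH
            ? (long) (2 * (maxRamMB * 1024 * 1024))
            : Long.MAX_VALUE;
      }

      public static void main(String[] args) {
        System.out.println(stallLimitBytes(16.0));               // 33554432
        System.out.println(stallLimitBytes(DISABLE_AUTO_FLUSH)); // 9223372036854775807
      }
    }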
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java Wed May 18 16:24:27 2011
@@ -165,6 +165,13 @@ public abstract class DocumentsWriterPer
public int getMaxThreadStates() {
return perThreads.length;
}
+
+ /**
+ * Returns the number of active {@link ThreadState} instances.
+ */
+ public int getActiveThreadState() {
+ return numThreadStatesActive;
+ }
/**
* Returns a new {@link ThreadState} iff any new state is available otherwise
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java Wed May 18 16:24:27 2011
@@ -40,7 +40,13 @@ import java.util.Collection;
* refuses to run by default. Specify {@code -delete-prior-commits}
* to override this, allowing the tool to delete all but the last commit.
* From Java code this can be enabled by passing {@code true} to
- * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
+ * {@link #IndexUpgrader(Directory,Version,PrintStream,boolean)}.
+ * <p><b>Warning:</b> This tool may reorder documents if the index was partially
+ * upgraded before execution (e.g., documents were added). If your application relies
+ * on "monotonicity" of doc IDs (which means that the order in which the documents
+ * were added to the index is preserved), do a full optimize instead.
+ * The {@link MergePolicy} set by {@link IndexWriterConfig} may also reorder
+ * documents.
*/
public final class IndexUpgrader {
@@ -52,9 +58,11 @@ public final class IndexUpgrader {
System.err.println("reason, if the incoming index has more than one commit, the tool");
System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
System.err.println("this, allowing the tool to delete all but the last commit.");
+ System.err.println("WARNING: This tool may reorder document IDs!");
System.exit(1);
}
+ @SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException {
String dir = null;
boolean deletePriorCommits = false;
@@ -74,7 +82,7 @@ public final class IndexUpgrader {
printUsage();
}
- new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
+ new IndexUpgrader(FSDirectory.open(new File(dir)), Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
}
private final Directory dir;
@@ -82,16 +90,22 @@ public final class IndexUpgrader {
private final IndexWriterConfig iwc;
private final boolean deletePriorCommits;
- @SuppressWarnings("deprecation")
- public IndexUpgrader(Directory dir) {
- this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
- }
-
- @SuppressWarnings("deprecation")
- public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
- this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
- }
-
+ /** Creates an index upgrader on the given directory, using an {@link IndexWriter} with the given
+ * {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points. */
+ public IndexUpgrader(Directory dir, Version matchVersion) {
+ this(dir, new IndexWriterConfig(matchVersion, null), null, false);
+ }
+
+ /** Creates an index upgrader on the given directory, using an {@link IndexWriter} with the given
+ * {@code matchVersion}. Indexes with multiple commit points can be upgraded by removing all
+ * commits but the last one. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
+ public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
+ this(dir, new IndexWriterConfig(matchVersion, null), infoStream, deletePriorCommits);
+ }
+
+ /** Creates an index upgrader on the given directory, using an {@link IndexWriter} with the given
+ * config. Indexes with multiple commit points can be upgraded by removing all
+ * commits but the last one. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
this.dir = dir;
this.iwc = iwc;
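Mirroring the updated main() above, a programmatic upgrade with the new Version-taking constructor might look like this sketch (the index path is a placeholder):

    import java.io.File;
    import org.apache.lucene.index.IndexUpgrader;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class UpgradeExample {
      public static void main(String[] args) throws Exception {
        // null infoStream: no logging; false: refuse to delete prior commits
        new IndexUpgrader(FSDirectory.open(new File("/path/to/index")),
            Version.LUCENE_CURRENT, null, false).upgrade();
      }
    }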
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed May 18 16:24:27 2011
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -221,7 +222,7 @@ public class IndexWriter implements Clos
private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed
- private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private List<SegmentInfo> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;
@@ -440,14 +441,14 @@ public class IndexWriter implements Clos
public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
assert idx != -1: "info=" + info + " isn't in pool";
- assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
+ assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true;
}
public synchronized SegmentInfo mapToLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
if (idx != -1) {
- info = segmentInfos.get(idx);
+ info = segmentInfos.info(idx);
}
return info;
}
@@ -818,7 +819,7 @@ public class IndexWriter implements Clos
}
}
- setRollbackSegmentInfos(segmentInfos);
+ rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
@@ -862,10 +863,6 @@ public class IndexWriter implements Clos
}
}
- private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
- rollbackSegmentInfos = (SegmentInfos) infos.clone();
- }
-
/**
* Returns the private {@link IndexWriterConfig}, cloned
* from the {@link IndexWriterConfig} passed to
@@ -1126,8 +1123,7 @@ public class IndexWriter implements Clos
else
count = 0;
- for (int i = 0; i < segmentInfos.size(); i++)
- count += segmentInfos.info(i).docCount;
+ count += segmentInfos.totalDocCount();
return count;
}
@@ -1144,8 +1140,7 @@ public class IndexWriter implements Clos
else
count = 0;
- for (int i = 0; i < segmentInfos.size(); i++) {
- final SegmentInfo info = segmentInfos.info(i);
+ for (final SegmentInfo info : segmentInfos) {
count += info.docCount - numDeletedDocs(info);
}
return count;
@@ -1159,9 +1154,11 @@ public class IndexWriter implements Clos
if (docWriter.anyDeletions()) {
return true;
}
- for (int i = 0; i < segmentInfos.size(); i++)
- if (segmentInfos.info(i).hasDeletions())
+ for (final SegmentInfo info : segmentInfos) {
+ if (info.hasDeletions()) {
return true;
+ }
+ }
return false;
}
@@ -1554,7 +1551,8 @@ public class IndexWriter implements Clos
synchronized(this) {
resetMergeExceptions();
- segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos);
+ segmentsToOptimize.clear();
+ segmentsToOptimize.addAll(segmentInfos.asSet());
optimizeMaxNumSegments = maxNumSegments;
// Now mark all pending & running merges as optimize
@@ -1778,7 +1776,7 @@ public class IndexWriter implements Clos
final MergePolicy.MergeSpecification spec;
if (optimize) {
- spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
+ spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
if (spec != null) {
final int numMerges = spec.merges.size();
@@ -1889,8 +1887,7 @@ public class IndexWriter implements Clos
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
- segmentInfos.clear();
- segmentInfos.addAll(rollbackSegmentInfos);
+ segmentInfos.rollbackSegmentInfos(rollbackSegments);
docWriter.abort();
@@ -2555,7 +2552,7 @@ public class IndexWriter implements Clos
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
- setRollbackSegmentInfos(pendingCommit);
+ rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
deleter.checkpoint(pendingCommit, true);
} finally {
// Matches the incRef done in startCommit:
@@ -2660,7 +2657,7 @@ public class IndexWriter implements Clos
final synchronized void applyAllDeletes() throws IOException {
flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
- .applyDeletes(readerPool, segmentInfos);
+ .applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}
@@ -2709,7 +2706,7 @@ public class IndexWriter implements Clos
private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
- if (segmentInfos.indexOf(info) == -1) {
+ if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}
@@ -2847,39 +2844,13 @@ public class IndexWriter implements Clos
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}
- final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
- int segIdx = 0;
- int newSegIdx = 0;
- boolean inserted = false;
- final int curSegCount = segmentInfos.size();
- while(segIdx < curSegCount) {
- final SegmentInfo info = segmentInfos.info(segIdx++);
- if (mergedAway.contains(info)) {
- if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
- segmentInfos.set(segIdx-1, merge.info);
- inserted = true;
- newSegIdx++;
- }
- } else {
- segmentInfos.set(newSegIdx++, info);
- }
- }
-
- // Either we found place to insert segment, or, we did
- // not, but only because all segments we merged became
- // deleted while we are merging, in which case it should
- // be the case that the new segment is also all deleted:
- if (!inserted) {
- assert allDeleted;
- if (keepFullyDeletedSegments) {
- segmentInfos.add(0, merge.info);
- } else {
- readerPool.drop(merge.info);
- }
+ final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
+ segmentInfos.applyMergeChanges(merge, dropSegment);
+
+ if (dropSegment) {
+ readerPool.drop(merge.info);
}
-
- segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
-
+
if (infoStream != null) {
message("after commit: " + segString());
}
@@ -3014,7 +2985,7 @@ public class IndexWriter implements Clos
if (mergingSegments.contains(info)) {
return false;
}
- if (segmentInfos.indexOf(info) == -1) {
+ if (!segmentInfos.contains(info)) {
return false;
}
if (info.dir != directory) {
@@ -3462,7 +3433,7 @@ public class IndexWriter implements Clos
}
// utility routines for tests
- SegmentInfo newestSegment() {
+ synchronized SegmentInfo newestSegment() {
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}
@@ -3472,19 +3443,18 @@ public class IndexWriter implements Clos
}
/** @lucene.internal */
- public synchronized String segString(List<SegmentInfo> infos) throws IOException {
- StringBuilder buffer = new StringBuilder();
- final int count = infos.size();
- for(int i = 0; i < count; i++) {
- if (i > 0) {
+ public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
+ final StringBuilder buffer = new StringBuilder();
+ for(final SegmentInfo s : infos) {
+ if (buffer.length() > 0) {
buffer.append(' ');
}
- buffer.append(segString(infos.get(i)));
+ buffer.append(segString(s));
}
-
return buffer.toString();
}
+ /** @lucene.internal */
public synchronized String segString(SegmentInfo info) throws IOException {
StringBuilder buffer = new StringBuilder();
SegmentReader reader = readerPool.getIfExists(info);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed May 18 16:24:27 2011
@@ -133,10 +133,15 @@ public final class IndexWriterConfig imp
/**
* Creates a new config with defaults that match the specified
- * {@link Version} as well as the default {@link Analyzer}. {@link Version} is
- * a placeholder for future changes. The default settings are relevant to 3.1
- * and before. In the future, if different settings will apply to different
- * versions, they will be documented here.
+ * {@link Version} as well as the default {@link
+ * Analyzer}. If matchVersion is >= {@link
+ * Version#LUCENE_32}, {@link TieredMergePolicy} is used
+ * for merging; else {@link LogByteSizeMergePolicy}.
+ * Note that {@link TieredMergePolicy} is free to select
+ * non-contiguous merges, which means docIDs may not
+ * remain monotonic over time. If this is a problem, you
+ * should switch to {@link LogByteSizeMergePolicy} or
+ * {@link LogDocMergePolicy}.
*/
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
this.matchVersion = matchVersion;
@@ -154,7 +159,11 @@ public final class IndexWriterConfig imp
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
- mergePolicy = new TieredMergePolicy();
+ if (matchVersion.onOrAfter(Version.LUCENE_32)) {
+ mergePolicy = new TieredMergePolicy();
+ } else {
+ mergePolicy = new LogByteSizeMergePolicy();
+ }
readerPooling = DEFAULT_READER_POOLING;
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
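Given this version-dependent default, applications that depend on monotonic docIDs can pin the merge policy explicitly, as the new javadoc suggests. A minimal sketch, assuming the standard setMergePolicy setter on IndexWriterConfig:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.util.Version;

    public class ContiguousMergesConfig {
      static IndexWriterConfig newConfig() {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32,
            new StandardAnalyzer(Version.LUCENE_32));
        // Log* policies only merge adjacent segments, preserving docID order.
        conf.setMergePolicy(new LogByteSizeMergePolicy());
        return conf;
      }
    }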
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed May 18 16:24:27 2011
@@ -242,6 +242,7 @@ public abstract class LogMergePolicy ext
private MergeSpecification findMergesForOptimizeSizeLimit(
SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
+ final List<SegmentInfo> segments = infos.asList();
int start = last - 1;
while (start >= 0) {
@@ -254,12 +255,12 @@ public abstract class LogMergePolicy ext
// unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
- spec.add(new OneMerge(infos.range(start + 1, last)));
+ spec.add(new OneMerge(segments.subList(start + 1, last)));
}
last = start;
} else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge.
- spec.add(new OneMerge(infos.range(start, last)));
+ spec.add(new OneMerge(segments.subList(start, last)));
last = start;
}
--start;
@@ -267,7 +268,7 @@ public abstract class LogMergePolicy ext
// Add any left-over segments, unless there is just 1 already optimized.
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
- spec.add(new OneMerge(infos.range(start, last)));
+ spec.add(new OneMerge(segments.subList(start, last)));
}
return spec.merges.size() == 0 ? null : spec;
@@ -280,11 +281,12 @@ public abstract class LogMergePolicy ext
*/
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
+ final List<SegmentInfo> segments = infos.asList();
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
- spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
+ spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
last -= mergeFactor;
}
@@ -296,7 +298,7 @@ public abstract class LogMergePolicy ext
// Since we must optimize down to 1 segment, the
// choice is simple:
if (last > 1 || !isOptimized(infos.info(0))) {
- spec.add(new OneMerge(infos.range(0, last)));
+ spec.add(new OneMerge(segments.subList(0, last)));
}
} else if (last > maxNumSegments) {
@@ -325,7 +327,7 @@ public abstract class LogMergePolicy ext
}
}
- spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
+ spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
}
}
return spec.merges.size() == 0 ? null : spec;
@@ -412,7 +414,8 @@ public abstract class LogMergePolicy ext
@Override
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
- final int numSegments = segmentInfos.size();
+ final List<SegmentInfo> segments = segmentInfos.asList();
+ final int numSegments = segments.size();
if (verbose())
message("findMergesToExpungeDeletes: " + numSegments + " segments");
@@ -434,7 +437,7 @@ public abstract class LogMergePolicy ext
// deletions, so force a merge now:
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
- spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
+ spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i;
}
} else if (firstSegmentWithDeletions != -1) {
@@ -443,7 +446,7 @@ public abstract class LogMergePolicy ext
// mergeFactor segments
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
- spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
+ spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1;
}
}
@@ -451,7 +454,7 @@ public abstract class LogMergePolicy ext
if (firstSegmentWithDeletions != -1) {
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
- spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
+ spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
}
return spec;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed May 18 16:24:27 2011
@@ -72,7 +72,7 @@ public abstract class MergePolicy implem
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
- long estimatedMergeBytes; // used by IndexWriter
+ public long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
public final List<SegmentInfo> segments;
@@ -84,7 +84,8 @@ public abstract class MergePolicy implem
public OneMerge(List<SegmentInfo> segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
- this.segments = segments;
+ // clone the list, as the incoming list may be backed by the original SegmentInfos and may be modified
+ this.segments = new ArrayList<SegmentInfo>(segments);
int count = 0;
for(SegmentInfo info : segments) {
count += info.docCount;
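The defensive copy added to OneMerge matters because LogMergePolicy (above) now hands it segments.subList(...) views, which stay backed by the live list. A self-contained demonstration of the aliasing hazard:

    import java.util.ArrayList;
    import java.util.List;

    public class SubListAliasing {
      public static void main(String[] args) {
        List<String> live = new ArrayList<String>();
        live.add("_0"); live.add("_1"); live.add("_2");

        List<String> view = live.subList(0, 2);          // backed by 'live'
        List<String> copy = new ArrayList<String>(view); // decoupled snapshot

        live.set(0, "_9");                // mutate through the backing list
        System.out.println(view.get(0));  // prints _9: the view sees the change
        System.out.println(copy.get(0));  // prints _0: the copy does not
      }
    }

Structural changes to the backing list would even invalidate the view outright (ConcurrentModificationException), so cloning in the OneMerge constructor is the safe choice.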
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Wed May 18 16:24:27 2011
@@ -42,7 +42,7 @@ import org.apache.lucene.util.Constants;
*
* @lucene.experimental
*/
-public final class SegmentInfo {
+public final class SegmentInfo implements Cloneable {
// TODO: remove with hasVector and hasProx
private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Wed May 18 16:24:27 2011
@@ -20,13 +20,16 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
import java.util.Map;
-import java.util.Vector;
+import java.util.Set;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -45,7 +48,7 @@ import org.apache.lucene.util.ThreadInte
*
* @lucene.experimental
*/
-public final class SegmentInfos extends Vector<SegmentInfo> {
+public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
/*
* The file format version, a negative number.
@@ -84,7 +87,12 @@ public final class SegmentInfos extends
private int format;
private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
-
+
+ private List<SegmentInfo> segments = new ArrayList<SegmentInfo>();
+ private Set<SegmentInfo> segmentSet = new HashSet<SegmentInfo>();
+ private transient List<SegmentInfo> cachedUnmodifiableList;
+ private transient Set<SegmentInfo> cachedUnmodifiableSet;
+
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
@@ -107,8 +115,8 @@ public final class SegmentInfos extends
return format;
}
- public final SegmentInfo info(int i) {
- return get(i);
+ public SegmentInfo info(int i) {
+ return segments.get(i);
}
/**
@@ -237,7 +245,7 @@ public final class SegmentInfos extends
boolean success = false;
// Clear any previous segments:
- clear();
+ this.clear();
generation = generationFromSegmentsFileName(segmentFileName);
@@ -252,7 +260,7 @@ public final class SegmentInfos extends
if (!success) {
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
- clear();
+ this.clear();
}
}
}
@@ -349,15 +357,14 @@ public final class SegmentInfos extends
/** Prunes any segment whose docs are all deleted. */
public void pruneDeletedSegments() {
- int segIdx = 0;
- while(segIdx < size()) {
- final SegmentInfo info = info(segIdx);
+ for(final Iterator<SegmentInfo> it = segments.iterator(); it.hasNext();) {
+ final SegmentInfo info = it.next();
if (info.getDelCount() == info.docCount) {
- remove(segIdx);
- } else {
- segIdx++;
+ it.remove();
+ segmentSet.remove(info);
}
}
+ assert segmentSet.size() == segments.size();
}
/**
@@ -367,14 +374,23 @@ public final class SegmentInfos extends
@Override
public Object clone() {
- SegmentInfos sis = (SegmentInfos) super.clone();
- for(int i=0;i<sis.size();i++) {
- final SegmentInfo info = sis.info(i);
- assert info.getSegmentCodecs() != null;
- sis.set(i, (SegmentInfo) info.clone());
+ try {
+ final SegmentInfos sis = (SegmentInfos) super.clone();
+ // deep clone, first recreate all collections:
+ sis.segments = new ArrayList<SegmentInfo>(size());
+ sis.segmentSet = new HashSet<SegmentInfo>(size());
+ sis.cachedUnmodifiableList = null;
+ sis.cachedUnmodifiableSet = null;
+ for(final SegmentInfo info : this) {
+ assert info.getSegmentCodecs() != null;
+ // don't access segments directly; use the add method!
+ sis.add((SegmentInfo) info.clone());
+ }
+ sis.userData = new HashMap<String,String>(userData);
+ return sis;
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException("should not happen", e);
}
- sis.userData = new HashMap<String,String>(userData);
- return sis;
}
/**
@@ -742,18 +758,6 @@ public final class SegmentInfos extends
protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
}
- /**
- * Returns a new SegmentInfos containing the SegmentInfo
- * instances in the specified range first (inclusive) to
- * last (exclusive), so total number of segments returned
- * is last-first.
- */
- public SegmentInfos range(int first, int last) {
- SegmentInfos infos = new SegmentInfos(codecs);
- infos.addAll(super.subList(first, last));
- return infos;
- }
-
// Carry over generation numbers from another SegmentInfos
void updateGeneration(SegmentInfos other) {
lastGeneration = other.lastGeneration;
@@ -831,6 +835,10 @@ public final class SegmentInfos extends
} catch (Throwable t) {
// throw orig excp
}
+ } else {
+ // we must sync here explicitly since during a commit
+ // IW will not sync the global field map.
+ dir.sync(Collections.singleton(name));
}
}
return version;
@@ -956,7 +964,7 @@ public final class SegmentInfos extends
}
- public synchronized String toString(Directory directory) {
+ public String toString(Directory directory) {
StringBuilder buffer = new StringBuilder();
buffer.append(getCurrentSegmentFileName()).append(": ");
final int count = size();
@@ -987,8 +995,7 @@ public final class SegmentInfos extends
* remain write once.
*/
void replace(SegmentInfos other) {
- clear();
- addAll(other);
+ rollbackSegmentInfos(other.asList());
lastGeneration = other.lastGeneration;
lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
format = other.format;
@@ -1014,7 +1021,7 @@ public final class SegmentInfos extends
* Loads or returns the already loaded the global field number map for this {@link SegmentInfos}.
* If this {@link SegmentInfos} has no global field number map the returned instance is empty
*/
- synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
+ FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
if (globalFieldNumberMap != null) {
return globalFieldNumberMap;
}
@@ -1054,4 +1061,135 @@ public final class SegmentInfos extends
long getLastGlobalFieldMapVersion() {
return lastGlobalFieldMapVersion;
}
+
+ /** Applies all changes caused by committing a merge to this SegmentInfos. */
+ void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) {
+ final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
+ boolean inserted = false;
+ int newSegIdx = 0;
+ for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) {
+ assert segIdx >= newSegIdx;
+ final SegmentInfo info = segments.get(segIdx);
+ if (mergedAway.contains(info)) {
+ if (!inserted && !dropSegment) {
+ segments.set(segIdx, merge.info);
+ inserted = true;
+ newSegIdx++;
+ }
+ } else {
+ segments.set(newSegIdx, info);
+ newSegIdx++;
+ }
+ }
+
+ // Either we found a place to insert the new segment, or we did not, but
+ // only because all segments we merged became deleted while we were
+ // merging, in which case the new segment is also all deleted;
+ // insert it at the beginning if it should not be dropped:
+ if (!inserted && !dropSegment) {
+ segments.add(0, merge.info);
+ }
+
+ // the rest of the segments in the list are duplicates of earlier entries, so remove them from the list only, not from the set!
+ segments.subList(newSegIdx, segments.size()).clear();
+
+ // update the Set
+ if (!dropSegment) {
+ segmentSet.add(merge.info);
+ }
+ segmentSet.removeAll(mergedAway);
+
+ assert segmentSet.size() == segments.size();
+ }
+
+ List<SegmentInfo> createBackupSegmentInfos(boolean cloneChildren) {
+ if (cloneChildren) {
+ final List<SegmentInfo> list = new ArrayList<SegmentInfo>(size());
+ for(final SegmentInfo info : this) {
+ assert info.getSegmentCodecs() != null;
+ list.add((SegmentInfo) info.clone());
+ }
+ return list;
+ } else {
+ return new ArrayList<SegmentInfo>(segments);
+ }
+ }
+
+ void rollbackSegmentInfos(List<SegmentInfo> infos) {
+ this.clear();
+ this.addAll(infos);
+ }
+
+ /** Returns an <b>unmodifiable</b> {@link Iterator} of contained segments in order. */
+ // @Override (comment out until Java 6)
+ public Iterator<SegmentInfo> iterator() {
+ return asList().iterator();
+ }
+
+ /** Returns all contained segments as an <b>unmodifiable</b> {@link List} view. */
+ public List<SegmentInfo> asList() {
+ if (cachedUnmodifiableList == null) {
+ cachedUnmodifiableList = Collections.unmodifiableList(segments);
+ }
+ return cachedUnmodifiableList;
+ }
+
+ /** Returns all contained segments as an <b>unmodifiable</b> {@link Set} view.
+ * The set's iteration order is undefined; use the {@link List} view or {@link #iterator} to get all segments in order. */
+ public Set<SegmentInfo> asSet() {
+ if (cachedUnmodifiableSet == null) {
+ cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
+ }
+ return cachedUnmodifiableSet;
+ }
+
+ public int size() {
+ return segments.size();
+ }
+
+ public void add(SegmentInfo si) {
+ if (segmentSet.contains(si)) {
+ throw new IllegalStateException("Cannot add the same segment twice to this SegmentInfos instance");
+ }
+ segments.add(si);
+ segmentSet.add(si);
+ assert segmentSet.size() == segments.size();
+ }
+
+ public void addAll(Iterable<SegmentInfo> sis) {
+ for (final SegmentInfo si : sis) {
+ this.add(si);
+ }
+ }
+
+ public void clear() {
+ segments.clear();
+ segmentSet.clear();
+ }
+
+ public void remove(SegmentInfo si) {
+ final int index = this.indexOf(si);
+ if (index >= 0) {
+ this.remove(index);
+ }
+ }
+
+ public void remove(int index) {
+ segmentSet.remove(segments.remove(index));
+ assert segmentSet.size() == segments.size();
+ }
+
+ public boolean contains(SegmentInfo si) {
+ return segmentSet.contains(si);
+ }
+
+ public int indexOf(SegmentInfo si) {
+ if (segmentSet.contains(si)) {
+ return segments.indexOf(si);
+ } else {
+ return -1;
+ }
+ }
+
}
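The rewritten class pairs an ordered ArrayList with a HashSet so that contains/indexOf checks are O(1) while iteration order is preserved, and every mutator keeps the two in sync. A stripped-down sketch of the idiom (not the Lucene class itself):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    final class ListBackedSet<T> {
      private final List<T> list = new ArrayList<T>();
      private final Set<T> set = new HashSet<T>();

      void add(T item) {
        if (set.contains(item)) {
          throw new IllegalStateException("duplicate element");
        }
        list.add(item);
        set.add(item);
        assert set.size() == list.size(); // the invariant SegmentInfos asserts
      }

      void remove(int index) {
        set.remove(list.remove(index));
        assert set.size() == list.size();
      }

      boolean contains(T item) { return set.contains(item); } // O(1)
      T get(int i) { return list.get(i); }                    // ordered access
    }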
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Wed May 18 16:24:27 2011
@@ -251,9 +251,7 @@ public class TieredMergePolicy extends M
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
- final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
- infosSorted.addAll(infos);
-
+ final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(infos.asList());
Collections.sort(infosSorted, segmentByteSizeDescending);
// Compute total index bytes & print details about the index
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java Wed May 18 16:24:27 2011
@@ -40,6 +40,11 @@ import java.util.Set;
* w.optimize();
* w.close();
* </pre>
+ * <p><b>Warning:</b> This merge policy may reorder documents if the index was partially
+ * upgraded before calling optimize (e.g., documents were added). If your application relies
+ * on "monotonicity" of doc IDs (which means that the order in which the documents
+ * were added to the index is preserved), do a full optimize instead. Please note that the
+ * delegate {@code MergePolicy} may also reorder documents.
* @lucene.experimental
* @see IndexUpgrader
*/
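For completeness, the wrapping shown in the (partially elided) pre block above boils down to the following sketch, assuming the single-argument UpgradeIndexMergePolicy(MergePolicy) constructor documented for this class:

    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.UpgradeIndexMergePolicy;
    import org.apache.lucene.util.Version;

    public class UpgradePolicyConfig {
      static IndexWriterConfig newConfig() {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_32, null);
        // Wrap the delegate so optimize() also rewrites old-format segments.
        iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
        return iwc;
      }
    }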
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java Wed May 18 16:24:27 2011
@@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter
private class FSTFieldWriter extends FieldWriter {
private final Builder<Long> fstBuilder;
private final PositiveIntOutputs fstOutputs;
+ private final long startTermsFilePointer;
final FieldInfo fieldInfo;
int numIndexTerms;
@@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter
// Always put empty string in
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
+ startTermsFilePointer = termsFilePointer;
}
@Override
@@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+ if (text.length == 0) {
+ // We already added empty string in ctor
+ assert termsFilePointer == startTermsFilePointer;
+ return;
+ }
final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text);
try {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java Wed May 18 16:24:27 2011
@@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
public abstract int nextDoc() throws IOException;
/**
- * Advances to the first beyond the current whose document number is greater
- * than or equal to <i>target</i>. Returns the current document number or
- * {@link #NO_MORE_DOCS} if there are no more docs in the set.
+ * Advances to the first document beyond (see NOTE below) the current one whose
+ * number is greater than or equal to <i>target</i>. Returns the current
+ * document number, or {@link #NO_MORE_DOCS} if there are no more docs in the
+ * set.
* <p>
* Behaves as if written:
*
@@ -78,7 +79,7 @@ public abstract class DocIdSetIterator {
*
* Some implementations are considerably more efficient than that.
* <p>
- * <b>NOTE:</b> when <code> target ≤ current</code> implementations may opt
+ * <b>NOTE:</b> when <code> target ≤ current</code> implementations may opt
* not to advance beyond their current {@link #docID()}.
* <p>
* <b>NOTE:</b> this method may be called with {@link #NO_MORE_DOCS} for
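A minimal leapfrog sketch of how the contract above is typically exercised;
i1, i2 and collect() are illustrative assumptions, not APIs from this commit,
and IOException handling is elided. Each advance() target is strictly beyond
the callee's current doc, so the "target <= current" NOTE never applies:

  int a = i1.nextDoc(), b = i2.nextDoc();
  while (a != DocIdSetIterator.NO_MORE_DOCS && b != DocIdSetIterator.NO_MORE_DOCS) {
    if (a < b) {
      a = i1.advance(b);        // jump i1 forward to b or beyond
    } else if (b < a) {
      b = i2.advance(a);        // jump i2 forward to a or beyond
    } else {
      collect(a);               // both iterators agree on doc a
      a = i1.nextDoc();
      b = i2.nextDoc();
    }
  }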
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/QueryTermVector.java Wed May 18 16:24:27 2011
@@ -55,7 +55,12 @@ public class QueryTermVector implements
public QueryTermVector(String queryString, Analyzer analyzer) {
if (analyzer != null)
{
- TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
+ TokenStream stream;
+ try {
+ stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+ } catch (IOException e1) {
+ // fall back to an empty term vector if the analyzer cannot provide a stream
+ stream = null;
+ }
if (stream != null)
{
List<BytesRef> terms = new ArrayList<BytesRef>();
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Wed May 18 16:24:27 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.HashMap;
+import java.util.HashSet;
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
@@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends P
/**
* Init PhrasePositions in place.
- * There is a one time initialization for this scorer:
+ * There is a one-time initialization for this scorer (taking place at the first doc that matches all terms):
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
+ * This relies on the fact that the position in PP is computed as (TP.position - offset),
+ * so by adding the offset back we effectively compare term positions and identify
+ * that the two are the same term.
+ * An exception to this is two distinct terms at the same offset in the query and the
+ * same position in the doc. This case is detected by comparing just the (query) offsets,
+ * and two such PPs are not considered "repeating".
* <br>- Also mark each such pp by pp.repeats = true.
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
* In particular, this allows to score queries with no repetitions with no overhead due to this computation.
@@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends P
if (!checkedRepeats) {
checkedRepeats = true;
// check for repeats
- HashMap<PhrasePositions, Object> m = null;
+ HashSet<PhrasePositions> m = null;
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
int tpPos = pp.position + pp.offset;
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
+ if (pp.offset == pp2.offset) {
+ continue; // not a repetition: the two PPs are originally at the same offset in the query!
+ }
int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos) {
if (m == null)
- m = new HashMap<PhrasePositions, Object>();
+ m = new HashSet<PhrasePositions>();
pp.repeats = true;
pp2.repeats = true;
- m.put(pp,null);
- m.put(pp2,null);
+ m.add(pp);
+ m.add(pp2);
}
}
}
if (m!=null)
- repeats = m.keySet().toArray(new PhrasePositions[0]);
+ repeats = m.toArray(new PhrasePositions[0]);
}
// with repeats must advance some repeating pp's so they all start with differing tp's
@@ -204,11 +213,16 @@ final class SloppyPhraseScorer extends P
int tpPos = pp.position + pp.offset;
for (int i = 0; i < repeats.length; i++) {
PhrasePositions pp2 = repeats[i];
- if (pp2 == pp)
+ if (pp2 == pp) {
continue;
+ }
+ if (pp.offset == pp2.offset) {
+ continue; // not a repetition: the two PPs are originally at the same offset in the query!
+ }
int tpPos2 = pp2.position + pp2.offset;
- if (tpPos2 == tpPos)
+ if (tpPos2 == tpPos) {
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
+ }
}
return null;
}
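A hedged example of when the repeats logic above engages; field name and slop
are arbitrary. The phrase below holds the same term at query offsets 0 and 2,
so in a doc matching all terms both PhrasePositions resolve to the same
tpPos (position + offset) and are marked pp.repeats = true:

  PhraseQuery pq = new PhraseQuery();
  pq.add(new Term("body", "first"));   // offset 0
  pq.add(new Term("body", "second"));  // offset 1
  pq.add(new Term("body", "first"));   // offset 2: repeats the offset-0 term
  pq.setSlop(2);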
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java Wed May 18 16:24:27 2011
@@ -172,7 +172,7 @@ public class NIOFSDirectory extends FSDi
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
- + "with a a value smaller than the current chunk size (" + chunkSize + ")");
+ + "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java Wed May 18 16:24:27 2011
@@ -125,7 +125,7 @@ public class SimpleFSDirectory extends F
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
- + "with a value smaller than the current chunks size (" + chunkSize + ")");
+ + "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/BytesRef.java Wed May 18 16:24:27 2011
@@ -20,9 +20,6 @@ package org.apache.lucene.util;
import java.util.Comparator;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
-import java.io.ObjectInput;
-import java.io.ObjectOutput;
-import java.io.IOException;
/** Represents byte[], as a slice (offset + length) into an
* existing byte[].
@@ -193,6 +190,9 @@ public final class BytesRef implements C
@Override
public boolean equals(Object other) {
+ if (other == null) {
+ return false;
+ }
return this.bytesEquals((BytesRef) other);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/StringHelper.java Wed May 18 16:24:27 2011
@@ -1,5 +1,8 @@
package org.apache.lucene.util;
+import java.util.Comparator;
+import java.util.StringTokenizer;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -54,4 +57,42 @@ public abstract class StringHelper {
private StringHelper() {
}
+
+ /**
+ * @return a Comparator over versioned strings such as X.YY.Z
+ * @lucene.internal
+ */
+ public static Comparator<String> getVersionComparator() {
+ return versionComparator;
+ }
+
+ private static Comparator<String> versionComparator = new Comparator<String>() {
+ public int compare(String a, String b) {
+ StringTokenizer aTokens = new StringTokenizer(a, ".");
+ StringTokenizer bTokens = new StringTokenizer(b, ".");
+
+ while (aTokens.hasMoreTokens()) {
+ int aToken = Integer.parseInt(aTokens.nextToken());
+ if (bTokens.hasMoreTokens()) {
+ int bToken = Integer.parseInt(bTokens.nextToken());
+ if (aToken != bToken) {
+ return aToken - bToken;
+ }
+ } else {
+ // a has some extra trailing tokens; if these are all zeroes, that's OK.
+ if (aToken != 0) {
+ return 1;
+ }
+ }
+ }
+
+ // b has some extra trailing tokens; if these are all zeroes, that's OK.
+ while (bTokens.hasMoreTokens()) {
+ if (Integer.parseInt(bTokens.nextToken()) != 0)
+ return -1;
+ }
+
+ return 0;
+ }
+ };
}
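A hedged usage sketch of the comparator's semantics; the version strings and
assertions are illustrative. Trailing zero tokens compare equal, so "3.0" and
"3.0.0" tie, while any non-zero extra token wins:

  Comparator<String> cmp = StringHelper.getVersionComparator();
  assert cmp.compare("3.0", "3.0.0") == 0;  // trailing zeroes are ignored
  assert cmp.compare("3.1", "3.0.9") > 0;   // 1 > 0 at the second token
  assert cmp.compare("2.9.4", "3.0") < 0;   // 2 < 3 at the first token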
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java Wed May 18 16:24:27 2011
@@ -143,13 +143,16 @@ public class LevenshteinAutomata {
if (dest >= 0)
for (int r = 0; r < numRanges; r++)
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
- // reduce the state: this doesn't appear to help anything
- //states[k].reduce();
}
Automaton a = new Automaton(states[0]);
a.setDeterministic(true);
- a.setNumberedStates(states);
+ // We create some useless unconnected states, and it's a net win overall to remove these,
+ // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
+ // So, while we could set our numberedStates here, it's actually best not to, and instead
+ // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
+ //a.setNumberedStates(states);
+ a.reduce();
// we need not trim transitions to dead states, as they are not created.
//a.restoreInvariant();
return a;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Wed May 18 16:24:27 2011
@@ -30,6 +30,8 @@
package org.apache.lucene.util.automaton;
import java.util.BitSet;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.LinkedList;
/**
@@ -72,8 +74,12 @@ final public class MinimizationOperation
final int[] sigma = a.getStartPoints();
final State[] states = a.getNumberedStates();
final int sigmaLen = sigma.length, statesLen = states.length;
- final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
- final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
+ @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
+ (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
+ @SuppressWarnings("unchecked") final HashSet<State>[] partition =
+ (HashSet<State>[]) new HashSet[statesLen];
+ @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
+ (ArrayList<State>[]) new ArrayList[statesLen];
final int[] block = new int[statesLen];
final StateList[][] active = new StateList[statesLen][sigmaLen];
final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@@ -82,8 +88,8 @@ final public class MinimizationOperation
final BitSet split = new BitSet(statesLen),
refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
for (int q = 0; q < statesLen; q++) {
- splitblock[q] = new BitSet(statesLen);
- partition[q] = new BitSet(statesLen);
+ splitblock[q] = new ArrayList<State>();
+ partition[q] = new HashSet<State>();
for (int x = 0; x < sigmaLen; x++) {
active[q][x] = new StateList();
}
@@ -92,23 +98,22 @@ final public class MinimizationOperation
for (int q = 0; q < statesLen; q++) {
final State qq = states[q];
final int j = qq.accept ? 0 : 1;
- partition[j].set(q);
+ partition[j].add(qq);
block[q] = j;
for (int x = 0; x < sigmaLen; x++) {
- final BitSet[] r =
+ final ArrayList<State>[] r =
reverse[qq.step(sigma[x]).number];
if (r[x] == null)
- r[x] = new BitSet();
- r[x].set(q);
+ r[x] = new ArrayList<State>();
+ r[x].add(qq);
}
}
// initialize active sets
for (int j = 0; j <= 1; j++) {
- final BitSet part = partition[j];
for (int x = 0; x < sigmaLen; x++) {
- for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
- if (reverse[i][x] != null)
- active2[i][x] = active[j][x].add(states[i]);
+ for (final State qq : partition[j]) {
+ if (reverse[qq.number][x] != null)
+ active2[qq.number][x] = active[j][x].add(qq);
}
}
}
@@ -121,18 +126,19 @@ final public class MinimizationOperation
// process pending until fixed point
int k = 2;
while (!pending.isEmpty()) {
- IntPair ip = pending.removeFirst();
+ final IntPair ip = pending.removeFirst();
final int p = ip.n1;
final int x = ip.n2;
pending2.clear(x*statesLen + p);
// find states that need to be split off their blocks
for (StateListNode m = active[p][x].first; m != null; m = m.next) {
- final BitSet r = reverse[m.q.number][x];
- if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
+ final ArrayList<State> r = reverse[m.q.number][x];
+ if (r != null) for (final State s : r) {
+ final int i = s.number;
if (!split.get(i)) {
split.set(i);
final int j = block[i];
- splitblock[j].set(i);
+ splitblock[j].add(s);
if (!refine2.get(j)) {
refine2.set(j);
refine.set(j);
@@ -142,18 +148,19 @@ final public class MinimizationOperation
}
// refine blocks
for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
- final BitSet sb = splitblock[j];
- if (sb.cardinality() < partition[j].cardinality()) {
- final BitSet b1 = partition[j], b2 = partition[k];
- for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
- b1.clear(i);
- b2.set(i);
- block[i] = k;
+ final ArrayList<State> sb = splitblock[j];
+ if (sb.size() < partition[j].size()) {
+ final HashSet<State> b1 = partition[j];
+ final HashSet<State> b2 = partition[k];
+ for (final State s : sb) {
+ b1.remove(s);
+ b2.add(s);
+ block[s.number] = k;
for (int c = 0; c < sigmaLen; c++) {
- final StateListNode sn = active2[i][c];
+ final StateListNode sn = active2[s.number][c];
if (sn != null && sn.sl == active[j][c]) {
sn.remove();
- active2[i][c] = active[k][c].add(states[i]);
+ active2[s.number][c] = active[k][c].add(s);
}
}
}
@@ -173,8 +180,8 @@ final public class MinimizationOperation
k++;
}
refine2.clear(j);
- for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
- split.clear(i);
+ for (final State s : sb)
+ split.clear(s.number);
sb.clear();
}
refine.clear();
@@ -184,9 +191,7 @@ final public class MinimizationOperation
for (int n = 0; n < newstates.length; n++) {
final State s = new State();
newstates[n] = s;
- BitSet part = partition[n];
- for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
- final State q = states[i];
+ for (State q : partition[n]) {
if (q == a.initial) a.initial = s;
s.accept = q.accept;
s.number = q.number; // select representative
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Wed May 18 16:24:27 2011
@@ -232,9 +232,7 @@ public class FST<T> {
void setEmptyOutput(T v) throws IOException {
if (emptyOutput != null) {
- if (!emptyOutput.equals(v)) {
- emptyOutput = outputs.merge(emptyOutput, v);
- }
+ emptyOutput = outputs.merge(emptyOutput, v);
} else {
emptyOutput = v;
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Wed May 18 16:24:27 2011
@@ -100,7 +100,7 @@ public class MockTokenizer extends Token
endOffset = off;
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
- offsetAtt.setOffset(startOffset, endOffset);
+ offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
streamState = State.INCREMENT;
return true;
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java Wed May 18 16:24:27 2011
@@ -42,14 +42,13 @@ public class MockRandomMergePolicy exten
if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
- SegmentInfos segmentInfos2 = new SegmentInfos();
- segmentInfos2.addAll(segmentInfos);
- Collections.shuffle(segmentInfos2, random);
+ List<SegmentInfo> segments = new ArrayList<SegmentInfo>(segmentInfos.asList());
+ Collections.shuffle(segments, random);
// TODO: sometimes make more than 1 merge?
mergeSpec = new MergeSpecification();
final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
- mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge)));
+ mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
}
return mergeSpec;
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1124321&r1=1124320&r2=1124321&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Wed May 18 16:24:27 2011
@@ -171,8 +171,15 @@ public abstract class LuceneTestCase ext
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
/** Used to track if setUp and tearDown are called correctly from subclasses */
- private boolean setup;
+ private static State state = State.INITIAL;
+ private static enum State {
+ INITIAL, // no tests ran yet
+ SETUP, // test has called setUp()
+ RANTEST, // test is running
+ TEARDOWN // test has called tearDown()
+ };
+
/**
* Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader.
* This is a utility method to help them.
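A hedged sketch of the contract the new State machine enforces; MyTest is
hypothetical. Every subclass override must chain to super, or one of the
asserts added later in this commit fires on the next transition:

  public class MyTest extends LuceneTestCase {
    @Override
    public void setUp() throws Exception {
      super.setUp();     // INITIAL or TEARDOWN -> SETUP
      // per-test initialization here
    }
    @Override
    public void tearDown() throws Exception {
      // per-test cleanup here
      super.tearDown();  // RANTEST (or SETUP, if the test was assumed out) -> TEARDOWN
    }
  }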
@@ -326,6 +333,7 @@ public abstract class LuceneTestCase ext
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
+ state = State.INITIAL;
staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
random.setSeed(staticSeed);
tempDirs.clear();
@@ -375,6 +383,11 @@ public abstract class LuceneTestCase ext
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
+ if (!testsFailed) {
+ assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!",
+ state == State.INITIAL || state == State.TEARDOWN);
+ }
+ state = State.INITIAL;
if (! "false".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test class");
if (rogueThreads > 0) {
@@ -483,17 +496,22 @@ public abstract class LuceneTestCase ext
public void starting(FrameworkMethod method) {
// set current method name for logging
LuceneTestCase.this.name = method.getName();
+ if (!testsFailed) {
+ assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP);
+ }
+ state = State.RANTEST;
super.starting(method);
}
-
};
@Before
public void setUp() throws Exception {
seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2;
random.setSeed(seed);
- assertFalse("ensure your tearDown() calls super.tearDown()!!!", setup);
- setup = true;
+ if (!testsFailed) {
+ assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN));
+ }
+ state = State.SETUP;
savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
public void uncaughtException(Thread t, Throwable e) {
@@ -529,8 +547,12 @@ public abstract class LuceneTestCase ext
@After
public void tearDown() throws Exception {
- assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
- setup = false;
+ if (!testsFailed) {
+ // Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state)
+ // because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know...
+ assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP);
+ }
+ state = State.TEARDOWN;
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
if ("perMethod".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test method: '" + getName() + "'");