Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/02 15:51:22 UTC
svn commit: r1098566 [7/22] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/lucene/contrib...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Mon May 2 13:50:57 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
@@ -41,7 +41,7 @@ import org.apache.lucene.util.Version;
* IndexWriterConfig conf = new IndexWriterConfig(analyzer);
* conf.setter1().setter2();
* </pre>
- *
+ *
* @since 3.1
*/
public final class IndexWriterConfig implements Cloneable {
@@ -56,7 +56,7 @@ public final class IndexWriterConfig imp
* </ul>
*/
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
-
+
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
@@ -77,23 +77,19 @@ public final class IndexWriterConfig imp
/**
* Default value for the write lock timeout (1,000 ms).
- *
+ *
* @see #setDefaultWriteLockTimeout(long)
*/
public static long WRITE_LOCK_TIMEOUT = 1000;
- /** The maximum number of simultaneous threads that may be
- * indexing documents at once in IndexWriter; if more
- * than this many threads arrive they will wait for
- * others to finish. */
- public final static int DEFAULT_MAX_THREAD_STATES = 8;
-
/** Default setting for {@link #setReaderPooling}. */
public final static boolean DEFAULT_READER_POOLING = false;
/** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;
+ /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */
+ public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945;
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).
@@ -105,7 +101,7 @@ public final class IndexWriterConfig imp
/**
* Returns the default write lock timeout for newly instantiated
* IndexWriterConfigs.
- *
+ *
* @see #setDefaultWriteLockTimeout(long)
*/
public static long getDefaultWriteLockTimeout() {
@@ -127,10 +123,12 @@ public final class IndexWriterConfig imp
private volatile IndexReaderWarmer mergedSegmentWarmer;
private volatile CodecProvider codecProvider;
private volatile MergePolicy mergePolicy;
- private volatile int maxThreadStates;
+ private volatile DocumentsWriterPerThreadPool indexerThreadPool;
private volatile boolean readerPooling;
private volatile int readerTermsIndexDivisor;
-
+ private volatile FlushPolicy flushPolicy;
+ private volatile int perThreadHardLimitMB;
+
private Version matchVersion;
/**
@@ -153,15 +151,16 @@ public final class IndexWriterConfig imp
maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
- indexingChain = DocumentsWriter.defaultIndexingChain;
+ indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
- mergePolicy = new LogByteSizeMergePolicy();
- maxThreadStates = DEFAULT_MAX_THREAD_STATES;
+ mergePolicy = new TieredMergePolicy();
readerPooling = DEFAULT_READER_POOLING;
+ indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
+ perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
}
-
+
@Override
public Object clone() {
// Shallow clone is the only thing that's possible, since parameters like
@@ -186,7 +185,7 @@ public final class IndexWriterConfig imp
this.openMode = openMode;
return this;
}
-
+
/** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
public OpenMode getOpenMode() {
return openMode;
@@ -261,7 +260,7 @@ public final class IndexWriterConfig imp
public SimilarityProvider getSimilarityProvider() {
return similarityProvider;
}
-
+
/**
* Expert: set the interval between indexed terms. Large values cause less
* memory to be used by IndexReader, but slow random-access to terms. Small
@@ -281,7 +280,7 @@ public final class IndexWriterConfig imp
* In particular, <code>numUniqueTerms/interval</code> terms are read into
* memory by an IndexReader, and, on average, <code>interval/2</code> terms
* must be scanned for each random term access.
- *
+ *
* @see #DEFAULT_TERM_INDEX_INTERVAL
*
* <p>Takes effect immediately, but only applies to newly
@@ -293,7 +292,7 @@ public final class IndexWriterConfig imp
/**
* Returns the interval between indexed terms.
- *
+ *
* @see #setTermIndexInterval(int)
*/
public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
@@ -331,10 +330,10 @@ public final class IndexWriterConfig imp
this.writeLockTimeout = writeLockTimeout;
return this;
}
-
+
/**
* Returns allowed timeout when acquiring the write lock.
- *
+ *
* @see #setWriteLockTimeout(long)
*/
public long getWriteLockTimeout() {
@@ -343,15 +342,16 @@ public final class IndexWriterConfig imp
/**
* Determines the minimal number of delete terms required before the buffered
- * in-memory delete terms are applied and flushed. If there are documents
- * buffered in memory at the time, they are merged and a new segment is
- * created.
-
- * <p>Disabled by default (writer flushes by RAM usage).
+ * in-memory delete terms and queries are applied and flushed.
+ * <p>Disabled by default (writer flushes by RAM usage).</p>
+ * <p>
+ * NOTE: This setting won't trigger a segment flush.
+ * </p>
*
* @throws IllegalArgumentException if maxBufferedDeleteTerms
* is enabled but smaller than 1
* @see #setRAMBufferSizeMB
+ * @see #setFlushPolicy(FlushPolicy)
*
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
@@ -366,9 +366,9 @@ public final class IndexWriterConfig imp
}
/**
- * Returns the number of buffered deleted terms that will trigger a flush if
- * enabled.
- *
+ * Returns the number of buffered deleted terms that will trigger a flush of all
+ * buffered deletes if enabled.
+ *
* @see #setMaxBufferedDeleteTerms(int)
*/
public int getMaxBufferedDeleteTerms() {
@@ -380,45 +380,50 @@ public final class IndexWriterConfig imp
* and deletions before they are flushed to the Directory. Generally for
* faster indexing performance it's best to flush by RAM usage instead of
* document count and use as large a RAM buffer as you can.
- *
* <p>
* When this is set, the writer will flush whenever buffered documents and
* deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
* triggering a flush due to RAM usage. Note that if flushing by document
* count is also enabled, then the flush will be triggered by whichever comes
* first.
- *
+ * <p>
+ * The maximum RAM limit is inherently determined by the JVM's available memory.
+ * Yet, an {@link IndexWriter} session can consume a significantly larger amount
+ * of memory than the given RAM limit since this limit is just an indicator of when
+ * to flush memory-resident documents to the Directory. Flushes are likely to happen
+ * concurrently while other threads are adding documents to the writer. For application
+ * stability, the available memory in the JVM should be significantly larger than
+ * the RAM buffer used for indexing.
* <p>
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
* approximate. Specifically, if you delete by Query, Lucene currently has no
* way to measure the RAM usage of individual Queries so the accounting will
* under-estimate and you should compensate by either calling commit()
* periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
- * to flush by count instead of RAM usage (each buffered delete Query counts
- * as one).
- *
+ * to flush and apply buffered deletes by count instead of RAM usage
+ * (for each buffered delete Query a constant number of bytes is used to estimate
+ * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will
+ * not trigger any segment flushes.
+ * <p>
+ * <b>NOTE</b>: It's not guaranteed that all memory-resident documents are flushed
+ * once this limit is exceeded. Depending on the configured {@link FlushPolicy}, only a
+ * subset of the buffered documents are flushed and therefore only part of the RAM
+ * buffer is released.
* <p>
- * <b>NOTE</b>: because IndexWriter uses <code>int</code>s when managing its
- * internal storage, the absolute maximum value for this setting is somewhat
- * less than 2048 MB. The precise limit depends on various factors, such as
- * how large your documents are, how many fields have norms, etc., so it's
- * best to set this value comfortably under 2048.
*
- * <p>
* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
- *
+ * @see #setFlushPolicy(FlushPolicy)
+ * @see #setRAMPerThreadHardLimitMB(int)
+ *
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*
* @throws IllegalArgumentException
* if ramBufferSize is enabled but non-positive, or it disables
* ramBufferSize when maxBufferedDocs is already disabled
+ *
*/
public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
- if (ramBufferSizeMB > 2048.0) {
- throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB
- + " is too large; should be comfortably less than 2048");
- }
if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
throw new IllegalArgumentException(
"ramBufferSize should be > 0.0 MB when enabled");
@@ -438,22 +443,22 @@ public final class IndexWriterConfig imp
* Determines the minimal number of documents required before the buffered
* in-memory documents are flushed as a new Segment. Large values generally
* give faster indexing.
- *
+ *
* <p>
* When this is set, the writer will flush every maxBufferedDocs added
* documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
* flush due to number of buffered documents. Note that if flushing by RAM
* usage is also enabled, then the flush will be triggered by whichever comes
* first.
- *
+ *
* <p>
* Disabled by default (writer flushes by RAM usage).
- *
+ *
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*
* @see #setRAMBufferSizeMB(double)
- *
+ * @see #setFlushPolicy(FlushPolicy)
* @throws IllegalArgumentException
* if maxBufferedDocs is enabled but smaller than 2, or it disables
* maxBufferedDocs when ramBufferSize is already disabled
@@ -473,7 +478,7 @@ public final class IndexWriterConfig imp
/**
* Returns the number of buffered added documents that will trigger a flush if
* enabled.
- *
+ *
* @see #setMaxBufferedDocs(int)
*/
public int getMaxBufferedDocs() {
@@ -519,32 +524,43 @@ public final class IndexWriterConfig imp
return codecProvider;
}
-
+
/**
* Returns the current MergePolicy in use by this writer.
- *
+ *
* @see #setMergePolicy(MergePolicy)
*/
public MergePolicy getMergePolicy() {
return mergePolicy;
}
- /**
- * Sets the max number of simultaneous threads that may be indexing documents
- * at once in IndexWriter. Values < 1 are invalid and if passed
- * <code>maxThreadStates</code> will be set to
- * {@link #DEFAULT_MAX_THREAD_STATES}.
- *
- * <p>Only takes effect when IndexWriter is first created. */
- public IndexWriterConfig setMaxThreadStates(int maxThreadStates) {
- this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates;
+ /** Expert: Sets the {@link DocumentsWriterPerThreadPool} instance used by the
+ * IndexWriter to assign thread-states to incoming indexing threads. If no
+ * {@link DocumentsWriterPerThreadPool} is set {@link IndexWriter} will use
+ * {@link ThreadAffinityDocumentsWriterThreadPool} with the max number of
+ * thread-states set to {@link DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES}.
+ * </p>
+ * <p>
+ * NOTE: The given {@link DocumentsWriterPerThreadPool} instance must not be used with
+ * other {@link IndexWriter} instances once it has been initialized / associated with an
+ * {@link IndexWriter}.
+ * </p>
+ * <p>
+ * NOTE: This only takes effect when IndexWriter is first created.</p>*/
+ public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) {
+ if (threadPool == null) {
+ throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be null");
+ }
+ this.indexerThreadPool = threadPool;
return this;
}
- /** Returns the max number of simultaneous threads that
- * may be indexing documents at once in IndexWriter. */
- public int getMaxThreadStates() {
- return maxThreadStates;
+ /** Returns the configured {@link DocumentsWriterPerThreadPool} instance.
+ * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool)
+ * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/
+ public DocumentsWriterPerThreadPool getIndexerThreadPool() {
+ return this.indexerThreadPool;
}
/** By default, IndexWriter does not pool the
@@ -572,10 +588,10 @@ public final class IndexWriterConfig imp
*
* <p>Only takes effect when IndexWriter is first created. */
IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
- this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
+ this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain;
return this;
}
-
+
/** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */
IndexingChain getIndexingChain() {
return indexingChain;
@@ -604,6 +620,53 @@ public final class IndexWriterConfig imp
return readerTermsIndexDivisor;
}
+ /**
+ * Expert: Controls when segments are flushed to disk during indexing.
+ * The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation; once initialized,
+ * the given instance is bound to this {@link IndexWriter} and should not be used with another writer.
+ * @see #setMaxBufferedDeleteTerms(int)
+ * @see #setMaxBufferedDocs(int)
+ * @see #setRAMBufferSizeMB(double)
+ */
+ public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
+ this.flushPolicy = flushPolicy;
+ return this;
+ }
+
+ /**
+ * Expert: Sets the maximum memory consumption per thread triggering a forced
+ * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed
+ * once it exceeds this limit even if the {@link #getRAMBufferSizeMB()} has
+ * not been exceeded. This is a safety limit that prevents a
+ * {@link DocumentsWriterPerThread} from exhausting its address space, since it
+ * uses internal 32-bit signed integer based memory addressing.
+ * The given value must be less than 2GB (2048MB).
+ *
+ * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB
+ */
+ public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) {
+ if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) {
+ throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB");
+ }
+ this.perThreadHardLimitMB = perThreadHardLimitMB;
+ return this;
+ }
+
+ /**
+ * Returns the max amount of memory each {@link DocumentsWriterPerThread} can
+ * consume until forcefully flushed.
+ * @see #setRAMPerThreadHardLimitMB(int)
+ */
+ public int getRAMPerThreadHardLimitMB() {
+ return perThreadHardLimitMB;
+ }
+ /**
+ * @see #setFlushPolicy(FlushPolicy)
+ */
+ public FlushPolicy getFlushPolicy() {
+ return flushPolicy;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -623,9 +686,13 @@ public final class IndexWriterConfig imp
sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n");
sb.append("codecProvider=").append(codecProvider).append("\n");
sb.append("mergePolicy=").append(mergePolicy).append("\n");
- sb.append("maxThreadStates=").append(maxThreadStates).append("\n");
+ sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n");
sb.append("readerPooling=").append(readerPooling).append("\n");
sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n");
+ sb.append("flushPolicy=").append(flushPolicy).append("\n");
+ sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n");
+
return sb.toString();
}
+
}
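
Illustrative sketch (not part of this commit): wiring the reworked configuration together. This assumes the setters shown above keep these signatures, that ThreadAffinityDocumentsWriterThreadPool exposes a max-thread-states constructor as the setIndexerThreadPool javadoc implies, and that the analyzer/Version choices below are available on this branch.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.ThreadAffinityDocumentsWriterThreadPool;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class IndexWriterConfigSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
            new StandardAnalyzer(Version.LUCENE_40));   // analyzer choice is illustrative
        conf.setRAMBufferSizeMB(64.0)                   // soft global trigger; the FlushPolicy decides what to flush
            .setRAMPerThreadHardLimitMB(512)            // hard per-DWPT cap; must be > 0 and < 2048
            .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(8)); // replaces setMaxThreadStates(8)
        IndexWriter writer = new IndexWriter(dir, conf);
        writer.close();
      }
    }
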
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java Mon May 2 13:50:57 2011
@@ -1,5 +1,7 @@
package org.apache.lucene.index;
+import java.util.Arrays;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -22,24 +24,24 @@ final class IntBlockPool {
public int[][] buffers = new int[10][];
int bufferUpto = -1; // Which buffer we are upto
- public int intUpto = DocumentsWriter.INT_BLOCK_SIZE; // Where we are in head buffer
+ public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE; // Where we are in head buffer
public int[] buffer; // Current head buffer
- public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE; // Current head offset
+ public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE; // Current head offset
- final private DocumentsWriter docWriter;
+ final private DocumentsWriterPerThread docWriter;
- public IntBlockPool(DocumentsWriter docWriter) {
+ public IntBlockPool(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
}
public void reset() {
if (bufferUpto != -1) {
- if (bufferUpto > 0)
- // Recycle all but the first buffer
- docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto);
-
// Reuse first buffer
+ if (bufferUpto > 0) {
+ docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1);
+ Arrays.fill(buffers, 1, bufferUpto, null);
+ }
bufferUpto = 0;
intUpto = 0;
intOffset = 0;
@@ -57,7 +59,7 @@ final class IntBlockPool {
bufferUpto++;
intUpto = 0;
- intOffset += DocumentsWriter.INT_BLOCK_SIZE;
+ intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE;
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java Mon May 2 13:50:57 2011
@@ -17,20 +17,22 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.util.Collection;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
abstract class InvertedDocConsumer {
- /** Add a new thread */
- abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
-
/** Abort (called after hitting AbortException) */
abstract void abort();
/** Flush a new segment */
- abstract void flush(Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, InvertedDocConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
+
+ abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+
+ abstract void startDocument() throws IOException;
+
+ abstract void finishDocument() throws IOException;
/** Attempt to free RAM, returning true if any RAM was
* freed */
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Mon May 2 13:50:57 2011
@@ -17,12 +17,13 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.util.Collection;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
abstract class InvertedDocEndConsumer {
- abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
- abstract void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
abstract void abort();
+ abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+ abstract void startDocument() throws IOException;
+ abstract void finishDocument() throws IOException;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon May 2 13:50:57 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
@@ -72,7 +71,6 @@ public abstract class LogMergePolicy ext
// out there wrote his own LMP ...
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
- protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -111,21 +109,6 @@ public abstract class LogMergePolicy ext
writer.get().message("LMP: " + message);
}
- /** If true, merges must be in-order slice of the
- * segments. If false, then the merge policy is free to
- * pick any segments. The default is false, which is
- * in general more efficient than true since it gives the
- * merge policy more freedom to pick closely sized
- * segments. */
- public void setRequireContiguousMerge(boolean v) {
- requireContiguousMerge = v;
- }
-
- /** See {@link #setRequireContiguousMerge}. */
- public boolean getRequireContiguousMerge() {
- return requireContiguousMerge;
- }
-
/** <p>Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.</p> */
@@ -378,8 +361,6 @@ public abstract class LogMergePolicy ext
return null;
}
- // TODO: handle non-contiguous merge case differently?
-
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
// since optimize started):
@@ -499,14 +480,6 @@ public abstract class LogMergePolicy ext
}
}
- private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
- public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
- return o1.index - o2.index;
- }
- }
-
- private static final SortByIndex sortByIndex = new SortByIndex();
-
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
@@ -532,31 +505,24 @@ public abstract class LogMergePolicy ext
final SegmentInfo info = infos.info(i);
long size = size(info);
- // When we require contiguous merge, we still add the
- // segment to levels to avoid merging "across" a set
- // of segment being merged:
- if (!requireContiguousMerge && mergingSegments.contains(info)) {
- if (verbose()) {
- message("seg " + info.name + " already being merged; skip");
- }
- continue;
- }
-
// Floor tiny segments
if (size < 1) {
size = 1;
}
+
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
levels.add(infoLevel);
+
if (verbose()) {
- message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+ final long segBytes = sizeBytes(info);
+ String extra = mergingSegments.contains(info) ? " [merging]" : "";
+ if (size >= maxMergeSize) {
+ extra += " [skip: too large]";
+ }
+ message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
}
}
- if (!requireContiguousMerge) {
- Collections.sort(levels);
- }
-
final float levelFloor;
if (minMergeSize <= 0)
levelFloor = (float) 0.0;
@@ -614,23 +580,29 @@ public abstract class LogMergePolicy ext
int end = start + mergeFactor;
while(end <= 1+upto) {
boolean anyTooLarge = false;
+ boolean anyMerging = false;
for(int i=start;i<end;i++) {
final SegmentInfo info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
+ if (mergingSegments.contains(info)) {
+ anyMerging = true;
+ break;
+ }
}
- if (!anyTooLarge) {
+ if (anyMerging) {
+ // skip
+ } else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose()) {
- message(" " + start + " to " + end + ": add this merge");
- }
- Collections.sort(levels.subList(start, end), sortByIndex);
final SegmentInfos mergeInfos = new SegmentInfos();
for(int i=start;i<end;i++) {
mergeInfos.add(levels.get(i).info);
assert infos.contains(levels.get(i).info);
}
+ if (verbose()) {
+ message(" add merge=" + writer.get().segString(mergeInfos) + " start=" + start + " end=" + end);
+ }
spec.add(new OneMerge(mergeInfos));
} else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -682,7 +654,7 @@ public abstract class LogMergePolicy ext
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
- sb.append("requireContiguousMerge=").append(requireContiguousMerge);
+ sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
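
Not part of this commit, but a useful companion note: with requireContiguousMerge removed here and IndexWriterConfig's default switched to TieredMergePolicy above, applications that relied on in-order merges can still get them by setting a LogMergePolicy explicitly, since log merge policies only ever merge adjacent segments. A minimal sketch, assuming an existing IndexWriterConfig named conf:

    import org.apache.lucene.index.LogByteSizeMergePolicy;

    // Restore contiguous (adjacent-segment) merges by overriding the new
    // TieredMergePolicy default:
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.setMergeFactor(10);     // merge 10 adjacent segments per level
    conf.setMergePolicy(lmp);
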
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon May 2 13:50:57 2011
@@ -72,6 +72,7 @@ public abstract class MergePolicy implem
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
+ long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
public final SegmentInfos segments;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java Mon May 2 13:50:57 2011
@@ -51,7 +51,6 @@ public final class MultiFields extends F
private final Fields[] subs;
private final ReaderUtil.Slice[] subSlices;
private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();
- private final Map<String,DocValues> docValues = new ConcurrentHashMap<String,DocValues>();
/** Returns a single {@link Fields} instance for this
* reader, merging fields/terms/docs/positions on the
@@ -193,12 +192,6 @@ public final class MultiFields extends F
}
}
- /** This method may return null if the field does not exist.*/
- public static DocValues getDocValues(IndexReader r, String field) throws IOException {
- final Fields fields = getFields(r);
- return fields == null? null: fields.docValues(field);
- }
-
/** Returns {@link DocsEnum} for the specified field &
* term. This may return null if the term does not
* exist. */
@@ -283,41 +276,5 @@ public final class MultiFields extends F
return result;
}
- @Override
- public DocValues docValues(String field) throws IOException {
- DocValues result = docValues.get(field);
- if (result == null) {
- // Lazy init: first time this field is requested, we
- // create & add to docValues:
- final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
- int docsUpto = 0;
- Type type = null;
- // Gather all sub-readers that share this field
- for(int i=0;i<subs.length;i++) {
- DocValues values = subs[i].docValues(field);
- final int start = subSlices[i].start;
- final int length = subSlices[i].length;
- if (values != null) {
- if (docsUpto != start) {
- type = values.type();
- docValuesIndex.add(new MultiDocValues.DocValuesIndex(new MultiDocValues.DummyDocValues(start, type), docsUpto, start - docsUpto));
- }
- docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, length));
- docsUpto = start+length;
-
- } else if (i+1 == subs.length && !docValuesIndex.isEmpty()) {
- docValuesIndex.add(new MultiDocValues.DocValuesIndex(
- new MultiDocValues.DummyDocValues(start, type), docsUpto, start
- - docsUpto));
- }
- }
- if (docValuesIndex.isEmpty()) {
- return null;
- }
- result = new MultiDocValues(docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
- docValues.put(field, result);
- }
- return result;
- }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java Mon May 2 13:50:57 2011
@@ -17,12 +17,9 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.MultiDocValues;
-import org.apache.lucene.index.values.Type;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ReaderUtil;
-import org.apache.lucene.util.ReaderUtil.Slice;
import java.io.IOException;
import java.util.List;
@@ -153,37 +150,37 @@ public final class MultiFieldsEnum exte
}
}
- @Override
- public DocValues docValues() throws IOException {
- final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
- int docsUpto = 0;
- Type type = null;
- final int numEnums = enumWithSlices.length;
- for (int i = 0; i < numEnums; i++) {
- FieldsEnumWithSlice withSlice = enumWithSlices[i];
- Slice slice = withSlice.slice;
- final DocValues values = withSlice.fields.docValues();
- final int start = slice.start;
- final int length = slice.length;
- if (values != null && currentField.equals(withSlice.current)) {
- if (docsUpto != start) {
- type = values.type();
- docValuesIndex.add(new MultiDocValues.DocValuesIndex(
- new MultiDocValues.DummyDocValues(start, type), docsUpto, start
- - docsUpto));
- }
- docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
- length));
- docsUpto = start + length;
-
- } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) {
- docValuesIndex.add(new MultiDocValues.DocValuesIndex(
- new MultiDocValues.DummyDocValues(start, type), docsUpto, start
- - docsUpto));
- }
- }
- return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex
- .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
- }
+// @Override
+// public DocValues docValues() throws IOException {
+// final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
+// int docsUpto = 0;
+// Type type = null;
+// final int numEnums = enumWithSlices.length;
+// for (int i = 0; i < numEnums; i++) {
+// FieldsEnumWithSlice withSlice = enumWithSlices[i];
+// Slice slice = withSlice.slice;
+// final DocValues values = withSlice.fields.docValues();
+// final int start = slice.start;
+// final int length = slice.length;
+// if (values != null && currentField.equals(withSlice.current)) {
+// if (docsUpto != start) {
+// type = values.type();
+// docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+// new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+// - docsUpto));
+// }
+// docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
+// length));
+// docsUpto = start + length;
+//
+// } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) {
+// docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+// new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+// - docsUpto));
+// }
+// }
+// return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex
+// .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
+// }
}
Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java?rev=1098566&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java Mon May 2 13:50:57 2011
@@ -0,0 +1,148 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.MultiDocValues;
+import org.apache.lucene.index.values.Type;
+import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex;
+import org.apache.lucene.util.ReaderUtil;
+
+/**
+ *
+ * nocommit - javadoc
+ * @experimental
+ */
+public class MultiPerDocValues extends PerDocValues {
+ private final PerDocValues[] subs;
+ private final ReaderUtil.Slice[] subSlices;
+ private final Map<String, DocValues> docValues = new ConcurrentHashMap<String, DocValues>();
+ private final TreeSet<String> fields;
+
+ public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) {
+ this.subs = subs;
+ this.subSlices = subSlices;
+ fields = new TreeSet<String>();
+ for (PerDocValues sub : subs) {
+ fields.addAll(sub.fields());
+ }
+ }
+
+ public static PerDocValues getPerDocs(IndexReader r) throws IOException {
+ final IndexReader[] subs = r.getSequentialSubReaders();
+ if (subs == null) {
+ // already an atomic reader
+ return r.perDocValues();
+ } else if (subs.length == 0) {
+ // no fields
+ return null;
+ } else if (subs.length == 1) {
+ return getPerDocs(subs[0]);
+ }
+ PerDocValues perDocValues = r.retrievePerDoc();
+ if (perDocValues == null) {
+
+ final List<PerDocValues> producer = new ArrayList<PerDocValues>();
+ final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
+
+ new ReaderUtil.Gather(r) {
+ @Override
+ protected void add(int base, IndexReader r) throws IOException {
+ final PerDocValues f = r.perDocValues();
+ if (f != null) {
+ producer.add(f);
+ slices
+ .add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1));
+ }
+ }
+ }.run();
+
+ if (producer.size() == 0) {
+ return null;
+ } else if (producer.size() == 1) {
+ perDocValues = producer.get(0);
+ } else {
+ perDocValues = new MultiPerDocValues(
+ producer.toArray(PerDocValues.EMPTY_ARRAY),
+ slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
+ }
+ r.storePerDoc(perDocValues);
+ }
+ return perDocValues;
+ }
+
+ public DocValues docValues(String field) throws IOException {
+ DocValues result = docValues.get(field);
+ if (result == null) {
+ // Lazy init: first time this field is requested, we
+ // create & add to docValues:
+ final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
+ int docsUpto = 0;
+ Type type = null;
+ // Gather all sub-readers that share this field
+ for (int i = 0; i < subs.length; i++) {
+ DocValues values = subs[i].docValues(field);
+ final int start = subSlices[i].start;
+ final int length = subSlices[i].length;
+ if (values != null) {
+ if (docsUpto != start) {
+ type = values.type();
+ docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+ new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+ - docsUpto));
+ }
+ docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
+ length));
+ docsUpto = start + length;
+
+ } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) {
+ docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+ new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+ - docsUpto));
+ }
+ }
+ if (docValuesIndex.isEmpty()) {
+ return null;
+ }
+ result = new MultiDocValues(
+ docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
+ docValues.put(field, result);
+ }
+ return result;
+ }
+
+ @Override
+ public void close() throws IOException {
+ PerDocValues[] perDocValues = this.subs;
+ for (PerDocValues values : perDocValues) {
+ values.close();
+ }
+ }
+
+ @Override
+ public Collection<String> fields() {
+ return fields;
+ }
+}
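
A hedged usage sketch for the new entry point (not part of this commit); the field name is hypothetical and the DocValues API on this branch is experimental:

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiPerDocValues;
    import org.apache.lucene.index.codecs.PerDocValues;
    import org.apache.lucene.index.values.DocValues;

    // reader: any IndexReader; composite readers get a lazily built
    // MultiPerDocValues, atomic readers return their own PerDocValues
    PerDocValues perDocs = MultiPerDocValues.getPerDocs(reader);
    if (perDocs != null) {
      DocValues values = perDocs.docValues("price"); // hypothetical field
      if (values != null) {
        // inspect values.type() and read per-document values through the
        // experimental DocValues API
      }
    }
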
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java Mon May 2 13:50:57 2011
@@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHa
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
@@ -403,4 +404,9 @@ public class MultiReader extends IndexRe
sub.removeReaderFinishedListener(listener);
}
}
+
+ @Override
+ public PerDocValues perDocValues() throws IOException {
+ throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top-level PerDocValues");
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java Mon May 2 13:50:57 2011
@@ -19,11 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
-import java.util.Iterator;
-import java.util.HashMap;
import java.util.Map;
-import java.util.List;
-import java.util.ArrayList;
import org.apache.lucene.store.IndexOutput;
@@ -36,10 +32,6 @@ import org.apache.lucene.store.IndexOutp
final class NormsWriter extends InvertedDocEndConsumer {
- @Override
- public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
- return new NormsWriterPerThread(docInverterPerThread, this);
- }
@Override
public void abort() {}
@@ -50,40 +42,11 @@ final class NormsWriter extends Inverted
/** Produce _X.nrm if any document had a field with norms
* not disabled */
@Override
- public void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
-
- final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
-
+ public void flush(Map<FieldInfo,InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
if (!state.fieldInfos.hasNorms()) {
return;
}
- // Typically, each thread will have encountered the same
- // field. So first we collate by field, ie, all
- // per-thread field instances that correspond to the
- // same FieldInfo
- for (final Map.Entry<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> entry : threadsAndFields.entrySet()) {
- final Collection<InvertedDocEndConsumerPerField> fields = entry.getValue();
- final Iterator<InvertedDocEndConsumerPerField> fieldsIt = fields.iterator();
-
- while (fieldsIt.hasNext()) {
- final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
-
- if (perField.upto > 0) {
- // It has some norms
- List<NormsWriterPerField> l = byField.get(perField.fieldInfo);
- if (l == null) {
- l = new ArrayList<NormsWriterPerField>();
- byField.put(perField.fieldInfo, l);
- }
- l.add(perField);
- } else
- // Remove this field since we haven't seen it
- // since the previous flush
- fieldsIt.remove();
- }
- }
-
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
IndexOutput normsOut = state.directory.createOutput(normsFileName);
@@ -93,60 +56,25 @@ final class NormsWriter extends Inverted
int normCount = 0;
for (FieldInfo fi : state.fieldInfos) {
- final List<NormsWriterPerField> toMerge = byField.get(fi);
+ final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi);
int upto = 0;
- if (toMerge != null) {
-
- final int numFields = toMerge.size();
-
+ if (toWrite != null && toWrite.upto > 0) {
normCount++;
- final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
- int[] uptos = new int[numFields];
-
- for(int j=0;j<numFields;j++)
- fields[j] = toMerge.get(j);
-
- int numLeft = numFields;
-
- while(numLeft > 0) {
-
- assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);
-
- int minLoc = 0;
- int minDocID = fields[0].docIDs[uptos[0]];
-
- for(int j=1;j<numLeft;j++) {
- final int docID = fields[j].docIDs[uptos[j]];
- if (docID < minDocID) {
- minDocID = docID;
- minLoc = j;
- }
- }
-
- assert minDocID < state.numDocs;
-
- // Fill hole
- for(;upto<minDocID;upto++)
+ int docID = 0;
+ for (; docID < state.numDocs; docID++) {
+ if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) {
+ normsOut.writeByte(toWrite.norms[upto]);
+ upto++;
+ } else {
normsOut.writeByte((byte) 0);
-
- normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
- (uptos[minLoc])++;
- upto++;
-
- if (uptos[minLoc] == fields[minLoc].upto) {
- fields[minLoc].reset();
- if (minLoc != numLeft-1) {
- fields[minLoc] = fields[numLeft-1];
- uptos[minLoc] = uptos[numLeft-1];
- }
- numLeft--;
}
}
-
- // Fill final hole with defaultNorm
- for(;upto<state.numDocs;upto++)
- normsOut.writeByte((byte) 0);
+
+ // we should have consumed every norm
+ assert upto == toWrite.upto;
+
+ toWrite.reset();
} else if (fi.isIndexed && !fi.omitNorms) {
normCount++;
// Fill entire field with default norm:
@@ -161,4 +89,16 @@ final class NormsWriter extends Inverted
normsOut.close();
}
}
+
+ @Override
+ void finishDocument() throws IOException {}
+
+ @Override
+ void startDocument() throws IOException {}
+
+ @Override
+ InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField,
+ FieldInfo fieldInfo) {
+ return new NormsWriterPerField(docInverterPerField, fieldInfo);
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Mon May 2 13:50:57 2011
@@ -27,9 +27,8 @@ import org.apache.lucene.util.ArrayUtil;
final class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable<NormsWriterPerField> {
- final NormsWriterPerThread perThread;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final Similarity similarity;
// Holds all docID/norm pairs we've seen
@@ -46,10 +45,9 @@ final class NormsWriterPerField extends
upto = 0;
}
- public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) {
- this.perThread = perThread;
+ public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
- docState = perThread.docState;
+ docState = docInverterPerField.docState;
fieldState = docInverterPerField.fieldState;
similarity = docState.similarityProvider.get(fieldInfo.name);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java Mon May 2 13:50:57 2011
@@ -21,9 +21,8 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.Pair;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MapBackedSet;
@@ -183,21 +182,15 @@ public class ParallelReader extends Inde
}
}
- @Override
- public DocValues docValues() throws IOException {
- assert currentReader != null;
- return MultiFields.getDocValues(currentReader, currentField);
- }
}
// Single instance of this, per ParallelReader instance
private class ParallelFields extends Fields {
- final HashMap<String,Pair<Terms, DocValues>> fields = new HashMap<String,Pair<Terms, DocValues>>();
+ final HashMap<String,Terms> fields = new HashMap<String,Terms>();
public void addField(String field, IndexReader r) throws IOException {
Fields multiFields = MultiFields.getFields(r);
- fields.put(field, new Pair<Terms, DocValues>( multiFields.terms(field),
- multiFields.docValues(field)));
+ fields.put(field, multiFields.terms(field));
}
@Override
@@ -206,12 +199,7 @@ public class ParallelReader extends Inde
}
@Override
public Terms terms(String field) throws IOException {
- return fields.get(field).cur;
- }
-
- @Override
- public DocValues docValues(String field) throws IOException {
- return fields.get(field).cud;
+ return fields.get(field);
}
}
@@ -578,6 +566,12 @@ public class ParallelReader extends Inde
reader.removeReaderFinishedListener(listener);
}
}
+
+ @Override
+ public PerDocValues perDocValues() throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java?rev=1098566&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java Mon May 2 13:50:57 2011
@@ -0,0 +1,77 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.PrintStream;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * nocommit - javadoc
+ * @lucene.experimental
+ */
+public class PerDocWriteState {
+ public final PrintStream infoStream;
+ public final Directory directory;
+ public final String segmentName;
+ public final FieldInfos fieldInfos;
+ public final AtomicLong bytesUsed;
+ public final SegmentCodecs segmentCodecs;
+ public final int codecId;
+
+ /** Expert: The fraction of terms in the "dictionary" which should be stored
+ * in RAM. Smaller values use more memory, but make searching slightly
+ * faster, while larger values use less memory and make searching slightly
+ * slower. Searching is typically not dominated by dictionary lookup, so
+ * tweaking this is rarely useful.*/
+ public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
+
+ public PerDocWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, AtomicLong bytesUsed, int codecId) {
+ this.infoStream = infoStream;
+ this.directory = directory;
+ this.segmentName = segmentName;
+ this.fieldInfos = fieldInfos;
+ this.segmentCodecs = fieldInfos.buildSegmentCodecs(false);
+ this.codecId = codecId;
+ this.bytesUsed = bytesUsed;
+ }
+
+ public PerDocWriteState(SegmentWriteState state) {
+ infoStream = state.infoStream;
+ directory = state.directory;
+ segmentCodecs = state.segmentCodecs;
+ segmentName = state.segmentName;
+ fieldInfos = state.fieldInfos;
+ codecId = state.codecId;
+ bytesUsed = new AtomicLong(0);
+ }
+
+ public PerDocWriteState(PerDocWriteState state, int codecId) {
+ this.infoStream = state.infoStream;
+ this.directory = state.directory;
+ this.segmentName = state.segmentName;
+ this.fieldInfos = state.fieldInfos;
+ this.segmentCodecs = state.segmentCodecs;
+ this.codecId = codecId;
+ this.bytesUsed = state.bytesUsed;
+ }
+
+
+ public String codecIdAsString() {
+ return "" + codecId;
+ }
+}
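
The three constructors above encode how this state flows through a flush: a top-level PerDocWriteState wraps the SegmentWriteState, and a per-codec view is then derived with that codec's id. A sketch of that flow (mirroring PerFieldCodecWrapper.PerDocConsumers later in this diff; variable names are illustrative):

    // segmentWriteState: the SegmentWriteState of the in-progress flush
    PerDocWriteState docState = new PerDocWriteState(segmentWriteState);
    for (int i = 0; i < segmentCodecs.codecs.length; i++) {
      // each codec sees the shared state tagged with its own codec id
      consumers.add(segmentCodecs.codecs[i].docsConsumer(new PerDocWriteState(docState, i)));
    }
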
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Mon May 2 13:50:57 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@@ -28,6 +29,8 @@ import java.util.TreeSet;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.index.values.DocValues;
@@ -75,12 +78,6 @@ final class PerFieldCodecWrapper extends
}
@Override
- public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
- final FieldsConsumer fields = consumers.get(field.getCodecId());
- return fields.addValuesField(field);
- }
-
- @Override
public void close() throws IOException {
Iterator<FieldsConsumer> it = consumers.iterator();
IOException err = null;
@@ -113,7 +110,7 @@ final class PerFieldCodecWrapper extends
boolean success = false;
try {
for (FieldInfo fi : fieldInfos) {
- if (fi.isIndexed || fi.hasDocValues()) { // TODO this does not work for non-indexed fields
+ if (fi.isIndexed) {
fields.add(fi.name);
assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
Codec codec = segmentCodecs.codecs[fi.getCodecId()];
@@ -171,11 +168,6 @@ final class PerFieldCodecWrapper extends
return TermsEnum.EMPTY;
}
}
-
- @Override
- public DocValues docValues() throws IOException {
- return codecs.get(current).docValues(current);
- }
}
@Override
@@ -190,12 +182,6 @@ final class PerFieldCodecWrapper extends
}
@Override
- public DocValues docValues(String field) throws IOException {
- FieldsProducer fieldsProducer = codecs.get(field);
- return fieldsProducer == null? null: fieldsProducer.docValues(field);
- }
-
- @Override
public void close() throws IOException {
Iterator<FieldsProducer> it = codecs.values().iterator();
IOException err = null;
@@ -244,4 +230,133 @@ final class PerFieldCodecWrapper extends
codec.getExtensions(extensions);
}
}
+
+ @Override
+ public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+ return new PerDocConsumers(state);
+ }
+
+ @Override
+ public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+ return new PerDocProducers(state.dir, state.fieldInfos, state.segmentInfo,
+ state.readBufferSize, state.termsIndexDivisor);
+ }
+
+ private final class PerDocProducers extends PerDocValues {
+ private final Set<String> fields = new TreeSet<String>();
+ private final Map<String, PerDocValues> codecs = new HashMap<String, PerDocValues>();
+
+ public PerDocProducers(Directory dir, FieldInfos fieldInfos, SegmentInfo si,
+ int readBufferSize, int indexDivisor) throws IOException {
+ final Map<Codec, PerDocValues> producers = new HashMap<Codec, PerDocValues>();
+ boolean success = false;
+ try {
+ for (FieldInfo fi : fieldInfos) {
+ if (fi.hasDocValues()) {
+ fields.add(fi.name);
+ assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+ Codec codec = segmentCodecs.codecs[fi.getCodecId()];
+ if (!producers.containsKey(codec)) {
+ producers.put(codec, codec.docsProducer(new SegmentReadState(dir,
+ si, fieldInfos, readBufferSize, indexDivisor, fi.getCodecId())));
+ }
+ codecs.put(fi.name, producers.get(codec));
+ }
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ // If we hit exception (eg, IOE because writer was
+ // committing, or, for any other reason) we must
+ // go back and close all FieldsProducers we opened:
+ for(PerDocValues producer : producers.values()) {
+ try {
+ producer.close();
+ } catch (Throwable t) {
+ // Suppress all exceptions here so we continue
+ // to throw the original one
+ }
+ }
+ }
+ }
+ }
+ @Override
+ public Collection<String> fields() {
+ return fields;
+ }
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ final PerDocValues perDocProducer = codecs.get(field);
+ if (perDocProducer == null) {
+ return null;
+ }
+ return perDocProducer.docValues(field);
+ }
+
+ @Override
+ public void close() throws IOException {
+ final Iterator<PerDocValues> it = codecs.values().iterator();
+ IOException err = null;
+ while (it.hasNext()) {
+ try {
+ it.next().close();
+ } catch (IOException ioe) {
+ // keep first IOException we hit but keep
+ // closing the rest
+ if (err == null) {
+ err = ioe;
+ }
+ }
+ }
+ if (err != null) {
+ throw err;
+ }
+ }
+ }
+
+ private final class PerDocConsumers extends PerDocConsumer {
+ private final ArrayList<PerDocConsumer> consumers = new ArrayList<PerDocConsumer>();
+
+ public PerDocConsumers(PerDocWriteState state) throws IOException {
+ assert segmentCodecs == state.segmentCodecs;
+ final Codec[] codecs = segmentCodecs.codecs;
+ for (int i = 0; i < codecs.length; i++) {
+ consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i)));
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ Iterator<PerDocConsumer> it = consumers.iterator();
+ IOException err = null;
+ while (it.hasNext()) {
+ try {
+ PerDocConsumer next = it.next();
+ if (next != null) {
+ next.close();
+ }
+ } catch (IOException ioe) {
+ // keep first IOException we hit but keep
+ // closing the rest
+ if (err == null) {
+ err = ioe;
+ }
+ }
+ }
+ if (err != null) {
+ throw err;
+ }
+ }
+
+ @Override
+ public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+ assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+ final PerDocConsumer perDoc = consumers.get(field.getCodecId());
+ if (perDoc == null) {
+ return null;
+ }
+ return perDoc.addValuesField(field);
+ }
+
+ }
}
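
PerDocProducers above opens at most one PerDocValues per distinct codec and
then maps each field name to its codec's producer, so all fields sharing a
codec share a single underlying reader; PerDocConsumers does the symmetric
thing on the write side by deriving one PerDocWriteState per codec. A
stripped-down sketch of the read-side memoization, using stand-in types
rather than the real codec API:

import java.util.HashMap;
import java.util.Map;

// Stand-ins for Codec and the per-segment producer it opens.
interface Codec { Producer open(); }
interface Producer {}

final class PerFieldDispatch {
  private final Map<String, Producer> byField = new HashMap<String, Producer>();

  PerFieldDispatch(Map<String, Codec> codecByField) {
    // Memoize: one Producer per distinct Codec, shared by all of its fields.
    final Map<Codec, Producer> opened = new HashMap<Codec, Producer>();
    for (Map.Entry<String, Codec> e : codecByField.entrySet()) {
      Producer p = opened.get(e.getValue());
      if (p == null) {
        p = e.getValue().open();
        opened.put(e.getValue(), p);
      }
      byField.put(e.getKey(), p); // field name -> its codec's shared producer
    }
  }

  Producer forField(String field) {
    return byField.get(field); // null when the field has no doc values
  }
}

The close() methods of both inner classes use the matching teardown idiom:
close every producer or consumer, remember only the first IOException, and
rethrow it after the loop so one failure cannot leak the remaining resources.
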
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Mon May 2 13:50:57 2011
@@ -39,14 +39,14 @@ import org.apache.lucene.util.Constants;
/**
* Information about a segment such as its name, directory, and files related
* to the segment.
- *
+ *
* @lucene.experimental
*/
public final class SegmentInfo {
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
- static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
+ static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
public String name; // unique name in dir
public int docCount; // number of docs in seg
@@ -58,7 +58,7 @@ public final class SegmentInfo {
* - YES or higher if there are deletes at generation N
*/
private long delGen;
-
+
/*
* Current generation of each field's norm file. If this array is null,
* means no separate norms. If this array is not null, its values mean:
@@ -67,7 +67,7 @@ public final class SegmentInfo {
*/
private Map<Integer,Long> normGen;
- private boolean isCompoundFile;
+ private boolean isCompoundFile;
private volatile List<String> files; // cached list of files that this segment uses
// in the Directory
@@ -75,10 +75,13 @@ public final class SegmentInfo {
private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private String docStoreSegment; // name used to derive fields/vectors file we share with
// other segments
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
private int delCount; // How many deleted docs in this segment
@@ -93,9 +96,9 @@ public final class SegmentInfo {
private Map<String,String> diagnostics;
- // Tracks the Lucene version this segment was created with, since 3.1. Null
+ // Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
- // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
+ // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
// specific versions afterwards ("3.0", "3.1" etc.).
// see Constants.LUCENE_MAIN_VERSION.
private String version;
@@ -103,7 +106,7 @@ public final class SegmentInfo {
// NOTE: only used in-RAM by IW to track buffered deletes;
// this is never written to/read from the Directory
private long bufferedDeletesGen;
-
+
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
this.name = name;
@@ -184,11 +187,13 @@ public final class SegmentInfo {
docStoreSegment = name;
docStoreIsCompoundFile = false;
}
+
if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
// pre-4.0 indexes write a byte if there is a single norms file
byte b = input.readByte();
assert 1 == b;
}
+
int numNormGen = input.readInt();
if (numNormGen == NO) {
normGen = null;
@@ -209,7 +214,7 @@ public final class SegmentInfo {
assert delCount <= docCount;
hasProx = input.readByte() == YES;
-
+
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
segmentCodecs = new SegmentCodecs(codecs, input);
@@ -219,7 +224,7 @@ public final class SegmentInfo {
segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")});
}
diagnostics = input.readStringStringMap();
-
+
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
hasVectors = input.readByte() == 1;
} else {
@@ -368,7 +373,7 @@ public final class SegmentInfo {
// against this segment
return null;
} else {
- return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
+ return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@@ -434,7 +439,7 @@ public final class SegmentInfo {
if (hasSeparateNorms(number)) {
return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
} else {
- // single file for all norms
+ // single file for all norms
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
}
}
@@ -467,39 +472,74 @@ public final class SegmentInfo {
assert delCount <= docCount;
}
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public int getDocStoreOffset() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreOffset;
}
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public boolean getDocStoreIsCompoundFile() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreIsCompoundFile;
}
-
- void setDocStoreIsCompoundFile(boolean v) {
- docStoreIsCompoundFile = v;
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ this.docStoreIsCompoundFile = docStoreIsCompoundFile;
clearFilesCache();
}
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ void setDocStore(int offset, String segment, boolean isCompoundFile) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ docStoreOffset = offset;
+ docStoreSegment = segment;
+ docStoreIsCompoundFile = isCompoundFile;
+ clearFilesCache();
+ }
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public String getDocStoreSegment() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreSegment;
}
-
- public void setDocStoreSegment(String segment) {
- docStoreSegment = segment;
- }
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
void setDocStoreOffset(int offset) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
docStoreOffset = offset;
clearFilesCache();
}
- void setDocStore(int offset, String segment, boolean isCompoundFile) {
- docStoreOffset = offset;
- docStoreSegment = segment;
- docStoreIsCompoundFile = isCompoundFile;
- clearFilesCache();
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ public void setDocStoreSegment(String docStoreSegment) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ this.docStoreSegment = docStoreSegment;
}
-
+
/** Save this segment's info. */
public void write(IndexOutput output)
throws IOException {
@@ -509,12 +549,14 @@ public final class SegmentInfo {
output.writeString(name);
output.writeInt(docCount);
output.writeLong(delGen);
+
output.writeInt(docStoreOffset);
if (docStoreOffset != -1) {
output.writeString(docStoreSegment);
output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
}
+
if (normGen == null) {
output.writeInt(NO);
} else {
@@ -524,7 +566,7 @@ public final class SegmentInfo {
output.writeLong(entry.getValue());
}
}
-
+
output.writeByte((byte) (isCompoundFile ? YES : NO));
output.writeInt(delCount);
output.writeByte((byte) (hasProx ? 1:0));
@@ -572,9 +614,9 @@ public final class SegmentInfo {
// Already cached:
return files;
}
-
+
Set<String> fileSet = new HashSet<String>();
-
+
boolean useCompoundFile = getUseCompoundFile();
if (useCompoundFile) {
@@ -608,7 +650,7 @@ public final class SegmentInfo {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
- }
+ }
}
String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
@@ -646,7 +688,7 @@ public final class SegmentInfo {
}
/** Used for debugging. Format may suddenly change.
- *
+ *
* <p>Current format looks like
* <code>_a(3.1):c45/4->_1</code>, which means the segment's
* name is <code>_a</code>; it was created with Lucene 3.1 (or
@@ -661,7 +703,6 @@ public final class SegmentInfo {
StringBuilder s = new StringBuilder();
s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
-
char cfs = getUseCompoundFile() ? 'c' : 'C';
s.append(cfs);
@@ -677,7 +718,7 @@ public final class SegmentInfo {
if (delCount != 0) {
s.append('/').append(delCount);
}
-
+
if (docStoreOffset != -1) {
s.append("->").append(docStoreSegment);
if (docStoreIsCompoundFile) {
@@ -717,13 +758,13 @@ public final class SegmentInfo {
* <b>NOTE:</b> this method is used for internal purposes only - you should
* not modify the version of a SegmentInfo, or it may result in unexpected
* exceptions thrown when you attempt to open the index.
- *
+ *
* @lucene.internal
*/
public void setVersion(String version) {
this.version = version;
}
-
+
/** Returns the version of the code which wrote the segment. */
public String getVersion() {
return version;
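
Reading the toString() fragments in this hunk together: in the Javadoc's
example _a(3.1):c45/4->_1, _a is the segment name, (3.1) the Lucene version
that wrote it ('?' when unknown), c marks a compound-file segment (C would
mean non-compound), 45 is the document count, /4 the deletion count (omitted
when zero), and ->_1 names the shared doc store segment (omitted when the doc
store is private). A condensed, illustrative reconstruction of the assembly,
with the remaining details of the real method left out:

// Sketch based only on the fragments visible above; not the complete method.
final class SegmentInfoStringSketch {
  static String debugString(String name, String version, boolean compoundFile,
                            int docCount, int delCount, String docStoreSegment) {
    StringBuilder s = new StringBuilder();
    s.append(name).append('(').append(version == null ? "?" : version)
     .append(')').append(':');
    s.append(compoundFile ? 'c' : 'C'); // 'c' = compound file, 'C' = not
    s.append(docCount);
    if (delCount != 0) {
      s.append('/').append(delCount);   // deletion count, omitted when zero
    }
    if (docStoreSegment != null) {
      s.append("->").append(docStoreSegment); // shared doc store, if any
    }
    return s.toString();                // e.g. "_a(3.1):c45/4->_1"
  }
}
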
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Mon May 2 13:50:57 2011
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
-import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
@@ -31,6 +30,8 @@ import org.apache.lucene.index.codecs.Co
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.MergeState;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -40,24 +41,24 @@ import org.apache.lucene.util.ReaderUtil
/**
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}),
- * into a single Segment. After adding the appropriate readers, call the merge method to combine the
+ * into a single Segment. After adding the appropriate readers, call the merge method to combine the
* segments.
- *
+ *
* @see #merge
* @see #add
*/
final class SegmentMerger {
-
+
/** norms header placeholder */
- static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
-
+ static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
private Directory directory;
private String segment;
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
private List<IndexReader> readers = new ArrayList<IndexReader>();
private final FieldInfos fieldInfos;
-
+
private int mergedDocs;
private final MergeState.CheckAbort checkAbort;
@@ -65,12 +66,12 @@ final class SegmentMerger {
/** Maximum number of contiguous documents to bulk-copy
when merging stored fields */
private final static int MAX_RAW_MERGE_DOCS = 4192;
-
+
private Codec codec;
private SegmentWriteState segmentWriteState;
private PayloadProcessorProvider payloadProcessorProvider;
-
+
SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
@@ -133,10 +134,10 @@ final class SegmentMerger {
for (String file : files) {
cfsWriter.addFile(file);
}
-
+
// Perform the merge
cfsWriter.close();
-
+
return files;
}
@@ -194,13 +195,12 @@ final class SegmentMerger {
}
/**
- *
+ *
* @return The number of documents in all of the readers
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
private int mergeFields() throws CorruptIndexException, IOException {
-
for (IndexReader reader : readers) {
if (reader instanceof SegmentReader) {
SegmentReader segmentReader = (SegmentReader) reader;
@@ -263,8 +263,8 @@ final class SegmentMerger {
// details.
throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption");
- segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null, new AtomicLong(0));
-
+ segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null);
+
return docCount;
}
@@ -282,7 +282,7 @@ final class SegmentMerger {
++j;
continue;
}
- // We can optimize this case (doing a bulk byte copy) since the field
+ // We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = j, numDocs = 0;
do {
@@ -294,7 +294,7 @@ final class SegmentMerger {
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
-
+
IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs);
docCount += numDocs;
@@ -348,7 +348,7 @@ final class SegmentMerger {
* @throws IOException
*/
private final void mergeVectors() throws IOException {
- TermVectorsWriter termVectorsWriter =
+ TermVectorsWriter termVectorsWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
try {
@@ -368,7 +368,7 @@ final class SegmentMerger {
copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
} else {
copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
-
+
}
}
} finally {
@@ -401,7 +401,7 @@ final class SegmentMerger {
++docNum;
continue;
}
- // We can optimize this case (doing a bulk byte copy) since the field
+ // We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = docNum, numDocs = 0;
do {
@@ -413,7 +413,7 @@ final class SegmentMerger {
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
-
+
matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
checkAbort.work(300 * numDocs);
@@ -424,7 +424,7 @@ final class SegmentMerger {
// skip deleted docs
continue;
}
-
+
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
TermFreqVector[] vectors = reader.getTermFreqVectors(docNum);
@@ -433,7 +433,7 @@ final class SegmentMerger {
}
}
}
-
+
private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter,
final TermVectorsReader matchingVectorsReader,
final IndexReader reader)
@@ -469,13 +469,20 @@ final class SegmentMerger {
// Let CodecProvider decide which codec will be used to write
// the new segment:
-
+
int docBase = 0;
final List<Fields> fields = new ArrayList<Fields>();
+
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
final List<Bits> bits = new ArrayList<Bits>();
final List<Integer> bitsStarts = new ArrayList<Integer>();
+
+ // TODO: move this into its own method - this merges currently only docvalues
+ final List<PerDocValues> perDocProducers = new ArrayList<PerDocValues>();
+ final List<ReaderUtil.Slice> perDocSlices = new ArrayList<ReaderUtil.Slice>();
+ final List<Bits> perDocBits = new ArrayList<Bits>();
+ final List<Integer> perDocBitsStarts = new ArrayList<Integer>();
for(IndexReader r : readers) {
final Fields f = r.fields();
@@ -486,10 +493,18 @@ final class SegmentMerger {
bits.add(r.getDeletedDocs());
bitsStarts.add(docBase);
}
+ final PerDocValues producer = r.perDocValues();
+ if (producer != null) {
+ perDocSlices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
+ perDocProducers.add(producer);
+ perDocBits.add(r.getDeletedDocs());
+ perDocBitsStarts.add(docBase);
+ }
docBase += maxDoc;
}
bitsStarts.add(docBase);
+ perDocBitsStarts.add(docBase);
// we may gather more readers than mergeState.readerCount
mergeState = new MergeState();
@@ -497,7 +512,7 @@ final class SegmentMerger {
mergeState.readerCount = readers.size();
mergeState.fieldInfos = fieldInfos;
mergeState.mergedDocCount = mergedDocs;
-
+
// Remap docIDs
mergeState.delCounts = new int[mergeState.readerCount];
mergeState.docMaps = new int[mergeState.readerCount][];
@@ -535,7 +550,7 @@ final class SegmentMerger {
}
assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount;
}
-
+
if (payloadProcessorProvider != null) {
mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory());
}
@@ -548,7 +563,7 @@ final class SegmentMerger {
// apart when we step through the docs enums in
// MultiDocsEnum.
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
-
+
try {
consumer.merge(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
@@ -556,6 +571,21 @@ final class SegmentMerger {
} finally {
consumer.close();
}
+ if (!perDocSlices.isEmpty()) {
+ mergeState.multiDeletedDocs = new MultiBits(perDocBits, perDocBitsStarts);
+ final PerDocConsumer docsConsumer = codec
+ .docsConsumer(new PerDocWriteState(segmentWriteState));
+ try {
+ docsConsumer.merge(
+ mergeState,
+ new MultiPerDocValues(perDocProducers
+ .toArray(PerDocValues.EMPTY_ARRAY), perDocSlices
+ .toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
+ } finally {
+ docsConsumer.close();
+ }
+ }
+
}
private MergeState mergeState;
@@ -567,7 +597,7 @@ final class SegmentMerger {
int[] getDelCounts() {
return mergeState.delCounts;
}
-
+
public boolean getAnyNonBulkMerges() {
assert matchedCount <= readers.size();
return matchedCount != readers.size();
@@ -578,7 +608,7 @@ final class SegmentMerger {
try {
for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
- if (output == null) {
+ if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
}
@@ -609,7 +639,7 @@ final class SegmentMerger {
}
}
} finally {
- if (output != null) {
+ if (output != null) {
output.close();
}
}
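
The per-doc-values merge added above follows the same gather/slice pattern the
merge already uses for postings: each reader that actually has per-doc values
contributes one producer plus one docBase-anchored slice, the producers are
wrapped into a single MultiPerDocValues view, and one PerDocConsumer performs
the merge and is closed in a finally block. A reduced sketch of that control
flow, using stand-in types in place of IndexReader, PerDocValues,
PerDocConsumer, and ReaderUtil.Slice:

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

interface Reader { int maxDoc(); Producer perDocValues(); }
interface Producer { /* per-reader source of doc values */ }
interface Consumer extends Closeable {
  void merge(List<Producer> producers, List<Slice> slices) throws IOException;
}
final class Slice {
  final int docBase, length;
  Slice(int docBase, int length) { this.docBase = docBase; this.length = length; }
}

final class PerDocMergeSketch {
  static void mergePerDoc(List<Reader> readers, Consumer docsConsumer)
      throws IOException {
    final List<Producer> producers = new ArrayList<Producer>();
    final List<Slice> slices = new ArrayList<Slice>();
    int docBase = 0;
    for (Reader r : readers) {
      final Producer p = r.perDocValues();
      if (p != null) {           // readers without doc values add no slice...
        slices.add(new Slice(docBase, r.maxDoc()));
        producers.add(p);
      }
      docBase += r.maxDoc();     // ...but still advance the doc base
    }
    if (slices.isEmpty()) {
      return;                    // mirrors the !perDocSlices.isEmpty() guard
    }
    try {
      docsConsumer.merge(producers, slices);
    } finally {
      docsConsumer.close();      // closed even if the merge throws
    }
  }
}
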