Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [12/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java Mon Oct 21 18:58:24 2013
@@ -223,11 +223,30 @@ public class FieldInfos implements Itera
(dvType == null || docValuesType.get(name) == null || dvType == docValuesType.get(name));
}
+ /**
+ * Returns true if {@code fieldName} exists in the map and has the
+ * same {@code dvType}.
+ */
+ synchronized boolean contains(String fieldName, DocValuesType dvType) {
+ // used by IndexWriter.updateNumericDocValue
+ if (!nameToNumber.containsKey(fieldName)) {
+ return false;
+ } else {
+ // only return true if the field has the same dvType as the requested one
+ return dvType == docValuesType.get(fieldName);
+ }
+ }
+
synchronized void clear() {
numberToName.clear();
nameToNumber.clear();
docValuesType.clear();
}
+
+ synchronized void setDocValuesType(int number, String name, DocValuesType dvType) {
+ assert containsConsistent(number, name, dvType);
+ docValuesType.put(name, dvType);
+ }
}
static final class Builder {
@@ -287,7 +306,14 @@ public class FieldInfos implements Itera
fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
if (docValues != null) {
- fi.setDocValuesType(docValues);
+ // only pay the synchronization cost if fi does not already have a DVType
+ boolean updateGlobal = !fi.hasDocValues();
+ fi.setDocValuesType(docValues); // this will also perform the consistency check.
+ if (updateGlobal) {
+ // must also update docValuesType map so it's
+ // aware of this field's DocValuesType
+ globalFieldNumbers.setDocValuesType(fi.number, name, docValues);
+ }
}
if (!fi.omitsNorms() && normType != null) {
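
A note on the FieldInfos change above: the new contains() gives IndexWriter.updateNumericDocValue a cheap synchronized check of a field's doc-values type, and the Builder takes the global lock only the first time a field acquires a type. A minimal standalone sketch of that check-then-publish pattern (GlobalFieldMap and DVType are hypothetical stand-ins, not Lucene API):

    import java.util.HashMap;
    import java.util.Map;

    class GlobalFieldMap {
      enum DVType { NUMERIC, BINARY }

      private final Map<String,DVType> docValuesType = new HashMap<String,DVType>();

      // Mirrors FieldInfos.contains: the field must exist and carry the
      // same doc-values type as the requested one.
      synchronized boolean contains(String fieldName, DVType dvType) {
        if (!docValuesType.containsKey(fieldName)) {
          return false;
        }
        return dvType == docValuesType.get(fieldName);
      }

      // Called only when a field first gains a doc-values type, so
      // steady-state indexing skips this lock entirely.
      synchronized void setDocValuesType(String name, DVType dvType) {
        docValuesType.put(name, dvType);
      }
    }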
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.search.CachingWrapperFilter;
@@ -98,11 +97,6 @@ public class FilterAtomicReader extends
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return in.iterator(reuse);
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return in.getComparator();
- }
@Override
public long size() throws IOException {
@@ -125,6 +119,11 @@ public class FilterAtomicReader extends
}
@Override
+ public boolean hasFreqs() {
+ return in.hasFreqs();
+ }
+
+ @Override
public boolean hasOffsets() {
return in.hasOffsets();
}
@@ -200,11 +199,6 @@ public class FilterAtomicReader extends
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
return in.docsAndPositions(liveDocs, reuse, flags);
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return in.getComparator();
- }
}
/** Base class for filtering {@link DocsEnum} implementations. */
@@ -414,4 +408,10 @@ public class FilterAtomicReader extends
return in.getNormValues(field);
}
+ @Override
+ public Bits getDocsWithField(String field) throws IOException {
+ ensureOpen();
+ return in.getDocsWithField(field);
+ }
+
}
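
The FilterAtomicReader changes are pure delegation maintenance: the getComparator overrides go away because term order is no longer pluggable (see FilteredTermsEnum below), and the new hasFreqs and getDocsWithField overrides simply forward to the wrapped reader. A standalone sketch of the forwarding pattern, with TermsLike as a hypothetical stand-in for Lucene's Terms:

    interface TermsLike {
      boolean hasFreqs();
      boolean hasOffsets();
    }

    class FilterTermsSketch implements TermsLike {
      protected final TermsLike in;

      FilterTermsSketch(TermsLike in) {
        this.in = in;
      }

      // Every method forwards to the wrapped instance, exactly like the
      // hasFreqs() override added above; subclasses change only what
      // they need.
      @Override
      public boolean hasFreqs() {
        return in.hasFreqs();
      }

      @Override
      public boolean hasOffsets() {
        return in.hasOffsets();
      }
    }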
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
@@ -28,7 +27,7 @@ import org.apache.lucene.util.Bits;
* Abstract class for enumerating a subset of all terms.
*
* <p>Term enumerations are always ordered by
- * {@link #getComparator}. Each term in the enumeration is
+ * {@link BytesRef#compareTo}. Each term in the enumeration is
* greater than all that precede it.</p>
* <p><em>Please note:</em> Consumers of this enum cannot
* call {@code seek()}, it is forward only; it throws
@@ -135,11 +134,6 @@ public abstract class FilteredTermsEnum
}
@Override
- public Comparator<BytesRef> getComparator() {
- return tenum.getComparator();
- }
-
- @Override
public int docFreq() throws IOException {
return tenum.docFreq();
}
@@ -221,7 +215,7 @@ public abstract class FilteredTermsEnum
final BytesRef t = nextSeekTerm(actualTerm);
//System.out.println(" seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
// Make sure we always seek forward:
- assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
+ assert actualTerm == null || t == null || t.compareTo(actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
if (t == null || tenum.seekCeil(t) == SeekStatus.END) {
// no more terms to seek to or enum exhausted
//System.out.println(" return null");
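
With getComparator gone, enumeration order is fixed to the natural order of BytesRef: term bytes compared lexicographically as unsigned values. That is the order the rewritten assertion (t.compareTo(actualTerm) > 0) enforces for forward-only seeking. A standalone sketch of the comparison, assuming plain byte[] in place of BytesRef:

    class UnsignedBytes {
      // Negative, zero, or positive, like Comparable.compareTo; bytes are
      // widened with & 0xFF so 0x80..0xFF sort above 0x00..0x7F.
      static int compare(byte[] a, byte[] b) {
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
          int diff = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (diff != 0) {
            return diff;
          }
        }
        return a.length - b.length; // shorter prefix sorts first
      }
    }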
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java Mon Oct 21 18:58:24 2013
@@ -68,12 +68,11 @@ class FlushByRamOrCountsPolicy extends F
control.setApplyAllDeletes();
}
}
- final DocumentsWriter writer = this.writer.get();
if ((flushOnRAM() &&
control.getDeleteBytesUsed() > (1024*1024*indexWriterConfig.getRAMBufferSizeMB()))) {
control.setApplyAllDeletes();
- if (writer.infoStream.isEnabled("FP")) {
- writer.infoStream.message("FP", "force apply deletes bytesUsed=" + control.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB()));
+ if (infoStream.isEnabled("FP")) {
+ infoStream.message("FP", "force apply deletes bytesUsed=" + control.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB()));
}
}
}
@@ -89,9 +88,8 @@ class FlushByRamOrCountsPolicy extends F
final long limit = (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024.d * 1024.d);
final long totalRam = control.activeBytes() + control.getDeleteBytesUsed();
if (totalRam >= limit) {
- final DocumentsWriter writer = this.writer.get();
- if (writer.infoStream.isEnabled("FP")) {
- writer.infoStream.message("FP", "flush: activeBytes=" + control.activeBytes() + " deleteBytes=" + control.getDeleteBytesUsed() + " vs limit=" + limit);
+ if (infoStream.isEnabled("FP")) {
+ infoStream.message("FP", "flush: activeBytes=" + control.activeBytes() + " deleteBytes=" + control.getDeleteBytesUsed() + " vs limit=" + limit);
}
markLargestWriterPending(control, state, totalRam);
}
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java Mon Oct 21 18:58:24 2013
@@ -20,6 +20,7 @@ import java.util.Iterator;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.SetOnce;
/**
@@ -52,8 +53,8 @@ import org.apache.lucene.util.SetOnce;
* @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
*/
abstract class FlushPolicy implements Cloneable {
- protected SetOnce<DocumentsWriter> writer = new SetOnce<DocumentsWriter>();
protected LiveIndexWriterConfig indexWriterConfig;
+ protected InfoStream infoStream;
/**
* Called for each delete term. If this is a delete triggered due to an update
@@ -93,9 +94,9 @@ abstract class FlushPolicy implements Cl
/**
* Called by DocumentsWriter to initialize the FlushPolicy
*/
- protected synchronized void init(DocumentsWriter docsWriter) {
- writer.set(docsWriter);
- indexWriterConfig = docsWriter.indexWriter.getConfig();
+ protected synchronized void init(LiveIndexWriterConfig indexWriterConfig) {
+ this.indexWriterConfig = indexWriterConfig;
+ infoStream = indexWriterConfig.getInfoStream();
}
/**
@@ -127,8 +128,8 @@ abstract class FlushPolicy implements Cl
}
private boolean assertMessage(String s) {
- if (writer.get().infoStream.isEnabled("FP")) {
- writer.get().infoStream.message("FP", s);
+ if (infoStream.isEnabled("FP")) {
+ infoStream.message("FP", s);
}
return true;
}
@@ -142,8 +143,8 @@ abstract class FlushPolicy implements Cl
// should not happen
throw new RuntimeException(e);
}
- clone.writer = new SetOnce<DocumentsWriter>();
clone.indexWriterConfig = null;
+ clone.infoStream = null;
return clone;
}
}
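
The net effect of the FlushPolicy change: instead of a SetOnce<DocumentsWriter> back-pointer, the policy now holds only the live config and a cached InfoStream, which is why FlushByRamOrCountsPolicy above can log without reaching through the writer. A standalone sketch of the decoupling, with Config and Log as hypothetical stand-ins for LiveIndexWriterConfig and InfoStream:

    interface Log {
      boolean isEnabled(String component);
      void message(String component, String message);
    }

    interface Config {
      Log getInfoStream();
    }

    abstract class PolicySketch {
      protected Config config;
      protected Log infoStream;

      // Mirrors the new FlushPolicy.init: take the config directly and
      // cache the log sink once, with no reference back to the owner.
      protected synchronized void init(Config config) {
        this.config = config;
        this.infoStream = config.getInfoStream();
      }
    }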
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Mon Oct 21 18:58:24 2013
@@ -19,19 +19,64 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
-import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.util.IOUtils;
final class FreqProxTermsWriter extends TermsHashConsumer {
@Override
void abort() {}
+ private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
+ // Process any pending Term deletes for this newly
+ // flushed segment:
+ if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+ Map<Term,Integer> segDeletes = state.segDeletes.terms;
+ List<Term> deleteTerms = new ArrayList<Term>(segDeletes.keySet());
+ Collections.sort(deleteTerms);
+ String lastField = null;
+ TermsEnum termsEnum = null;
+ DocsEnum docsEnum = null;
+ for(Term deleteTerm : deleteTerms) {
+ if (deleteTerm.field().equals(lastField) == false) {
+ lastField = deleteTerm.field();
+ Terms terms = fields.terms(lastField);
+ if (terms != null) {
+ termsEnum = terms.iterator(termsEnum);
+ } else {
+ termsEnum = null;
+ }
+ }
+
+ if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
+ docsEnum = termsEnum.docs(null, docsEnum, 0);
+ int delDocLimit = segDeletes.get(deleteTerm);
+ while (true) {
+ int doc = docsEnum.nextDoc();
+ if (doc == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ if (doc < delDocLimit) {
+ if (state.liveDocs == null) {
+ state.liveDocs = state.segmentInfo.getCodec().liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
+ }
+ if (state.liveDocs.get(doc)) {
+ state.delCountOnFlush++;
+ state.liveDocs.clear(doc);
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
@@ -47,63 +92,20 @@ final class FreqProxTermsWriter extends
for (TermsHashConsumerPerField f : fieldsToFlush.values()) {
final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
if (perField.termsHashPerField.bytesHash.size() > 0) {
+ perField.sortPostings();
+ assert perField.fieldInfo.isIndexed();
allFields.add(perField);
}
}
- final int numAllFields = allFields.size();
-
// Sort by field name
CollectionUtil.introSort(allFields);
- final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
+ Fields fields = new FreqProxFields(allFields);
- boolean success = false;
+ applyDeletes(state, fields);
- try {
- TermsHash termsHash = null;
-
- /*
- Current writer chain:
- FieldsConsumer
- -> IMPL: FormatPostingsTermsDictWriter
- -> TermsConsumer
- -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
- -> DocsConsumer
- -> IMPL: FormatPostingsDocsWriter
- -> PositionsConsumer
- -> IMPL: FormatPostingsPositionsWriter
- */
-
- for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
- final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
-
- final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
-
- // If this field has postings then add them to the
- // segment
- fieldWriter.flush(fieldInfo.name, consumer, state);
-
- TermsHashPerField perField = fieldWriter.termsHashPerField;
- assert termsHash == null || termsHash == perField.termsHash;
- termsHash = perField.termsHash;
- int numPostings = perField.bytesHash.size();
- perField.reset();
- perField.shrinkHash(numPostings);
- fieldWriter.reset();
- }
-
- if (termsHash != null) {
- termsHash.reset();
- }
- success = true;
- } finally {
- if (success) {
- IOUtils.close(consumer);
- } else {
- IOUtils.closeWhileHandlingException(consumer);
- }
- }
+ state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state).write(fields);
}
BytesRef payload;
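
The new applyDeletes above absorbs the per-term delete handling that previously lived inside the per-field flush loop (removed from FreqProxTermsWriterPerField below): delete terms are sorted, a TermsEnum is obtained per field, and each matching doc below the term's recorded docIDUpto has its liveDocs bit cleared. A standalone sketch of the same loop over plain data structures, with postings and liveDocs standing in for the TermsEnum/DocsEnum/Bits machinery:

    import java.util.ArrayList;
    import java.util.BitSet;
    import java.util.Collections;
    import java.util.List;
    import java.util.Map;

    class ApplyDeletesSketch {
      // segDeletes maps a delete term to its docIDUpto; postings maps each
      // term to the ascending docIDs containing it; liveDocs arrives with
      // all bits set.
      static int apply(Map<String,Integer> segDeletes,
                       Map<String,int[]> postings,
                       BitSet liveDocs) {
        List<String> terms = new ArrayList<String>(segDeletes.keySet());
        Collections.sort(terms);                  // visit terms in sorted order
        int delCount = 0;
        for (String term : terms) {
          int delDocLimit = segDeletes.get(term); // only docs below this limit
          int[] docs = postings.get(term);
          if (docs == null) {
            continue;                             // term absent from segment
          }
          for (int doc : docs) {
            if (doc >= delDocLimit) {
              break;                              // docIDs ascend, stop early
            }
            if (liveDocs.get(doc)) {
              liveDocs.clear(doc);                // mark deleted exactly once
              delCount++;                         // cf. state.delCountOnFlush
            }
          }
        }
        return delCount;
      }
    }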
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Mon Oct 21 18:58:24 2013
@@ -17,19 +17,10 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Map;
-
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
// TODO: break into separate freq and prox writers as
@@ -42,11 +33,16 @@ final class FreqProxTermsWriterPerField
final FieldInfo fieldInfo;
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
- private boolean hasFreq;
- private boolean hasProx;
- private boolean hasOffsets;
+ boolean hasFreq;
+ boolean hasProx;
+ boolean hasOffsets;
PayloadAttribute payloadAttribute;
OffsetAttribute offsetAttribute;
+ long sumTotalTermFreq;
+ long sumDocFreq;
+
+ // How many docs have this field:
+ int docCount;
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
@@ -68,6 +64,12 @@ final class FreqProxTermsWriterPerField
@Override
void finish() {
+ sumDocFreq += fieldState.uniqueTermCount;
+ sumTotalTermFreq += fieldState.length;
+ if (fieldState.length > 0) {
+ docCount++;
+ }
+
if (hasPayloads) {
fieldInfo.setStorePayloads();
}
@@ -83,14 +85,6 @@ final class FreqProxTermsWriterPerField
return fieldInfo.name.compareTo(other.fieldInfo.name);
}
- // Called after flush
- void reset() {
- // Record, up front, whether our in-RAM format will be
- // with or without term freqs:
- setIndexOptions(fieldInfo.getIndexOptions());
- payloadAttribute = null;
- }
-
private void setIndexOptions(IndexOptions indexOptions) {
if (indexOptions == null) {
// field could later be updated with indexed=true, so set everything on
@@ -318,233 +312,10 @@ final class FreqProxTermsWriterPerField
BytesRef payload;
- /* Walk through all unique text tokens (Posting
- * instances) found in this field and serialize them
- * into a single RAM segment. */
- void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state)
- throws IOException {
-
- if (!fieldInfo.isIndexed()) {
- return; // nothing to flush, don't bother the codec with the unindexed field
- }
-
- final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
- final Comparator<BytesRef> termComp = termsConsumer.getComparator();
-
- // CONFUSING: this.indexOptions holds the index options
- // that were current when we first saw this field. But
- // it's possible this has changed, eg when other
- // documents are indexed that cause a "downgrade" of the
- // IndexOptions. So we must decode the in-RAM buffer
- // according to this.indexOptions, but then write the
- // new segment to the directory according to
- // currentFieldIndexOptions:
- final IndexOptions currentFieldIndexOptions = fieldInfo.getIndexOptions();
- assert currentFieldIndexOptions != null;
-
- final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
-
- final boolean readTermFreq = this.hasFreq;
- final boolean readPositions = this.hasProx;
- final boolean readOffsets = this.hasOffsets;
-
- //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
-
- // Make sure FieldInfo.update is working correctly!:
- assert !writeTermFreq || readTermFreq;
- assert !writePositions || readPositions;
- assert !writeOffsets || readOffsets;
-
- assert !writeOffsets || writePositions;
-
- final Map<Term,Integer> segDeletes;
- if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
- segDeletes = state.segDeletes.terms;
- } else {
- segDeletes = null;
- }
-
- final int[] termIDs = termsHashPerField.sortPostings(termComp);
- final int numTerms = termsHashPerField.bytesHash.size();
- final BytesRef text = new BytesRef();
- final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
- final ByteSliceReader freq = new ByteSliceReader();
- final ByteSliceReader prox = new ByteSliceReader();
-
- FixedBitSet visitedDocs = new FixedBitSet(state.segmentInfo.getDocCount());
- long sumTotalTermFreq = 0;
- long sumDocFreq = 0;
-
- Term protoTerm = new Term(fieldName);
- for (int i = 0; i < numTerms; i++) {
- final int termID = termIDs[i];
- //System.out.println("term=" + termID);
- // Get BytesRef
- final int textStart = postings.textStarts[termID];
- termsHashPerField.bytePool.setBytesRef(text, textStart);
-
- termsHashPerField.initReader(freq, termID, 0);
- if (readPositions || readOffsets) {
- termsHashPerField.initReader(prox, termID, 1);
- }
-
- // TODO: really TermsHashPerField should take over most
- // of this loop, including merge sort of terms from
- // multiple threads and interacting with the
- // TermsConsumer, only calling out to us (passing us the
- // DocsConsumer) to handle delivery of docs/positions
-
- final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
-
- final int delDocLimit;
- if (segDeletes != null) {
- protoTerm.bytes = text;
- final Integer docIDUpto = segDeletes.get(protoTerm);
- if (docIDUpto != null) {
- delDocLimit = docIDUpto;
- } else {
- delDocLimit = 0;
- }
- } else {
- delDocLimit = 0;
- }
-
- // Now termStates has numToMerge FieldMergeStates
- // which all share the same term. Now we must
- // interleave the docID streams.
- int docFreq = 0;
- long totalTermFreq = 0;
- int docID = 0;
-
- while(true) {
- //System.out.println(" cycle");
- final int termFreq;
- if (freq.eof()) {
- if (postings.lastDocCodes[termID] != -1) {
- // Return last doc
- docID = postings.lastDocIDs[termID];
- if (readTermFreq) {
- termFreq = postings.termFreqs[termID];
- } else {
- termFreq = -1;
- }
- postings.lastDocCodes[termID] = -1;
- } else {
- // EOF
- break;
- }
- } else {
- final int code = freq.readVInt();
- if (!readTermFreq) {
- docID += code;
- termFreq = -1;
- } else {
- docID += code >>> 1;
- if ((code & 1) != 0) {
- termFreq = 1;
- } else {
- termFreq = freq.readVInt();
- }
- }
-
- assert docID != postings.lastDocIDs[termID];
- }
-
- docFreq++;
- assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
-
- // NOTE: we could check here if the docID was
- // deleted, and skip it. However, this is somewhat
- // dangerous because it can yield non-deterministic
- // behavior since we may see the docID before we see
- // the term that caused it to be deleted. This
- // would mean some (but not all) of its postings may
- // make it into the index, which'd alter the docFreq
- // for those terms. We could fix this by doing two
- // passes, ie first sweep marks all del docs, and
- // 2nd sweep does the real flush, but I suspect
- // that'd add too much time to flush.
- visitedDocs.set(docID);
- postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
- if (docID < delDocLimit) {
- // Mark it deleted. TODO: we could also skip
- // writing its postings; this would be
- // deterministic (just for this Term's docs).
-
- // TODO: can we do this reach-around in a cleaner way????
- if (state.liveDocs == null) {
- state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
- }
- if (state.liveDocs.get(docID)) {
- state.delCountOnFlush++;
- state.liveDocs.clear(docID);
- }
- }
-
- totalTermFreq += termFreq;
-
- // Carefully copy over the prox + payload info,
- // changing the format to match Lucene's segment
- // format.
-
- if (readPositions || readOffsets) {
- // we did record positions (& maybe payload) and/or offsets
- int position = 0;
- int offset = 0;
- for(int j=0;j<termFreq;j++) {
- final BytesRef thisPayload;
-
- if (readPositions) {
- final int code = prox.readVInt();
- position += code >>> 1;
-
- if ((code & 1) != 0) {
-
- // This position has a payload
- final int payloadLength = prox.readVInt();
-
- if (payload == null) {
- payload = new BytesRef();
- payload.bytes = new byte[payloadLength];
- } else if (payload.bytes.length < payloadLength) {
- payload.grow(payloadLength);
- }
-
- prox.readBytes(payload.bytes, 0, payloadLength);
- payload.length = payloadLength;
- thisPayload = payload;
-
- } else {
- thisPayload = null;
- }
-
- if (readOffsets) {
- final int startOffset = offset + prox.readVInt();
- final int endOffset = startOffset + prox.readVInt();
- if (writePositions) {
- if (writeOffsets) {
- assert startOffset >=0 && endOffset >= startOffset : "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset;
- postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
- } else {
- postingsConsumer.addPosition(position, thisPayload, -1, -1);
- }
- }
- offset = startOffset;
- } else if (writePositions) {
- postingsConsumer.addPosition(position, thisPayload, -1, -1);
- }
- }
- }
- }
- postingsConsumer.finishDoc();
- }
- termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
- sumTotalTermFreq += totalTermFreq;
- sumDocFreq += docFreq;
- }
+ int[] sortedTermIDs;
- termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
+ void sortPostings() {
+ assert sortedTermIDs == null;
+ sortedTermIDs = termsHashPerField.sortPostings();
}
}
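
With the big flush method deleted, the per-field writer is reduced to two jobs: accumulate segment statistics as each document finishes, and sort its term IDs once via sortPostings() before the shared write. A minimal sketch of the accumulation now done in finish():

    class FieldStatsSketch {
      long sumTotalTermFreq; // total token occurrences across all docs
      long sumDocFreq;       // total distinct (doc, term) pairs
      int docCount;          // docs that actually contain the field

      // Hypothetical per-document hook mirroring finish() above;
      // uniqueTermCount and length play the role of fieldState's counters.
      void finishDoc(int uniqueTermCount, int length) {
        sumDocFreq += uniqueTermCount;
        sumTotalTermFreq += length;
        if (length > 0) {
          docCount++;
        }
      }
    }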
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Mon Oct 21 18:58:24 2013
@@ -17,32 +17,39 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
+import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
-
-/** Holds buffered deletes by term or query, once pushed.
- * Pushed deletes are write-once, so we shift to more
- * memory efficient data structure to hold them. We don't
- * hold docIDs because these are applied on flush. */
-class FrozenBufferedDeletes {
+/**
+ * Holds buffered deletes and updates by term or query, once pushed. Pushed
+ * deletes/updates are write-once, so we shift to more memory efficient data
+ * structure to hold them. We don't hold docIDs because these are applied on
+ * flush.
+ */
+class FrozenBufferedDeletes { // TODO (DVU_RENAME) FrozenBufferedUpdates?
/* Query we often undercount (say 24 bytes), plus int. */
final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24;
-
+
// Terms, in sorted order:
final PrefixCodedTerms terms;
int termCount; // just for debugging
- // Parallel array of deleted query, and the docIDUpto for
- // each
+ // Parallel array of deleted query, and the docIDUpto for each
final Query[] queries;
final int[] queryLimits;
+
+ // numeric DV update term and their updates
+ final NumericUpdate[] updates;
+
final int bytesUsed;
final int numTermDeletes;
private long gen = -1; // assigned by BufferedDeletesStream once pushed
@@ -72,7 +79,21 @@ class FrozenBufferedDeletes {
upto++;
}
- bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY;
+ // TODO if a Term affects multiple fields, we could keep the updates keyed by Term
+ // so that it maps to all fields it affects, sorted by their docUpto, and traverse
+ // that Term only once, applying the update to all fields that still need to be
+ // updated.
+ List<NumericUpdate> allUpdates = new ArrayList<NumericUpdate>();
+ int numericUpdatesSize = 0;
+ for (LinkedHashMap<Term,NumericUpdate> fieldUpdates : deletes.numericUpdates.values()) {
+ for (NumericUpdate update : fieldUpdates.values()) {
+ allUpdates.add(update);
+ numericUpdatesSize += update.sizeInBytes();
+ }
+ }
+ updates = allUpdates.toArray(new NumericUpdate[allUpdates.size()]);
+
+ bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY + numericUpdatesSize + updates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
numTermDeletes = deletes.numTermDeletes.get();
}
@@ -140,6 +161,6 @@ class FrozenBufferedDeletes {
}
boolean any() {
- return termCount > 0 || queries.length > 0;
+ return termCount > 0 || queries.length > 0 || updates.length > 0;
}
}
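
Freezing now also flattens the per-field numeric doc-values updates into a single array and folds their footprint into bytesUsed, which lets any() treat pending updates like pending deletes. A standalone sketch of the flatten-and-account step, with Update as a hypothetical stand-in for NumericUpdate:

    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.Map;

    class FreezeSketch {
      static class Update {
        int sizeInBytes() { return 32; } // assumed per-update footprint
      }

      long bytesUsed;

      Update[] freeze(Collection<Map<String,Update>> perFieldUpdates) {
        List<Update> all = new ArrayList<Update>();
        for (Map<String,Update> updates : perFieldUpdates) {
          for (Update update : updates.values()) {
            all.add(update);
            bytesUsed += update.sizeInBytes(); // account for each update
          }
        }
        return all.toArray(new Update[all.size()]);
      }
    }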
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java Mon Oct 21 18:58:24 2013
@@ -92,7 +92,7 @@ final class IndexFileDeleter implements
/* Holds files we had incref'd from the previous
* non-commit checkpoint: */
- private List<Collection<String>> lastFiles = new ArrayList<Collection<String>>();
+ private final List<String> lastFiles = new ArrayList<String>();
/* Commits that the IndexDeletionPolicy have decided to delete: */
private List<CommitPoint> commitsToDelete = new ArrayList<CommitPoint>();
@@ -361,14 +361,13 @@ final class IndexFileDeleter implements
refresh(null);
}
+ @Override
public void close() throws IOException {
// DecRef old files from the last checkpoint, if any:
assert locked();
- int size = lastFiles.size();
- if (size > 0) {
- for(int i=0;i<size;i++) {
- decRef(lastFiles.get(i));
- }
+
+ if (!lastFiles.isEmpty()) {
+ decRef(lastFiles);
lastFiles.clear();
}
@@ -459,13 +458,11 @@ final class IndexFileDeleter implements
deleteCommits();
} else {
// DecRef old files from the last checkpoint, if any:
- for (Collection<String> lastFile : lastFiles) {
- decRef(lastFile);
- }
+ decRef(lastFiles);
lastFiles.clear();
// Save files so we can decr on next checkpoint/commit:
- lastFiles.add(segmentInfos.files(directory, false));
+ lastFiles.addAll(segmentInfos.files(directory, false));
}
if (infoStream.isEnabled("IFD")) {
long t1 = System.nanoTime();
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java Mon Oct 21 18:58:24 2013
@@ -71,6 +71,9 @@ public final class IndexUpgrader {
* command-line. */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException {
+ parseArgs(args).upgrade();
+ }
+ static IndexUpgrader parseArgs(String[] args) throws IOException {
String path = null;
boolean deletePriorCommits = false;
PrintStream out = null;
@@ -82,8 +85,6 @@ public final class IndexUpgrader {
deletePriorCommits = true;
} else if ("-verbose".equals(arg)) {
out = System.out;
- } else if (path == null) {
- path = arg;
} else if ("-dir-impl".equals(arg)) {
if (i == args.length - 1) {
System.out.println("ERROR: missing value for -dir-impl option");
@@ -91,6 +92,8 @@ public final class IndexUpgrader {
}
i++;
dirImpl = args[i];
+ } else if (path == null) {
+ path = arg;
} else {
printUsage();
}
@@ -106,7 +109,7 @@ public final class IndexUpgrader {
} else {
dir = CommandLineUtil.newFSDirectory(dirImpl, new File(path));
}
- new IndexUpgrader(dir, Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
+ return new IndexUpgrader(dir, Version.LUCENE_CURRENT, out, deletePriorCommits);
}
private final Directory dir;
@@ -123,7 +126,10 @@ public final class IndexUpgrader {
* {@code matchVersion}. You have the possibility to upgrade indexes with multiple commit points by removing
* all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
- this(dir, new IndexWriterConfig(matchVersion, null).setInfoStream(infoStream), deletePriorCommits);
+ this(dir, new IndexWriterConfig(matchVersion, null), deletePriorCommits);
+ if (null != infoStream) {
+ this.iwc.setInfoStream(infoStream);
+ }
}
/** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
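
Two things happen in the IndexUpgrader diff: main() becomes a thin wrapper over a testable parseArgs(), and the bare-path branch moves below the -dir-impl check, since the old ordering would capture "-dir-impl" itself as the path. A standalone sketch of the corrected branch order:

    class ArgsSketch {
      String path;
      String dirImpl;

      // Options must be matched before the positional fallback; only an
      // argument that matches no option becomes the path. Swapping these
      // branches reintroduces the bug the diff fixes.
      void parse(String[] args) {
        for (int i = 0; i < args.length; i++) {
          String arg = args[i];
          if ("-dir-impl".equals(arg) && i < args.length - 1) {
            dirImpl = args[++i];  // consume the option's value
          } else if (path == null) {
            path = arg;           // first bare argument is the path
          }
        }
      }
    }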