You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/08/13 15:53:27 UTC
svn commit: r1372423 [30/45] - in /lucene/dev/branches/LUCENE-2878: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ d...
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java Mon Aug 13 13:52:46 2012
@@ -45,10 +45,12 @@ public class SimpleTextTermVectorsWriter
static final BytesRef FIELDNAME = new BytesRef(" name ");
static final BytesRef FIELDPOSITIONS = new BytesRef(" positions ");
static final BytesRef FIELDOFFSETS = new BytesRef(" offsets ");
+ static final BytesRef FIELDPAYLOADS = new BytesRef(" payloads ");
static final BytesRef FIELDTERMCOUNT = new BytesRef(" numterms ");
static final BytesRef TERMTEXT = new BytesRef(" term ");
static final BytesRef TERMFREQ = new BytesRef(" freq ");
static final BytesRef POSITION = new BytesRef(" position ");
+ static final BytesRef PAYLOAD = new BytesRef(" payload ");
static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
@@ -61,6 +63,7 @@ public class SimpleTextTermVectorsWriter
private final BytesRef scratch = new BytesRef();
private boolean offsets;
private boolean positions;
+ private boolean payloads;
public SimpleTextTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
this.directory = directory;
@@ -89,7 +92,7 @@ public class SimpleTextTermVectorsWriter
}
@Override
- public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
+ public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
write(FIELD);
write(Integer.toString(info.number));
newLine();
@@ -106,12 +109,17 @@ public class SimpleTextTermVectorsWriter
write(Boolean.toString(offsets));
newLine();
+ write(FIELDPAYLOADS);
+ write(Boolean.toString(payloads));
+ newLine();
+
write(FIELDTERMCOUNT);
write(Integer.toString(numTerms));
newLine();
this.positions = positions;
this.offsets = offsets;
+ this.payloads = payloads;
}
@Override
@@ -126,13 +134,22 @@ public class SimpleTextTermVectorsWriter
}
@Override
- public void addPosition(int position, int startOffset, int endOffset) throws IOException {
+ public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
assert positions || offsets;
if (positions) {
write(POSITION);
write(Integer.toString(position));
newLine();
+
+ if (payloads) {
+ write(PAYLOAD);
+ if (payload != null) {
+ assert payload.length > 0;
+ write(payload);
+ }
+ newLine();
+ }
}
if (offsets) {
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/Field.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/Field.java Mon Aug 13 13:52:46 2012
@@ -377,6 +377,11 @@ public class Field implements IndexableF
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
+ if (boost != 1.0f) {
+ if (type.indexed() == false || type.omitNorms()) {
+ throw new IllegalArgumentException("You cannot set an index-time boost on an unindexed field, or one that omits norms");
+ }
+ }
this.boost = boost;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/FieldType.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/FieldType.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/FieldType.java Mon Aug 13 13:52:46 2012
@@ -39,6 +39,7 @@ public class FieldType implements Indexa
private boolean storeTermVectors;
private boolean storeTermVectorOffsets;
private boolean storeTermVectorPositions;
+ private boolean storeTermVectorPayloads;
private boolean omitNorms;
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
private DocValues.Type docValueType;
@@ -53,6 +54,7 @@ public class FieldType implements Indexa
this.storeTermVectors = ref.storeTermVectors();
this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
this.storeTermVectorPositions = ref.storeTermVectorPositions();
+ this.storeTermVectorPayloads = ref.storeTermVectorPayloads();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValueType = ref.docValueType();
@@ -132,6 +134,15 @@ public class FieldType implements Indexa
this.storeTermVectorPositions = value;
}
+ public boolean storeTermVectorPayloads() {
+ return this.storeTermVectorPayloads;
+ }
+
+ public void setStoreTermVectorPayloads(boolean value) {
+ checkIfFrozen();
+ this.storeTermVectorPayloads = value;
+ }
+
public boolean omitNorms() {
return this.omitNorms;
}
@@ -198,24 +209,19 @@ public class FieldType implements Indexa
result.append(",");
result.append("indexed");
if (tokenized()) {
- if (result.length() > 0)
- result.append(",");
- result.append("tokenized");
+ result.append(",tokenized");
}
if (storeTermVectors()) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVector");
+ result.append(",termVector");
}
if (storeTermVectorOffsets()) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorOffsets");
+ result.append(",termVectorOffsets");
}
if (storeTermVectorPositions()) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorPosition");
+ result.append(",termVectorPosition");
+ if (storeTermVectorPayloads()) {
+ result.append(",termVectorPayloads");
+ }
}
if (omitNorms()) {
result.append(",omitNorms");
@@ -232,7 +238,9 @@ public class FieldType implements Indexa
}
}
if (docValueType != null) {
- result.append(",docValueType=");
+ if (result.length() > 0)
+ result.append(",");
+ result.append("docValueType=");
result.append(docValueType);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StoredField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StoredField.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StoredField.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StoredField.java Mon Aug 13 13:52:46 2012
@@ -49,6 +49,7 @@ public final class StoredField extends F
super(name, value, TYPE);
}
+ // TODO: not great but maybe not a big problem?
public StoredField(String name, int value) {
super(name, TYPE);
fieldsData = value;
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StringField.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/StringField.java Mon Aug 13 13:52:46 2012
@@ -54,9 +54,4 @@ public final class StringField extends F
public StringField(String name, String value, Store stored) {
super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
}
-
- @Override
- public String stringValue() {
- return (fieldsData == null) ? null : fieldsData.toString();
- }
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/TextField.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/TextField.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/document/TextField.java Mon Aug 13 13:52:46 2012
@@ -46,9 +46,9 @@ public final class TextField extends Fie
// TODO: add sugar for term vectors...?
- /** Creates a new TextField with Reader value. */
- public TextField(String name, Reader reader, Store store) {
- super(name, reader, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
+ /** Creates a new un-stored TextField with Reader value. */
+ public TextField(String name, Reader reader) {
+ super(name, reader, TYPE_NOT_STORED);
}
/** Creates a new TextField with String value. */
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java Mon Aug 13 13:52:46 2012
@@ -116,9 +116,19 @@ public abstract class AtomicReader exten
}
/** Returns {@link DocsEnum} for the specified field &
- * term. This may return null, if either the field or
+ * term. This will return null if either the field or
* term does not exist. */
- public final DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
+ public final DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
+ return termDocsEnum(liveDocs, field, term, DocsEnum.FLAG_FREQS);
+ }
+
+ /** Returns {@link DocsEnum} for the specified field &
+ * term, with control over whether freqs are required.
+ * Some codecs may be able to optimize their
+ * implementation when freqs are not required. This will
+ * return null if the field or term does not
+ * exist. See {@link TermsEnum#docs(Bits,DocsEnum,int)}. */
+ public final DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
assert field != null;
assert term != null;
final Fields fields = fields();
@@ -127,18 +137,30 @@ public abstract class AtomicReader exten
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
- return termsEnum.docs(liveDocs, null, needsFreqs);
+ return termsEnum.docs(liveDocs, null, flags);
}
}
}
return null;
}
+
+ /** Returns {@link DocsAndPositionsEnum} for the specified
+ * field & term. This will return null if the
+ * field or term does not exist or positions weren't indexed.
+ * @see #termPositionsEnum(Bits, String, BytesRef, int) */
+ public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
+ return termPositionsEnum(liveDocs, field, term, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);
+ }
+
/** Returns {@link DocsAndPositionsEnum} for the specified
- * field & term. This may return null, if either the
- * field or term does not exist, or needsOffsets is
- * true but offsets were not indexed for this field. */
- public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
+ * field & term, with control over whether offsets and payloads are
+ * required. Some codecs may be able to optimize their
+ * implementation when offsets and/or payloads are not required.
+ * This will return null if the field or term
+ * does not exist or positions weren't indexed. See
+ * {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}. */
+ public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
assert field != null;
assert term != null;
final Fields fields = fields();
@@ -147,7 +169,7 @@ public abstract class AtomicReader exten
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
- return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
+ return termsEnum.docsAndPositions(liveDocs, null, flags);
}
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java Mon Aug 13 13:52:46 2012
@@ -1,8 +1,5 @@
package org.apache.lucene.index;
-import java.util.Collections;
-import java.util.List;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import java.util.List;
* limitations under the License.
*/
+import java.util.Collections;
+import java.util.List;
+
/**
* {@link IndexReaderContext} for {@link AtomicReader} instances
* @lucene.experimental
@@ -51,8 +51,9 @@ public final class AtomicReaderContext e
@Override
public List<AtomicReaderContext> leaves() {
- if (!isTopLevel)
+ if (!isTopLevel) {
throw new UnsupportedOperationException("This is not a top-level context.");
+ }
assert leaves != null;
return leaves;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java Mon Aug 13 13:52:46 2012
@@ -396,7 +396,8 @@ class BufferedDeletesStream {
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes(), false)) {
- DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, false);
+ // we don't need term frequencies for this
+ DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, 0);
//System.out.println("BDS: got docsEnum=" + docsEnum);
if (docsEnum != null) {
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Mon Aug 13 13:52:46 2012
@@ -34,6 +34,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.FieldType; // for javadocs
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -682,15 +683,9 @@ public class CheckIndex {
DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
- DocsAndPositionsEnum offsets = null;
String lastField = null;
- final FieldsEnum fieldsEnum = fields.iterator();
- while(true) {
- final String field = fieldsEnum.next();
- if (field == null) {
- break;
- }
+ for (String field : fields) {
// MultiFieldsEnum relies upon this order...
if (lastField != null && field.compareTo(lastField) <= 0) {
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
@@ -699,11 +694,11 @@ public class CheckIndex {
// check that the field is in fieldinfos, and is indexed.
// TODO: add a separate test to check this for different reader impls
- FieldInfo fi = fieldInfos.fieldInfo(field);
- if (fi == null) {
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ if (fieldInfo == null) {
throw new RuntimeException("fieldsEnum inconsistent with fieldInfos, no fieldInfos for: " + field);
}
- if (!fi.isIndexed()) {
+ if (!fieldInfo.isIndexed()) {
throw new RuntimeException("fieldsEnum inconsistent with fieldInfos, isIndexed == false for: " + field);
}
@@ -713,11 +708,16 @@ public class CheckIndex {
// assert fields.terms(field) != null;
computedFieldCount++;
- final Terms terms = fieldsEnum.terms();
+ final Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
+ final boolean hasPositions = terms.hasPositions();
+ final boolean hasOffsets = terms.hasOffsets();
+ // term vectors cannot omit TF
+ final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+
final TermsEnum termsEnum = terms.iterator(null);
boolean hasOrd = true;
@@ -755,10 +755,8 @@ public class CheckIndex {
status.totFreq += docFreq;
sumDocFreq += docFreq;
- docs = termsEnum.docs(liveDocs, docs, false);
- docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
- postings = termsEnum.docsAndPositions(liveDocs, postings, false);
- offsets = termsEnum.docsAndPositions(liveDocs, offsets, true);
+ docs = termsEnum.docs(liveDocs, docs);
+ postings = termsEnum.docsAndPositions(liveDocs, postings);
if (hasOrd) {
long ord = -1;
@@ -779,34 +777,10 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
- final DocsEnum docsAndFreqs2;
- final boolean hasPositions;
- final boolean hasFreqs;
- final boolean hasOffsets;
- if (offsets != null) {
- docs2 = postings = offsets;
- docsAndFreqs2 = postings = offsets;
- hasOffsets = true;
- hasPositions = true;
- hasFreqs = true;
- } else if (postings != null) {
+ if (postings != null) {
docs2 = postings;
- docsAndFreqs2 = postings;
- hasOffsets = false;
- hasPositions = true;
- hasFreqs = true;
- } else if (docsAndFreqs != null) {
- docs2 = docsAndFreqs;
- docsAndFreqs2 = docsAndFreqs;
- hasOffsets = false;
- hasPositions = false;
- hasFreqs = true;
} else {
docs2 = docs;
- docsAndFreqs2 = null;
- hasOffsets = false;
- hasPositions = false;
- hasFreqs = false;
}
int lastDoc = -1;
@@ -820,7 +794,7 @@ public class CheckIndex {
visitedDocs.set(doc);
int freq = -1;
if (hasFreqs) {
- freq = docsAndFreqs2.freq();
+ freq = docs2.freq();
if (freq <= 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
}
@@ -843,22 +817,17 @@ public class CheckIndex {
if (hasPositions) {
for(int j=0;j<freq;j++) {
final int pos = postings.nextPosition();
- // NOTE: pos=-1 is allowed because of ancient bug
- // (LUCENE-1542) whereby IndexWriter could
- // write pos=-1 when first token's posInc is 0
- // (separately: analyzers should not give
- // posInc=0 to first token); also, term
- // vectors are allowed to return pos=-1 if
- // they indexed offset but not positions:
- if (pos < -1) {
+
+ if (pos < 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
}
if (pos < lastPos) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
}
lastPos = pos;
- if (postings.hasPayload()) {
- postings.getPayload();
+ BytesRef payload = postings.getPayload();
+ if (payload != null && payload.length < 1) {
+ throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.length);
}
if (hasOffsets) {
int startOffset = postings.startOffset();
@@ -886,12 +855,12 @@ public class CheckIndex {
}
final long totalTermFreq2 = termsEnum.totalTermFreq();
- final boolean hasTotalTermFreq = postings != null && totalTermFreq2 != -1;
+ final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;
// Re-count if there are deleted docs:
if (liveDocs != null) {
if (hasFreqs) {
- final DocsEnum docsNoDel = termsEnum.docs(null, docsAndFreqs, true);
+ final DocsEnum docsNoDel = termsEnum.docs(null, docsAndFreqs);
docCount = 0;
totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
@@ -900,7 +869,7 @@ public class CheckIndex {
totalTermFreq += docsNoDel.freq();
}
} else {
- final DocsEnum docsNoDel = termsEnum.docs(null, docs, false);
+ final DocsEnum docsNoDel = termsEnum.docs(null, docs, 0);
docCount = 0;
totalTermFreq = -1;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
@@ -927,7 +896,7 @@ public class CheckIndex {
if (hasPositions) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
- postings = termsEnum.docsAndPositions(liveDocs, postings, hasOffsets);
+ postings = termsEnum.docsAndPositions(liveDocs, postings);
final int docID = postings.advance(skipDocID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
@@ -943,14 +912,8 @@ public class CheckIndex {
int lastOffset = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = postings.nextPosition();
- // NOTE: pos=-1 is allowed because of ancient bug
- // (LUCENE-1542) whereby IndexWriter could
- // write pos=-1 when first token's posInc is 0
- // (separately: analyzers should not give
- // posInc=0 to first token); also, term
- // vectors are allowed to return pos=-1 if
- // they indexed offset but not positions:
- if (pos < -1) {
+
+ if (pos < 0) {
throw new RuntimeException("position " + pos + " is out of bounds");
}
if (pos < lastPosition) {
@@ -992,7 +955,7 @@ public class CheckIndex {
} else {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
- docs = termsEnum.docs(liveDocs, docs, false);
+ docs = termsEnum.docs(liveDocs, docs, 0);
final int docID = docs.advance(skipDocID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
@@ -1019,11 +982,7 @@ public class CheckIndex {
// only happen if it's a ghost field (field with
// no terms, eg there used to be terms but all
// docs got deleted and then merged away):
- // make sure TermsEnum is empty:
- final Terms fieldTerms2 = fieldsEnum.terms();
- if (fieldTerms2 != null && fieldTerms2.iterator(null).next() != null) {
- throw new RuntimeException("Fields.terms(field=" + field + ") returned null yet the field appears to have terms");
- }
+
} else {
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
@@ -1062,7 +1021,7 @@ public class CheckIndex {
}
int expectedDocFreq = termsEnum.docFreq();
- DocsEnum d = termsEnum.docs(null, null, false);
+ DocsEnum d = termsEnum.docs(null, null, 0);
int docFreq = 0;
while (d.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
docFreq++;
@@ -1103,7 +1062,7 @@ public class CheckIndex {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}
- docs = termsEnum.docs(liveDocs, docs, false);
+ docs = termsEnum.docs(liveDocs, docs, 0);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
}
@@ -1121,7 +1080,7 @@ public class CheckIndex {
}
totDocFreq += termsEnum.docFreq();
- docs = termsEnum.docs(null, docs, false);
+ docs = termsEnum.docs(null, docs, 0);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
}
@@ -1434,9 +1393,7 @@ public class CheckIndex {
status.docCount++;
}
- FieldsEnum fieldsEnum = tfv.iterator();
- String field = null;
- while((field = fieldsEnum.next()) != null) {
+ for(String field : tfv) {
if (doStats) {
status.totVectors++;
}
@@ -1450,6 +1407,9 @@ public class CheckIndex {
if (crossCheckTermVectors) {
Terms terms = tfv.terms(field);
termsEnum = terms.iterator(termsEnum);
+ final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean postingsHasPayload = fieldInfo.hasPayloads();
+ final boolean vectorsHasPayload = terms.hasPayloads();
Terms postingsTerms = postingsFields.terms(field);
if (postingsTerms == null) {
@@ -1457,47 +1417,22 @@ public class CheckIndex {
}
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
+ final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
BytesRef term = null;
while ((term = termsEnum.next()) != null) {
-
- final boolean hasPositions;
- final boolean hasOffsets;
- final boolean hasFreqs;
-
- // TODO: really we need a reflection/query
- // API so we can just ask what was indexed
- // instead of "probing"...
-
- // Try offsets:
- postings = termsEnum.docsAndPositions(null, postings, true);
- if (postings == null) {
- hasOffsets = false;
- // Try only positions:
- postings = termsEnum.docsAndPositions(null, postings, false);
- if (postings == null) {
- hasPositions = false;
- // Try docIDs & freqs:
- docs = termsEnum.docs(null, docs, true);
- if (docs == null) {
- // OK, only docIDs:
- hasFreqs = false;
- docs = termsEnum.docs(null, docs, false);
- } else {
- hasFreqs = true;
- }
- } else {
- hasPositions = true;
- hasFreqs = true;
- }
+
+ if (hasProx) {
+ postings = termsEnum.docsAndPositions(null, postings);
+ assert postings != null;
+ docs = null;
} else {
- hasOffsets = true;
- // NOTE: may be a lie... but we accept -1
- hasPositions = true;
- hasFreqs = true;
+ docs = termsEnum.docs(null, docs);
+ assert docs != null;
+ postings = null;
}
final DocsEnum docs2;
- if (hasPositions || hasOffsets) {
+ if (hasProx) {
assert postings != null;
docs2 = postings;
} else {
@@ -1506,30 +1441,16 @@ public class CheckIndex {
}
final DocsEnum postingsDocs2;
- final boolean postingsHasFreq;
if (!postingsTermsEnum.seekExact(term, true)) {
throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
}
- postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, true);
+ postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings);
if (postingsPostings == null) {
- // Term vectors were indexed w/ offsets but postings were not
- postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, false);
- if (postingsPostings == null) {
- postingsDocs = postingsTermsEnum.docs(null, postingsDocs, true);
- if (postingsDocs == null) {
- postingsHasFreq = false;
- postingsDocs = postingsTermsEnum.docs(null, postingsDocs, false);
- if (postingsDocs == null) {
- throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
- }
- } else {
- postingsHasFreq = true;
- }
- } else {
- postingsHasFreq = true;
+ // Term vectors were indexed w/ pos but postings were not
+ postingsDocs = postingsTermsEnum.docs(null, postingsDocs);
+ if (postingsDocs == null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
}
- } else {
- postingsHasFreq = true;
}
if (postingsPostings != null) {
@@ -1549,47 +1470,73 @@ public class CheckIndex {
throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
}
- if (hasFreqs) {
+ if (postingsHasFreq) {
final int tf = docs2.freq();
if (postingsHasFreq && postingsDocs2.freq() != tf) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.freq());
}
- if (hasPositions || hasOffsets) {
+ if (hasProx) {
for (int i = 0; i < tf; i++) {
int pos = postings.nextPosition();
if (postingsPostings != null) {
int postingsPos = postingsPostings.nextPosition();
- if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+ if (terms.hasPositions() && pos != postingsPos) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
}
}
- if (hasOffsets) {
- // Call the methods to at least make
- // sure they don't throw exc:
- final int startOffset = postings.startOffset();
- final int endOffset = postings.endOffset();
- // TODO: these are too anal...?
- /*
- if (endOffset < startOffset) {
- throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
- }
- if (startOffset < lastStartOffset) {
- throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
- }
- lastStartOffset = startOffset;
- */
+ // Call the methods to at least make
+ // sure they don't throw exc:
+ final int startOffset = postings.startOffset();
+ final int endOffset = postings.endOffset();
+ // TODO: these are too anal...?
+ /*
+ if (endOffset < startOffset) {
+ throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
+ }
+ if (startOffset < lastStartOffset) {
+ throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
+ }
+ lastStartOffset = startOffset;
+ */
- if (postingsPostings != null) {
- final int postingsStartOffset = postingsPostings.startOffset();
+ if (postingsPostings != null) {
+ final int postingsStartOffset = postingsPostings.startOffset();
- final int postingsEndOffset = postingsPostings.endOffset();
- if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) {
- throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
+ final int postingsEndOffset = postingsPostings.endOffset();
+ if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
+ }
+ if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
+ }
+ }
+
+ BytesRef payload = postings.getPayload();
+
+ if (payload != null) {
+ assert vectorsHasPayload;
+ }
+
+ if (postingsHasPayload && vectorsHasPayload) {
+ assert postingsPostings != null;
+
+ if (payload == null) {
+ // we have payloads, but not at this position.
+ // postings has payloads too, it should not have one at this position
+ if (postingsPostings.getPayload() != null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.getPayload());
+ }
+ } else {
+ // we have payloads, and one at this position
+ // postings should also have one at this position, with the same bytes.
+ if (postingsPostings.getPayload() == null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
}
- if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) {
- throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
+ BytesRef postingsPayload = postingsPostings.getPayload();
+ if (!payload.equals(postingsPayload)) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
}
}
}
@@ -1601,8 +1548,9 @@ public class CheckIndex {
}
}
}
+ float vectorAvg = status.docCount == 0 ? 0 : status.totVectors / (float)status.docCount;
msg("OK [" + status.totVectors + " total vector count; avg " +
- format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
+ format.format(vectorAvg) + " term/freq vector fields per doc]");
} catch (Throwable e) {
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java Mon Aug 13 13:52:46 2012
@@ -24,7 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Query;
-import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.MergedIterator;
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
class CoalescedDeletes {
@@ -48,13 +48,14 @@ class CoalescedDeletes {
public Iterable<Term> termsIterable() {
return new Iterable<Term>() {
+ @SuppressWarnings("unchecked")
@Override
public Iterator<Term> iterator() {
- ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
- for (Iterable<Term> iterable : iterables) {
- subs.add(iterable.iterator());
+ Iterator<Term> subs[] = new Iterator[iterables.size()];
+ for (int i = 0; i < iterables.size(); i++) {
+ subs[i] = iterables.get(i).iterator();
}
- return mergedIterator(subs);
+ return new MergedIterator<Term>(subs);
}
};
}
@@ -86,106 +87,4 @@ class CoalescedDeletes {
}
};
}
-
- /** provides a merged view across multiple iterators */
- static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
- return new Iterator<Term>() {
- Term current;
- TermMergeQueue queue = new TermMergeQueue(iterators.size());
- SubIterator[] top = new SubIterator[iterators.size()];
- int numTop;
-
- {
- int index = 0;
- for (Iterator<Term> iterator : iterators) {
- if (iterator.hasNext()) {
- SubIterator sub = new SubIterator();
- sub.current = iterator.next();
- sub.iterator = iterator;
- sub.index = index++;
- queue.add(sub);
- }
- }
- }
-
- public boolean hasNext() {
- if (queue.size() > 0) {
- return true;
- }
-
- for (int i = 0; i < numTop; i++) {
- if (top[i].iterator.hasNext()) {
- return true;
- }
- }
- return false;
- }
-
- public Term next() {
- // restore queue
- pushTop();
-
- // gather equal top fields
- if (queue.size() > 0) {
- pullTop();
- } else {
- current = null;
- }
- return current;
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- private void pullTop() {
- // extract all subs from the queue that have the same top term
- assert numTop == 0;
- while (true) {
- top[numTop++] = queue.pop();
- if (queue.size() == 0
- || !(queue.top()).current.equals(top[0].current)) {
- break;
- }
- }
- current = top[0].current;
- }
-
- private void pushTop() {
- // call next() on each top, and put back into queue
- for (int i = 0; i < numTop; i++) {
- if (top[i].iterator.hasNext()) {
- top[i].current = top[i].iterator.next();
- queue.add(top[i]);
- } else {
- // no more terms
- top[i].current = null;
- }
- }
- numTop = 0;
- }
- };
- }
-
- private static class SubIterator {
- Iterator<Term> iterator;
- Term current;
- int index;
- }
-
- private static class TermMergeQueue extends PriorityQueue<SubIterator> {
- TermMergeQueue(int size) {
- super(size);
- }
-
- @Override
- protected boolean lessThan(SubIterator a, SubIterator b) {
- final int cmp = a.current.compareTo(b.current);
- if (cmp != 0) {
- return cmp < 0;
- } else {
- return a.index < b.index;
- }
- }
- }
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Mon Aug 13 13:52:46 2012
@@ -243,27 +243,34 @@ public class ConcurrentMergeScheduler ex
sync();
}
- /** Wait for any running merge threads to finish */
+ /** Wait for any running merge threads to finish. This call is not interruptible as used by {@link #close()}. */
public void sync() {
- while (true) {
- MergeThread toSync = null;
- synchronized (this) {
- for (MergeThread t : mergeThreads) {
- if (t.isAlive()) {
- toSync = t;
- break;
+ boolean interrupted = false;
+ try {
+ while (true) {
+ MergeThread toSync = null;
+ synchronized (this) {
+ for (MergeThread t : mergeThreads) {
+ if (t.isAlive()) {
+ toSync = t;
+ break;
+ }
}
}
- }
- if (toSync != null) {
- try {
- toSync.join();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
+ if (toSync != null) {
+ try {
+ toSync.join();
+ } catch (InterruptedException ie) {
+ // ignore this Exception, we will retry until all threads are dead
+ interrupted = true;
+ }
+ } else {
+ break;
}
- } else {
- break;
}
+ } finally {
+ // finally, restore interrupt status:
+ if (interrupted) Thread.currentThread().interrupt();
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java Mon Aug 13 13:52:46 2012
@@ -75,6 +75,9 @@ public abstract class DirectoryReader ex
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely.
+ * <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
+ * implementations, including the default one in this release. It only makes
+ * sense for terms indexes that can efficiently re-sample terms at load time.
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException {
@@ -126,6 +129,9 @@ public abstract class DirectoryReader ex
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely.
+ * <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
+ * implementations, including the default one in this release. It only makes
+ * sense for terms indexes that can efficiently re-sample terms at load time.
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException {
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java Mon Aug 13 13:52:46 2012
@@ -76,6 +76,7 @@ final class DocInverterPerField extends
// consumer if it wants to see this particular field
// tokenized.
if (fieldType.indexed() && doInvert) {
+ final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
// if the field omits norms, the boost cannot be indexed.
if (fieldType.omitNorms() && field.boost() != 1.0f) {
@@ -88,7 +89,7 @@ final class DocInverterPerField extends
int lastStartOffset = 0;
if (i > 0) {
- fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
+ fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
}
final TokenStream stream = field.tokenStream(docState.analyzer);
@@ -188,7 +189,7 @@ final class DocInverterPerField extends
}
}
- fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
+ fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;
fieldState.boost *= field.boost();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Mon Aug 13 13:52:46 2012
@@ -334,7 +334,7 @@ public class DocTermOrds {
final int df = te.docFreq();
if (df <= maxTermDocFreq) {
- docsEnum = te.docs(liveDocs, docsEnum, false);
+ docsEnum = te.docs(liveDocs, docsEnum, 0);
// dF, but takes deletions into account
int actualDF = 0;
@@ -668,13 +668,13 @@ public class DocTermOrds {
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
- return termsEnum.docs(liveDocs, reuse, needsFreqs);
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ return termsEnum.docs(liveDocs, reuse, flags);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
- return termsEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ return termsEnum.docsAndPositions(liveDocs, reuse, flags);
}
@Override
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocValues.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocValues.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocValues.java Mon Aug 13 13:52:46 2012
@@ -105,7 +105,7 @@ public abstract class DocValues implemen
* <p>
* {@link Source} instances obtained from this method are closed / released
* from the cache once this {@link DocValues} instance is closed by the
- * {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
+ * {@link IndexReader}, {@link Fields} or the
* {@link DocValues} was created from.
*/
public Source getSource() throws IOException {
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java Mon Aug 13 13:52:46 2012
@@ -23,7 +23,14 @@ import org.apache.lucene.util.BytesRef;
/** Also iterates through positions. */
public abstract class DocsAndPositionsEnum extends DocsEnum {
-
+ /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}
+ * if you require offsets in the returned enum. */
+ public static final int FLAG_OFFSETS = 0x1;
+
+ /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}
+ * if you require payloads in the returned enum. */
+ public static final int FLAG_PAYLOADS = 0x2;
+
/** Returns the next position. You should only call this
* up to {@link DocsEnum#freq()} times else
* the behavior is not defined. If positions were not
@@ -41,9 +48,8 @@ public abstract class DocsAndPositionsEn
public abstract int endOffset() throws IOException;
/** Returns the payload at this position, or null if no
- * payload was indexed. Only call this once per
- * position. */
+ * payload was indexed. You should not modify anything
+ * (neither members of the returned BytesRef nor bytes
+ * in the byte[]). */
public abstract BytesRef getPayload() throws IOException;
-
- public abstract boolean hasPayload();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java Mon Aug 13 13:52:46 2012
@@ -27,6 +27,10 @@ import org.apache.lucene.util.AttributeS
* any of the per-doc methods. */
public abstract class DocsEnum extends DocIdSetIterator {
+ /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)}
+ * if you require term frequencies in the returned enum. */
+ public static final int FLAG_FREQS = 0x1;
+
private AttributeSource atts = null;
/** Returns term frequency in the current document. Do
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Aug 13 13:52:46 2012
@@ -202,11 +202,9 @@ final class DocumentsWriter {
* discarding any docs added since last flush. */
synchronized void abort() {
boolean success = false;
- synchronized (this) {
- deleteQueue.clear();
- }
try {
+ deleteQueue.clear();
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "abort");
}
@@ -230,6 +228,7 @@ final class DocumentsWriter {
perThread.unlock();
}
}
+ flushControl.abortPendingFlushes();
flushControl.waitForFlush();
success = true;
} finally {
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Mon Aug 13 13:52:46 2012
@@ -567,19 +567,34 @@ final class DocumentsWriterFlushControl
}
synchronized void abortFullFlushes() {
+ try {
+ abortPendingFlushes();
+ } finally {
+ fullFlush = false;
+ }
+ }
+
+ synchronized void abortPendingFlushes() {
try {
for (DocumentsWriterPerThread dwpt : flushQueue) {
- doAfterFlush(dwpt);
- dwpt.abort();
+ try {
+ dwpt.abort();
+ doAfterFlush(dwpt);
+ } catch (Throwable ex) {
+ // ignore - keep on aborting the flush queue
+ }
}
for (BlockedFlush blockedFlush : blockedFlushes) {
- flushingWriters
- .put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
- doAfterFlush(blockedFlush.dwpt);
- blockedFlush.dwpt.abort();
+ try {
+ flushingWriters
+ .put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
+ blockedFlush.dwpt.abort();
+ doAfterFlush(blockedFlush.dwpt);
+ } catch (Throwable ex) {
+ // ignore - keep on aborting the blocked queue
+ }
}
} finally {
- fullFlush = false;
flushQueue.clear();
blockedFlushes.clear();
updateStallState();
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/Fields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/Fields.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/Fields.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/Fields.java Mon Aug 13 13:52:46 2012
@@ -18,15 +18,16 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.Iterator;
/** Flex API for access to fields and terms
* @lucene.experimental */
-public abstract class Fields {
+public abstract class Fields implements Iterable<String> {
/** Returns an iterator that will step through all fields
* names. This will not return null. */
- public abstract FieldsEnum iterator() throws IOException;
+ public abstract Iterator<String> iterator();
/** Get the {@link Terms} for this field. This will return
* null if the field does not exist. */
@@ -45,12 +46,7 @@ public abstract class Fields {
// TODO: deprecate?
public long getUniqueTermCount() throws IOException {
long numTerms = 0;
- FieldsEnum it = iterator();
- while(true) {
- String field = it.next();
- if (field == null) {
- break;
- }
+ for (String field : this) {
Terms terms = terms(field);
if (terms != null) {
final long termCount = terms.size();
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Mon Aug 13 13:52:46 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.util.automaton.
import java.io.IOException;
import java.util.Comparator;
+import java.util.Iterator;
/** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -46,7 +47,7 @@ public class FilterAtomicReader extends
}
@Override
- public FieldsEnum iterator() throws IOException {
+ public Iterator<String> iterator() {
return in.iterator();
}
@@ -109,28 +110,20 @@ public class FilterAtomicReader extends
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
return in.intersect(automaton, bytes);
}
- }
-
- /** Base class for filtering {@link TermsEnum} implementations. */
- public static class FilterFieldsEnum extends FieldsEnum {
- protected final FieldsEnum in;
- public FilterFieldsEnum(FieldsEnum in) {
- this.in = in;
- }
@Override
- public String next() throws IOException {
- return in.next();
+ public boolean hasOffsets() {
+ return in.hasOffsets();
}
@Override
- public Terms terms() throws IOException {
- return in.terms();
+ public boolean hasPositions() {
+ return in.hasPositions();
}
@Override
- public AttributeSource attributes() {
- return in.attributes();
+ public boolean hasPayloads() {
+ return in.hasPayloads();
}
}
@@ -181,13 +174,13 @@ public class FilterAtomicReader extends
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
- return in.docs(liveDocs, reuse, needsFreqs);
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ return in.docs(liveDocs, reuse, flags);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
- return in.docsAndPositions(liveDocs, reuse, needsOffsets);
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ return in.docsAndPositions(liveDocs, reuse, flags);
}
@Override
@@ -292,11 +285,6 @@ public class FilterAtomicReader extends
public BytesRef getPayload() throws IOException {
return in.getPayload();
}
-
- @Override
- public boolean hasPayload() {
- return in.hasPayload();
- }
@Override
public AttributeSource attributes() {
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Mon Aug 13 13:52:46 2012
@@ -166,13 +166,13 @@ public abstract class FilteredTermsEnum
}
@Override
- public DocsEnum docs(Bits bits, DocsEnum reuse, boolean needsFreqs) throws IOException {
- return tenum.docs(bits, reuse, needsFreqs);
+ public DocsEnum docs(Bits bits, DocsEnum reuse, int flags) throws IOException {
+ return tenum.docs(bits, reuse, flags);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
- return tenum.docsAndPositions(bits, reuse, needsOffsets);
+ public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ return tenum.docsAndPositions(bits, reuse, flags);
}
/** This enum does not support seeking!
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Mon Aug 13 13:52:46 2012
@@ -173,7 +173,7 @@ final class FreqProxTermsWriterPerField
postings.lastDocCodes[termID] = docState.docID;
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
- postings.docFreqs[termID] = 1;
+ postings.termFreqs[termID] = 1;
if (hasProx) {
writeProx(termID, fieldState.position);
if (hasOffsets) {
@@ -194,10 +194,10 @@ final class FreqProxTermsWriterPerField
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
- assert !hasFreq || postings.docFreqs[termID] > 0;
+ assert !hasFreq || postings.termFreqs[termID] > 0;
if (!hasFreq) {
- assert postings.docFreqs == null;
+ assert postings.termFreqs == null;
if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@@ -212,13 +212,13 @@ final class FreqProxTermsWriterPerField
// Now that we know doc freq for previous doc,
// write it & lastDocCode
- if (1 == postings.docFreqs[termID]) {
+ if (1 == postings.termFreqs[termID]) {
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
} else {
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
- termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
+ termsHashPerField.writeVInt(0, postings.termFreqs[termID]);
}
- postings.docFreqs[termID] = 1;
+ postings.termFreqs[termID] = 1;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
@@ -233,7 +233,7 @@ final class FreqProxTermsWriterPerField
}
fieldState.uniqueTermCount++;
} else {
- fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
+ fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
if (hasProx) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
@@ -252,7 +252,7 @@ final class FreqProxTermsWriterPerField
public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
super(size);
if (writeFreqs) {
- docFreqs = new int[size];
+ termFreqs = new int[size];
}
lastDocIDs = new int[size];
lastDocCodes = new int[size];
@@ -267,7 +267,7 @@ final class FreqProxTermsWriterPerField
//System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
}
- int docFreqs[]; // # times this term occurs in the current doc
+ int termFreqs[]; // # times this term occurs in the current doc
int lastDocIDs[]; // Last docID where this term occurred
int lastDocCodes[]; // Code for prior doc
int lastPositions[]; // Last position where this term occurred
@@ -275,7 +275,7 @@ final class FreqProxTermsWriterPerField
@Override
ParallelPostingsArray newInstance(int size) {
- return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
+ return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null);
}
@Override
@@ -295,9 +295,9 @@ final class FreqProxTermsWriterPerField
assert to.lastOffsets != null;
System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
}
- if (docFreqs != null) {
- assert to.docFreqs != null;
- System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
+ if (termFreqs != null) {
+ assert to.termFreqs != null;
+ System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy);
}
}
@@ -310,7 +310,7 @@ final class FreqProxTermsWriterPerField
if (lastOffsets != null) {
bytes += RamUsageEstimator.NUM_BYTES_INT;
}
- if (docFreqs != null) {
+ if (termFreqs != null) {
bytes += RamUsageEstimator.NUM_BYTES_INT;
}
@@ -416,21 +416,21 @@ final class FreqProxTermsWriterPerField
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
- int numDocs = 0;
+ int docFreq = 0;
long totTF = 0;
int docID = 0;
while(true) {
//System.out.println(" cycle");
- final int termDocFreq;
+ final int termFreq;
if (freq.eof()) {
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
if (readTermFreq) {
- termDocFreq = postings.docFreqs[termID];
+ termFreq = postings.termFreqs[termID];
} else {
- termDocFreq = 0;
+ termFreq = -1;
}
postings.lastDocCodes[termID] = -1;
} else {
@@ -441,20 +441,20 @@ final class FreqProxTermsWriterPerField
final int code = freq.readVInt();
if (!readTermFreq) {
docID += code;
- termDocFreq = 0;
+ termFreq = -1;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
- termDocFreq = 1;
+ termFreq = 1;
} else {
- termDocFreq = freq.readVInt();
+ termFreq = freq.readVInt();
}
}
assert docID != postings.lastDocIDs[termID];
}
- numDocs++;
+ docFreq++;
assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
// NOTE: we could check here if the docID was
@@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField
// 2nd sweep does the real flush, but I suspect
// that'd add too much time to flush.
visitedDocs.set(docID);
- postingsConsumer.startDoc(docID, termDocFreq);
+ postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
if (docID < delDocLimit) {
// Mark it deleted. TODO: we could also skip
// writing its postings; this would be
@@ -485,7 +485,7 @@ final class FreqProxTermsWriterPerField
}
}
- totTF += termDocFreq;
+ totTF += termFreq;
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
@@ -495,7 +495,7 @@ final class FreqProxTermsWriterPerField
// we did record positions (& maybe payload) and/or offsets
int position = 0;
int offset = 0;
- for(int j=0;j<termDocFreq;j++) {
+ for(int j=0;j<termFreq;j++) {
final BytesRef thisPayload;
if (readPositions) {
@@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField
}
postingsConsumer.finishDoc();
}
- termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+ termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totTF : -1));
sumTotalTermFreq += totTF;
- sumDocFreq += numDocs;
+ sumDocFreq += docFreq;
}
- termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+ termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java Mon Aug 13 13:52:46 2012
@@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoSuchDirectoryException;
@@ -146,57 +147,61 @@ final class IndexFileDeleter {
// it means the directory is empty, so ignore it.
files = new String[0];
}
-
- for (String fileName : files) {
-
- if (!fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
-
- // Add this file to refCounts with initial count 0:
- getRefCount(fileName);
-
- if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
-
- // This is a commit (segments or segments_N), and
- // it's valid (<= the max gen). Load it, then
- // incref all files it refers to:
- if (infoStream.isEnabled("IFD")) {
- infoStream.message("IFD", "init: load commit \"" + fileName + "\"");
- }
- SegmentInfos sis = new SegmentInfos();
- try {
- sis.read(directory, fileName);
- } catch (FileNotFoundException e) {
- // LUCENE-948: on NFS (and maybe others), if
- // you have writers switching back and forth
- // between machines, it's very likely that the
- // dir listing will be stale and will claim a
- // file segments_X exists when in fact it
- // doesn't. So, we catch this and handle it
- // as if the file does not exist
+
+ if (currentSegmentsFile != null) {
+ Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
+ for (String fileName : files) {
+ m.reset(fileName);
+ if (!fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)
+ && (m.matches() || fileName.startsWith(IndexFileNames.SEGMENTS))) {
+
+ // Add this file to refCounts with initial count 0:
+ getRefCount(fileName);
+
+ if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+
+ // This is a commit (segments or segments_N), and
+ // it's valid (<= the max gen). Load it, then
+ // incref all files it refers to:
if (infoStream.isEnabled("IFD")) {
- infoStream.message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+ infoStream.message("IFD", "init: load commit \"" + fileName + "\"");
}
- sis = null;
- } catch (IOException e) {
- if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen && directory.fileLength(fileName) > 0) {
- throw e;
- } else {
- // Most likely we are opening an index that
- // has an aborted "future" commit, so suppress
- // exc in this case
+ SegmentInfos sis = new SegmentInfos();
+ try {
+ sis.read(directory, fileName);
+ } catch (FileNotFoundException e) {
+ // LUCENE-948: on NFS (and maybe others), if
+ // you have writers switching back and forth
+ // between machines, it's very likely that the
+ // dir listing will be stale and will claim a
+ // file segments_X exists when in fact it
+ // doesn't. So, we catch this and handle it
+ // as if the file does not exist
+ if (infoStream.isEnabled("IFD")) {
+ infoStream.message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+ }
sis = null;
+ } catch (IOException e) {
+ if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen && directory.fileLength(fileName) > 0) {
+ throw e;
+ } else {
+ // Most likely we are opening an index that
+ // has an aborted "future" commit, so suppress
+ // exc in this case
+ sis = null;
+ }
}
- }
- if (sis != null) {
- final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
- if (sis.getGeneration() == segmentInfos.getGeneration()) {
- currentCommitPoint = commitPoint;
- }
- commits.add(commitPoint);
- incRef(sis, true);
-
- if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
- lastSegmentInfos = sis;
+ if (sis != null) {
+ final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
+ if (sis.getGeneration() == segmentInfos.getGeneration()) {
+ currentCommitPoint = commitPoint;
+ }
+ commits.add(commitPoint);
+ incRef(sis, true);
+
+ if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
+ lastSegmentInfos = sis;
+ }
}
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java Mon Aug 13 13:52:46 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.util.regex.Pattern;
+
import org.apache.lucene.codecs.Codec;
// TODO: put all files under codec and remove all the static extensions here
@@ -189,4 +191,8 @@ public final class IndexFileNames {
}
return filename;
}
+
+ // All files created by codecs must match this pattern (we
+ // check this in SegmentInfo.java):
+ static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java Mon Aug 13 13:52:46 2012
@@ -1,7 +1,5 @@
package org.apache.lucene.index;
-import java.util.List;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import java.util.List;
* limitations under the License.
*/
+import java.util.List;
+
/**
* A struct like class that represents a hierarchical relationship between
* {@link IndexReader} instances.