You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/08/21 00:47:32 UTC
svn commit: r1516012 [2/4] - in /lucene/dev/branches/branch_4x: ./
dev-tools/ lucene/ lucene/codecs/
lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/
lucene/codecs/src/java/org/apache/lucene/codecs/memory/
lucene/codecs/src/java/org/apache/lucen...
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Tue Aug 20 22:47:30 2013
@@ -17,11 +17,6 @@ package org.apache.lucene.codecs.lucene4
* limitations under the License.
*/
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.DELTA_COMPRESSED;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.GCD_COMPRESSED;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.TABLE_COMPRESSED;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.UNCOMPRESSED;
-
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
@@ -78,6 +73,22 @@ class Lucene42DocValuesProducer extends
new HashMap<Integer,FST<Long>>();
private final int maxDoc;
+
+
+ static final byte NUMBER = 0;
+ static final byte BYTES = 1;
+ static final byte FST = 2;
+
+ static final int BLOCK_SIZE = 4096;
+
+ static final byte DELTA_COMPRESSED = 0;
+ static final byte TABLE_COMPRESSED = 1;
+ static final byte UNCOMPRESSED = 2;
+ static final byte GCD_COMPRESSED = 3;
+
+ static final int VERSION_START = 0;
+ static final int VERSION_GCD_COMPRESSION = 1;
+ static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
@@ -88,8 +99,8 @@ class Lucene42DocValuesProducer extends
final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
- Lucene42DocValuesConsumer.VERSION_START,
- Lucene42DocValuesConsumer.VERSION_CURRENT);
+ VERSION_START,
+ VERSION_CURRENT);
numerics = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
fsts = new HashMap<Integer,FSTEntry>();
@@ -109,8 +120,8 @@ class Lucene42DocValuesProducer extends
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.openInput(dataName, state.context);
final int version2 = CodecUtil.checkHeader(data, dataCodec,
- Lucene42DocValuesConsumer.VERSION_START,
- Lucene42DocValuesConsumer.VERSION_CURRENT);
+ VERSION_START,
+ VERSION_CURRENT);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
@@ -127,7 +138,7 @@ class Lucene42DocValuesProducer extends
int fieldNumber = meta.readVInt();
while (fieldNumber != -1) {
int fieldType = meta.readByte();
- if (fieldType == Lucene42DocValuesConsumer.NUMBER) {
+ if (fieldType == NUMBER) {
NumericEntry entry = new NumericEntry();
entry.offset = meta.readLong();
entry.format = meta.readByte();
@@ -140,11 +151,11 @@ class Lucene42DocValuesProducer extends
default:
throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
}
- if (entry.format != Lucene42DocValuesConsumer.UNCOMPRESSED) {
+ if (entry.format != UNCOMPRESSED) {
entry.packedIntsVersion = meta.readVInt();
}
numerics.put(fieldNumber, entry);
- } else if (fieldType == Lucene42DocValuesConsumer.BYTES) {
+ } else if (fieldType == BYTES) {
BinaryEntry entry = new BinaryEntry();
entry.offset = meta.readLong();
entry.numBytes = meta.readLong();
@@ -155,7 +166,7 @@ class Lucene42DocValuesProducer extends
entry.blockSize = meta.readVInt();
}
binaries.put(fieldNumber, entry);
- } else if (fieldType == Lucene42DocValuesConsumer.FST) {
+ } else if (fieldType == FST) {
FSTEntry entry = new FSTEntry();
entry.offset = meta.readLong();
entry.numOrds = meta.readVLong();
@@ -429,6 +440,15 @@ class Lucene42DocValuesProducer extends
}
};
}
+
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+ if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) {
+ return new SortedSetDocsWithField(getSortedSet(field), maxDoc);
+ } else {
+ return new Bits.MatchAllBits(maxDoc);
+ }
+ }
@Override
public void close() throws IOException {
Copied: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java (from r1515977, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java?p2=lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java&r1=1515977&r2=1516012&rev=1516012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java Tue Aug 20 22:47:30 2013
@@ -85,7 +85,7 @@ class Lucene42NormsConsumer extends DocV
// TODO: more efficient?
HashSet<Long> uniqueValues = null;
if (true) {
- uniqueValues = new HashSet<>();
+ uniqueValues = new HashSet<Long>();
long count = 0;
for (Number nv : values) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java Tue Aug 20 22:47:30 2013
@@ -41,7 +41,7 @@ import org.apache.lucene.util.packed.Pac
* </ul>
* @see Lucene42DocValuesFormat
*/
-public final class Lucene42NormsFormat extends NormsFormat {
+public class Lucene42NormsFormat extends NormsFormat {
final float acceptableOverheadRatio;
/**
@@ -67,7 +67,7 @@ public final class Lucene42NormsFormat e
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
- return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
+ return new Lucene42NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html Tue Aug 20 22:47:30 2013
@@ -178,7 +178,7 @@ For each field in each document, a value
that is multiplied into the score for hits on that field.
</li>
<li>
-{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}.
+{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}.
For each field in each document, the term vector (sometimes
called document vector) may be stored. A term vector consists of term text and
term frequency. To add Term Vectors to your index see the
@@ -299,17 +299,17 @@ systems that frequently run out of file
<td>Encodes additional scoring factors or other per-document information.</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}</td>
+<td>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}</td>
<td>.tvx</td>
<td>Stores offset into the document data file</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}</td>
+<td>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}</td>
<td>.tvd</td>
<td>Contains information about each document that has term vectors</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}</td>
+<td>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}</td>
<td>.tvf</td>
<td>The field level info about term vectors</td>
</tr>
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java?rev=1516012&r1=1515977&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java Tue Aug 20 22:47:30 2013
@@ -93,7 +93,7 @@ public class Lucene45DocValuesConsumer e
// TODO: more efficient?
HashSet<Long> uniqueValues = null;
if (optimizeStorage) {
- uniqueValues = new HashSet<>();
+ uniqueValues = new HashSet<Long>();
for (Number nv : values) {
final long v;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/package.html?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/package.html Tue Aug 20 22:47:30 2013
@@ -61,9 +61,13 @@ name of your codec.
If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings
formats for different fields, then you can register your custom postings format in the same way (in
META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default
- {@link org.apache.lucene.codecs.lucene42.Lucene42Codec} and override
- {@link org.apache.lucene.codecs.lucene42.Lucene42Codec#getPostingsFormatForField(String)} to return your custom
+ {@link org.apache.lucene.codecs.lucene45.Lucene45Codec} and override
+ {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getPostingsFormatForField(String)} to return your custom
postings format.
</p>
+<p>
+ Similarly, if you just want to customise the {@link org.apache.lucene.codecs.DocValuesFormat} per-field, have
+ a look at {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getDocValuesFormatForField(String)}.
+</p>
</body>
</html>
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java Tue Aug 20 22:47:30 2013
@@ -36,6 +36,7 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -265,6 +266,12 @@ public abstract class PerFieldDocValuesF
DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSortedSet(field);
}
+
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+ DocValuesProducer producer = fields.get(field.name);
+ return producer == null ? null : producer.getDocsWithField(field);
+ }
@Override
public void close() throws IOException {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java Tue Aug 20 22:47:30 2013
@@ -220,6 +220,12 @@ public abstract class AtomicReader exten
* this field. The returned instance should only be
* used by a single thread. */
public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;
+
+ /** Returns a {@link Bits} at the size of <code>reader.maxDoc()</code>,
+ * with turned on bits for each docid that does have a value for this field,
+ * or null if no DocValues were indexed for this field. The
+ * returned instance should only be used by a single thread */
+ public abstract Bits getDocsWithField(String field) throws IOException;
/** Returns {@link NumericDocValues} representing norms
* for this field, or null if no {@link NumericDocValues}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java Tue Aug 20 22:47:30 2013
@@ -26,6 +26,8 @@ import org.apache.lucene.store.DataInput
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
@@ -49,6 +51,7 @@ class BinaryDocValuesWriter extends DocV
private final Counter iwBytesUsed;
private final AppendingDeltaPackedLongBuffer lengths;
+ private final OpenBitSet docsWithField;
private final FieldInfo fieldInfo;
private int addedValues;
private long bytesUsed;
@@ -59,6 +62,9 @@ class BinaryDocValuesWriter extends DocV
this.bytesOut = bytes.getDataOutput();
this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
+ this.docsWithField = new OpenBitSet();
+ this.bytesUsed = docsWithFieldBytesUsed();
+ iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, BytesRef value) {
@@ -85,11 +91,17 @@ class BinaryDocValuesWriter extends DocV
// Should never happen!
throw new RuntimeException(ioe);
}
+ docsWithField.set(docID);
updateBytesUsed();
}
+
+ private long docsWithFieldBytesUsed() {
+ // size of the long[] + some overhead
+ return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+ }
private void updateBytesUsed() {
- final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed();
+ final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithFieldBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@@ -138,6 +150,7 @@ class BinaryDocValuesWriter extends DocV
if (!hasNext()) {
throw new NoSuchElementException();
}
+ final BytesRef v;
if (upto < size) {
int length = (int) lengthsIterator.next();
value.grow(length);
@@ -148,13 +161,16 @@ class BinaryDocValuesWriter extends DocV
// Should never happen!
throw new RuntimeException(ioe);
}
+ if (docsWithField.get(upto)) {
+ v = value;
+ } else {
+ v = null;
+ }
} else {
- // This is to handle last N documents not having
- // this DV field in the end of the segment:
- value.length = 0;
+ v = null;
}
upto++;
- return value;
+ return v;
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Tue Aug 20 22:47:30 2013
@@ -1309,7 +1309,8 @@ public class CheckIndex {
if (reader.getBinaryDocValues(fieldInfo.name) != null ||
reader.getNumericDocValues(fieldInfo.name) != null ||
reader.getSortedDocValues(fieldInfo.name) != null ||
- reader.getSortedSetDocValues(fieldInfo.name) != null) {
+ reader.getSortedSetDocValues(fieldInfo.name) != null ||
+ reader.getDocsWithField(fieldInfo.name) != null) {
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");
}
}
@@ -1330,26 +1331,37 @@ public class CheckIndex {
return status;
}
- private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv) {
+ private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv, Bits docsWithField) {
BytesRef scratch = new BytesRef();
for (int i = 0; i < reader.maxDoc(); i++) {
dv.get(i, scratch);
assert scratch.isValid();
+ if (docsWithField.get(i) == false && scratch.length > 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " is missing but has value=" + scratch + " for doc: " + i);
+ }
}
}
- private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv) {
- checkBinaryDocValues(fieldName, reader, dv);
+ private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField) {
+ checkBinaryDocValues(fieldName, reader, dv, docsWithField);
final int maxOrd = dv.getValueCount()-1;
FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
int maxOrd2 = -1;
for (int i = 0; i < reader.maxDoc(); i++) {
int ord = dv.getOrd(i);
- if (ord < 0 || ord > maxOrd) {
+ if (ord == -1) {
+ if (docsWithField.get(i)) {
+ throw new RuntimeException("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + i);
+ }
+ } else if (ord < -1 || ord > maxOrd) {
throw new RuntimeException("ord out of bounds: " + ord);
+ } else {
+ if (!docsWithField.get(i)) {
+ throw new RuntimeException("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + i);
+ }
+ maxOrd2 = Math.max(maxOrd2, ord);
+ seenOrds.set(ord);
}
- maxOrd2 = Math.max(maxOrd2, ord);
- seenOrds.set(ord);
}
if (maxOrd != maxOrd2) {
throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
@@ -1371,7 +1383,7 @@ public class CheckIndex {
}
}
- private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv) {
+ private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) {
final long maxOrd = dv.getValueCount()-1;
OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount());
long maxOrd2 = -1;
@@ -1379,16 +1391,28 @@ public class CheckIndex {
dv.setDocument(i);
long lastOrd = -1;
long ord;
- while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
- if (ord <= lastOrd) {
- throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
+ if (docsWithField.get(i)) {
+ int ordCount = 0;
+ while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ ordCount++;
+ if (ord <= lastOrd) {
+ throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
+ }
+ if (ord < 0 || ord > maxOrd) {
+ throw new RuntimeException("ord out of bounds: " + ord);
+ }
+ lastOrd = ord;
+ maxOrd2 = Math.max(maxOrd2, ord);
+ seenOrds.set(ord);
}
- if (ord < 0 || ord > maxOrd) {
- throw new RuntimeException("ord out of bounds: " + ord);
+ if (ordCount == 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i);
+ }
+ } else {
+ long o = dv.nextOrd();
+ if (o != SortedSetDocValues.NO_MORE_ORDS) {
+ throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i);
}
- lastOrd = ord;
- maxOrd2 = Math.max(maxOrd2, ord);
- seenOrds.set(ord);
}
}
if (maxOrd != maxOrd2) {
@@ -1412,17 +1436,26 @@ public class CheckIndex {
}
}
- private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv) {
+ private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField) {
for (int i = 0; i < reader.maxDoc(); i++) {
- ndv.get(i);
+ long value = ndv.get(i);
+ if (docsWithField.get(i) == false && value != 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
+ }
}
}
private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception {
+ Bits docsWithField = reader.getDocsWithField(fi.name);
+ if (docsWithField == null) {
+ throw new RuntimeException(fi.name + " docsWithField does not exist");
+ } else if (docsWithField.length() != reader.maxDoc()) {
+ throw new RuntimeException(fi.name + " docsWithField has incorrect length: " + docsWithField.length() + ",expected: " + reader.maxDoc());
+ }
switch(fi.getDocValuesType()) {
case SORTED:
status.totalSortedFields++;
- checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name));
+ checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
@@ -1431,7 +1464,7 @@ public class CheckIndex {
break;
case SORTED_SET:
status.totalSortedSetFields++;
- checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name));
+ checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null) {
@@ -1440,7 +1473,7 @@ public class CheckIndex {
break;
case BINARY:
status.totalBinaryFields++;
- checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name));
+ checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name), docsWithField);
if (reader.getNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
@@ -1449,7 +1482,7 @@ public class CheckIndex {
break;
case NUMERIC:
status.totalNumericFields++;
- checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name));
+ checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
@@ -1464,7 +1497,7 @@ public class CheckIndex {
private static void checkNorms(FieldInfo fi, AtomicReader reader, PrintStream infoStream) throws IOException {
switch(fi.getNormType()) {
case NUMERIC:
- checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name));
+ checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc()));
break;
default:
throw new AssertionError("wtf: " + fi.getNormType());
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java Tue Aug 20 22:47:30 2013
@@ -143,7 +143,7 @@ final class DocValuesProcessor extends S
DocValuesWriter writer = writers.get(fieldInfo.name);
NumericDocValuesWriter numericWriter;
if (writer == null) {
- numericWriter = new NumericDocValuesWriter(fieldInfo, bytesUsed);
+ numericWriter = new NumericDocValuesWriter(fieldInfo, bytesUsed, true);
writers.put(fieldInfo.name, numericWriter);
} else if (!(writer instanceof NumericDocValuesWriter)) {
throw new IllegalArgumentException("Incompatible DocValues type: field \"" + fieldInfo.name + "\" changed from " + getTypeDesc(writer) + " to numeric");
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Tue Aug 20 22:47:30 2013
@@ -414,4 +414,10 @@ public class FilterAtomicReader extends
return in.getNormValues(field);
}
+ @Override
+ public Bits getDocsWithField(String field) throws IOException {
+ ensureOpen();
+ return in.getDocsWithField(field);
+ }
+
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java Tue Aug 20 22:47:30 2013
@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
@@ -135,6 +136,51 @@ public class MultiDocValues {
};
}
}
+
+ /** Returns a Bits for a reader's docsWithField (potentially merging on-the-fly)
+ * <p>
+ * This is a slow way to access this bitset. Instead, access them per-segment
+ * with {@link AtomicReader#getDocsWithField(String)}
+ * </p>
+ * */
+ public static Bits getDocsWithField(final IndexReader r, final String field) throws IOException {
+ final List<AtomicReaderContext> leaves = r.leaves();
+ final int size = leaves.size();
+ if (size == 0) {
+ return null;
+ } else if (size == 1) {
+ return leaves.get(0).reader().getDocsWithField(field);
+ }
+
+ boolean anyReal = false;
+ boolean anyMissing = false;
+ final Bits[] values = new Bits[size];
+ final int[] starts = new int[size+1];
+ for (int i = 0; i < size; i++) {
+ AtomicReaderContext context = leaves.get(i);
+ Bits v = context.reader().getDocsWithField(field);
+ if (v == null) {
+ v = new Bits.MatchNoBits(context.reader().maxDoc());
+ anyMissing = true;
+ } else {
+ anyReal = true;
+ if (v instanceof Bits.MatchAllBits == false) {
+ anyMissing = true;
+ }
+ }
+ values[i] = v;
+ starts[i] = context.docBase;
+ }
+ starts[size] = r.maxDoc();
+
+ if (!anyReal) {
+ return null;
+ } else if (!anyMissing) {
+ return new Bits.MatchAllBits(r.maxDoc());
+ } else {
+ return new MultiBits(values, starts, false);
+ }
+ }
/** Returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly)
* <p>
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java Tue Aug 20 22:47:30 2013
@@ -44,7 +44,7 @@ final class NormsConsumerPerField extend
if (fieldInfo.isIndexed() && !fieldInfo.omitsNorms()) {
if (consumer == null) {
fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC);
- consumer = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed);
+ consumer = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed, false);
}
consumer.addValue(docState.docID, similarity.computeNorm(fieldState));
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java Tue Aug 20 22:47:30 2013
@@ -23,6 +23,8 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
@@ -35,14 +37,18 @@ class NumericDocValuesWriter extends Doc
private AppendingDeltaPackedLongBuffer pending;
private final Counter iwBytesUsed;
private long bytesUsed;
+ private final OpenBitSet docsWithField;
private final FieldInfo fieldInfo;
+ private final boolean trackDocsWithField;
- public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
+ public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) {
pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
- bytesUsed = pending.ramBytesUsed();
+ docsWithField = new OpenBitSet();
+ bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
iwBytesUsed.addAndGet(bytesUsed);
+ this.trackDocsWithField = trackDocsWithField;
}
public void addValue(int docID, long value) {
@@ -56,12 +62,20 @@ class NumericDocValuesWriter extends Doc
}
pending.add(value);
+ if (trackDocsWithField) {
+ docsWithField.set(docID);
+ }
updateBytesUsed();
}
+
+ private long docsWithFieldBytesUsed() {
+ // size of the long[] + some overhead
+ return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+ }
private void updateBytesUsed() {
- final long newBytesUsed = pending.ramBytesUsed();
+ final long newBytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@@ -109,14 +123,18 @@ class NumericDocValuesWriter extends Doc
if (!hasNext()) {
throw new NoSuchElementException();
}
- long value;
+ Long value;
if (upto < size) {
- value = iter.next();
+ long v = iter.next();
+ if (!trackDocsWithField || docsWithField.get(upto)) {
+ value = v;
+ } else {
+ value = null;
+ }
} else {
- value = 0;
+ value = trackDocsWithField ? null : MISSING;
}
upto++;
- // TODO: make reusable Number
return value;
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java Tue Aug 20 22:47:30 2013
@@ -286,6 +286,13 @@ public class ParallelAtomicReader extend
}
@Override
+ public Bits getDocsWithField(String field) throws IOException {
+ ensureOpen();
+ AtomicReader reader = fieldToReader.get(field);
+ return reader == null ? null : reader.getDocsWithField(field);
+ }
+
+ @Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java Tue Aug 20 22:47:30 2013
@@ -36,6 +36,7 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.IOUtils;
@@ -89,6 +90,13 @@ final class SegmentCoreReaders {
return new HashMap<String,Object>();
}
};
+
+ final CloseableThreadLocal<Map<String,Bits>> docsWithFieldLocal = new CloseableThreadLocal<Map<String,Bits>>() {
+ @Override
+ protected Map<String,Bits> initialValue() {
+ return new HashMap<String,Bits>();
+ }
+ };
final CloseableThreadLocal<Map<String,Object>> normsLocal = new CloseableThreadLocal<Map<String,Object>>() {
@Override
@@ -281,6 +289,30 @@ final class SegmentCoreReaders {
return dvs;
}
+
+ Bits getDocsWithField(String field) throws IOException {
+ FieldInfo fi = fieldInfos.fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() == null) {
+ // Field was not indexed with doc values
+ return null;
+ }
+
+ assert dvProducer != null;
+
+ Map<String,Bits> dvFields = docsWithFieldLocal.get();
+
+ Bits dvs = dvFields.get(field);
+ if (dvs == null) {
+ dvs = dvProducer.getDocsWithField(fi);
+ dvFields.put(field, dvs);
+ }
+
+ return dvs;
+ }
NumericDocValues getNormValues(String field) throws IOException {
FieldInfo fi = fieldInfos.fieldInfo(field);
@@ -307,8 +339,8 @@ final class SegmentCoreReaders {
void decRef() throws IOException {
if (ref.decrementAndGet() == 0) {
- IOUtils.close(termVectorsLocal, fieldsReaderLocal, docValuesLocal, normsLocal, fields, dvProducer,
- termVectorsReaderOrig, fieldsReaderOrig, cfsReader, normsProducer);
+ IOUtils.close(termVectorsLocal, fieldsReaderLocal, docValuesLocal, normsLocal, docsWithFieldLocal, fields,
+ dvProducer, termVectorsReaderOrig, fieldsReaderOrig, cfsReader, normsProducer);
notifyCoreClosedListeners();
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Tue Aug 20 22:47:30 2013
@@ -30,6 +30,7 @@ import org.apache.lucene.codecs.TermVect
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
@@ -158,24 +159,32 @@ final class SegmentMerger {
if (type != null) {
if (type == DocValuesType.NUMERIC) {
List<NumericDocValues> toMerge = new ArrayList<NumericDocValues>();
+ List<Bits> docsWithField = new ArrayList<Bits>();
for (AtomicReader reader : mergeState.readers) {
NumericDocValues values = reader.getNumericDocValues(field.name);
+ Bits bits = reader.getDocsWithField(field.name);
if (values == null) {
values = NumericDocValues.EMPTY;
+ bits = new Bits.MatchNoBits(reader.maxDoc());
}
toMerge.add(values);
+ docsWithField.add(bits);
}
- consumer.mergeNumericField(field, mergeState, toMerge);
+ consumer.mergeNumericField(field, mergeState, toMerge, docsWithField);
} else if (type == DocValuesType.BINARY) {
List<BinaryDocValues> toMerge = new ArrayList<BinaryDocValues>();
+ List<Bits> docsWithField = new ArrayList<Bits>();
for (AtomicReader reader : mergeState.readers) {
BinaryDocValues values = reader.getBinaryDocValues(field.name);
+ Bits bits = reader.getDocsWithField(field.name);
if (values == null) {
values = BinaryDocValues.EMPTY;
+ bits = new Bits.MatchNoBits(reader.maxDoc());
}
toMerge.add(values);
+ docsWithField.add(bits);
}
- consumer.mergeBinaryField(field, mergeState, toMerge);
+ consumer.mergeBinaryField(field, mergeState, toMerge, docsWithField);
} else if (type == DocValuesType.SORTED) {
List<SortedDocValues> toMerge = new ArrayList<SortedDocValues>();
for (AtomicReader reader : mergeState.readers) {
@@ -218,14 +227,16 @@ final class SegmentMerger {
for (FieldInfo field : mergeState.fieldInfos) {
if (field.hasNorms()) {
List<NumericDocValues> toMerge = new ArrayList<NumericDocValues>();
+ List<Bits> docsWithField = new ArrayList<Bits>();
for (AtomicReader reader : mergeState.readers) {
NumericDocValues norms = reader.getNormValues(field.name);
if (norms == null) {
norms = NumericDocValues.EMPTY;
}
toMerge.add(norms);
+ docsWithField.add(new Bits.MatchAllBits(reader.maxDoc()));
}
- consumer.mergeNumericField(field, mergeState, toMerge);
+ consumer.mergeNumericField(field, mergeState, toMerge, docsWithField);
}
}
success = true;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java Tue Aug 20 22:47:30 2013
@@ -230,6 +230,12 @@ public final class SegmentReader extends
}
@Override
+ public Bits getDocsWithField(String field) throws IOException {
+ ensureOpen();
+ return core.getDocsWithField(field);
+ }
+
+ @Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
ensureOpen();
return core.getBinaryDocValues(field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java Tue Aug 20 22:47:30 2013
@@ -92,6 +92,12 @@ public final class SlowCompositeReaderWr
}
@Override
+ public Bits getDocsWithField(String field) throws IOException {
+ ensureOpen();
+ return MultiDocValues.getDocsWithField(in, field);
+ }
+
+ @Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getBinaryValues(in, field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java Tue Aug 20 22:47:30 2013
@@ -37,12 +37,13 @@ public abstract class SortedDocValues ex
* Returns the ordinal for the specified docID.
* @param docID document ID to lookup
* @return ordinal for the document: this is dense, starts at 0, then
- * increments by 1 for the next value in sorted order.
+ * increments by 1 for the next value in sorted order. Note that
+ * missing values are indicated by -1.
*/
public abstract int getOrd(int docID);
/** Retrieves the value for the specified ordinal.
- * @param ord ordinal to lookup
+ * @param ord ordinal to lookup (must be >= 0 and < {@link #getValueCount()})
* @param result will be populated with the ordinal's value
* @see #getOrd(int)
*/
@@ -71,7 +72,7 @@ public abstract class SortedDocValues ex
public static final SortedDocValues EMPTY = new SortedDocValues() {
@Override
public int getOrd(int docID) {
- return 0;
+ return -1;
}
@Override
@@ -83,7 +84,7 @@ public abstract class SortedDocValues ex
@Override
public int getValueCount() {
- return 1;
+ return 0;
}
};
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java Tue Aug 20 22:47:30 2013
@@ -30,19 +30,19 @@ import org.apache.lucene.util.BytesRefHa
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
/** Buffers up pending byte[] per doc, deref and sorting via
* int ord, then flushes when segment flushes. */
class SortedDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
- private AppendingPackedLongBuffer pending;
+ private AppendingDeltaPackedLongBuffer pending;
private final Counter iwBytesUsed;
private long bytesUsed; // this currently only tracks differences in 'pending'
private final FieldInfo fieldInfo;
- private static final BytesRef EMPTY = new BytesRef(BytesRef.EMPTY_BYTES);
+ private static final int EMPTY_ORD = -1;
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
@@ -52,7 +52,7 @@ class SortedDocValuesWriter extends DocV
new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
- pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
+ pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -70,7 +70,7 @@ class SortedDocValuesWriter extends DocV
// Fill in any holes:
while(pending.size() < docID) {
- addOneValue(EMPTY);
+ pending.add(EMPTY_ORD);
}
addOneValue(value);
@@ -79,8 +79,9 @@ class SortedDocValuesWriter extends DocV
@Override
public void finish(int maxDoc) {
while(pending.size() < maxDoc) {
- addOneValue(EMPTY);
+ pending.add(EMPTY_ORD);
}
+ updateBytesUsed();
}
private void addOneValue(BytesRef value) {
@@ -177,7 +178,7 @@ class SortedDocValuesWriter extends DocV
// iterates over the ords for each doc we have in ram
private class OrdsIterator implements Iterator<Number> {
- final AppendingPackedLongBuffer.Iterator iter = pending.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
final int ordMap[];
final int maxDoc;
int docUpto;
@@ -200,8 +201,7 @@ class SortedDocValuesWriter extends DocV
}
int ord = (int) iter.next();
docUpto++;
- // TODO: make reusable Number
- return ordMap[ord];
+ return ord == -1 ? ord : ordMap[ord];
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java Tue Aug 20 22:47:30 2013
@@ -132,26 +132,6 @@ public interface FieldCache {
}
};
}
-
- /** Returns MISSING/-1 ordinal for every document */
- public static final SortedDocValues EMPTY_TERMSINDEX = new SortedDocValues() {
- @Override
- public int getOrd(int docID) {
- return -1;
- }
-
- @Override
- public void lookupOrd(int ord, BytesRef result) {
- result.bytes = MISSING;
- result.offset = 0;
- result.length = 0;
- }
-
- @Override
- public int getValueCount() {
- return 0;
- }
- };
/**
* Placeholder indicating creation of this cache is currently in-progress.
@@ -447,13 +427,10 @@ public interface FieldCache {
}
};
-
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> and returns a bit set at the size of
* <code>reader.maxDoc()</code>, with turned on bits for each docid that
- * does have a value for this field. Note that if the field was only indexed
- * as DocValues then this method will not work (it will return a Bits stating
- * that no documents contain the field).
+ * does have a value for this field.
*/
public Bits getDocsWithField(AtomicReader reader, String field) throws IOException;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Aug 20 22:47:30 2013
@@ -695,8 +695,7 @@ class FieldCacheImpl implements FieldCac
// field does not exist or has no value
return new Bits.MatchNoBits(reader.maxDoc());
} else if (fieldInfo.hasDocValues()) {
- // doc values are dense
- return new Bits.MatchAllBits(reader.maxDoc());
+ return reader.getDocsWithField(field);
} else if (!fieldInfo.isIndexed()) {
return new Bits.MatchNoBits(reader.maxDoc());
}
@@ -1156,13 +1155,13 @@ class FieldCacheImpl implements FieldCac
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
- return EMPTY_TERMSINDEX;
+ return SortedDocValues.EMPTY;
} else if (info.hasDocValues()) {
// we don't try to build a sorted instance from numeric/binary doc
// values because dedup can be very costly
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
- return EMPTY_TERMSINDEX;
+ return SortedDocValues.EMPTY;
}
return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec Tue Aug 20 22:47:30 2013
@@ -17,3 +17,4 @@ org.apache.lucene.codecs.lucene40.Lucene
org.apache.lucene.codecs.lucene3x.Lucene3xCodec
org.apache.lucene.codecs.lucene41.Lucene41Codec
org.apache.lucene.codecs.lucene42.Lucene42Codec
+org.apache.lucene.codecs.lucene45.Lucene45Codec
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat Tue Aug 20 22:47:30 2013
@@ -14,3 +14,4 @@
# limitations under the License.
org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat
+org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java Tue Aug 20 22:47:30 2013
@@ -17,21 +17,27 @@ package org.apache.lucene;
* limitations under the License.
*/
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.codecs.*;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
-import org.apache.lucene.document.*;
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.*;
-import org.apache.lucene.store.*;
-import org.apache.lucene.util.*;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene45.Lucene45Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.BaseDirectoryWrapper;
+import org.apache.lucene.util.LuceneTestCase;
+
/* Intentionally outside of oal.index to verify fully
external codecs work fine */
public class TestExternalCodecs extends LuceneTestCase {
- private static final class CustomPerFieldCodec extends Lucene42Codec {
+ private static final class CustomPerFieldCodec extends Lucene45Codec {
private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly");
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java Tue Aug 20 22:47:30 2013
@@ -24,7 +24,7 @@ import org.apache.lucene.index.BaseCompr
* Tests Lucene42DocValuesFormat
*/
public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
- private final Codec codec = new Lucene42Codec();
+ private final Codec codec = new Lucene42RWCodec();
@Override
protected Codec getCodec() {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java Tue Aug 20 22:47:30 2013
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene45.Lucene45Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -78,9 +78,9 @@ public class TestPerFieldDocValuesFormat
Directory directory = newDirectory();
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
- final DocValuesFormat fast = DocValuesFormat.forName("Lucene42");
+ final DocValuesFormat fast = DocValuesFormat.forName("Lucene45");
final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
- iwc.setCodec(new Lucene42Codec() {
+ iwc.setCodec(new Lucene45Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if ("dv1".equals(field)) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java Tue Aug 20 22:47:30 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene45.Lucene45Codec;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
@@ -200,7 +200,7 @@ public class TestPerFieldPostingsFormat2
}
- public static class MockCodec extends Lucene42Codec {
+ public static class MockCodec extends Lucene45Codec {
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
final PostingsFormat mockSep = new MockSepPostingsFormat();
@@ -217,7 +217,7 @@ public class TestPerFieldPostingsFormat2
}
}
- public static class MockCodec2 extends Lucene42Codec {
+ public static class MockCodec2 extends Lucene45Codec {
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
@@ -268,7 +268,7 @@ public class TestPerFieldPostingsFormat2
}
public void testSameCodecDifferentInstance() throws Exception {
- Codec codec = new Lucene42Codec() {
+ Codec codec = new Lucene45Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
@@ -284,7 +284,7 @@ public class TestPerFieldPostingsFormat2
}
public void testSameCodecDifferentParams() throws Exception {
- Codec codec = new Lucene42Codec() {
+ Codec codec = new Lucene45Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java Tue Aug 20 22:47:30 2013
@@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene45.Lucene45Codec;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -1059,7 +1059,7 @@ public class TestAddIndexes extends Luce
aux2.close();
}
- private static final class CustomPerFieldCodec extends Lucene42Codec {
+ private static final class CustomPerFieldCodec extends Lucene45Codec {
private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep");
@@ -1110,7 +1110,7 @@ public class TestAddIndexes extends Luce
private static final class UnRegisteredCodec extends FilterCodec {
public UnRegisteredCodec() {
- super("NotRegistered", new Lucene42Codec());
+ super("NotRegistered", new Lucene45Codec());
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java Tue Aug 20 22:47:30 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene45.Lucene45Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexFileNames;
@@ -41,7 +41,7 @@ public class TestAllFilesHaveCodecHeader
public void test() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
- conf.setCodec(new Lucene42Codec());
+ conf.setCodec(new Lucene45Codec());
// riw should sometimes create docvalues fields, etc
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Tue Aug 20 22:47:30 2013
@@ -74,7 +74,7 @@ import org.junit.Ignore;
// we won't even be running the actual code, only the impostor
// @SuppressCodecs("Lucene4x")
// Sep codec cannot yet handle the offsets in our 4.x index!
-@SuppressCodecs({"Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Appending"})
+@SuppressCodecs({"Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Appending", "Lucene42"})
public class TestBackwardsCompatibility extends LuceneTestCase {
// Uncomment these cases & run them on an older Lucene version,
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Tue Aug 20 22:47:30 2013
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -49,7 +50,7 @@ public class TestDuelingCodecs extends L
public void setUp() throws Exception {
super.setUp();
- // for now its SimpleText vs Lucene42(random postings format)
+ // for now its SimpleText vs Lucene45(random postings format)
// as this gives the best overall coverage. when we have more
// codecs we should probably pick 2 from Codec.availableCodecs()
@@ -134,6 +135,11 @@ public class TestDuelingCodecs extends L
for (String trash : split) {
document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
}
+ // add a numeric dv field sometimes
+ document.removeFields("sparsenumeric");
+ if (random.nextInt(4) == 2) {
+ document.add(new NumericDocValuesField("sparsenumeric", random.nextInt()));
+ }
writer.addDocument(document);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java Tue Aug 20 22:47:30 2013
@@ -26,6 +26,7 @@ import org.apache.lucene.document.Numeri
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -323,4 +324,52 @@ public class TestMultiDocValues extends
ir2.close();
dir.close();
}
+
+ public void testDocsWithField() throws Exception {
+ assumeTrue("codec does not support docsWithField", defaultCodecSupportsDocsWithField());
+ Directory dir = newDirectory();
+
+ IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ int numDocs = atLeast(500);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ if (random().nextInt(4) >= 0) {
+ doc.add(new NumericDocValuesField("numbers", random().nextLong()));
+ }
+ doc.add(new NumericDocValuesField("numbersAlways", random().nextLong()));
+ iw.addDocument(doc);
+ if (random().nextInt(17) == 0) {
+ iw.commit();
+ }
+ }
+ DirectoryReader ir = iw.getReader();
+ iw.forceMerge(1);
+ DirectoryReader ir2 = iw.getReader();
+ AtomicReader merged = getOnlySegmentReader(ir2);
+ iw.close();
+
+ Bits multi = MultiDocValues.getDocsWithField(ir, "numbers");
+ Bits single = merged.getDocsWithField("numbers");
+ if (multi == null) {
+ assertNull(single);
+ } else {
+ assertEquals(single.length(), multi.length());
+ for (int i = 0; i < numDocs; i++) {
+ assertEquals(single.get(i), multi.get(i));
+ }
+ }
+
+ multi = MultiDocValues.getDocsWithField(ir, "numbersAlways");
+ single = merged.getDocsWithField("numbersAlways");
+ assertEquals(single.length(), multi.length());
+ for (int i = 0; i < numDocs; i++) {
+ assertEquals(single.get(i), multi.get(i));
+ }
+ ir.close();
+ ir2.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Tue Aug 20 22:47:30 2013
@@ -488,7 +488,7 @@ public class TestFieldCache extends Luce
} catch (IllegalStateException expected) {}
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary");
- assertTrue(bits instanceof Bits.MatchAllBits);
+ assertTrue(bits.get(0));
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
try {
@@ -518,7 +518,7 @@ public class TestFieldCache extends Luce
assertEquals(1, sortedSet.getValueCount());
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted");
- assertTrue(bits instanceof Bits.MatchAllBits);
+ assertTrue(bits.get(0));
// Numeric type: can be retrieved via getInts() and so on
Ints numeric = FieldCache.DEFAULT.getInts(ar, "numeric", false);
@@ -545,7 +545,7 @@ public class TestFieldCache extends Luce
} catch (IllegalStateException expected) {}
bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
- assertTrue(bits instanceof Bits.MatchAllBits);
+ assertTrue(bits.get(0));
// SortedSet type: can be retrieved via getDocTermOrds()
if (defaultCodecSupportsSortedSet()) {
@@ -577,7 +577,7 @@ public class TestFieldCache extends Luce
assertEquals(2, sortedSet.getValueCount());
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
- assertTrue(bits instanceof Bits.MatchAllBits);
+ assertTrue(bits.get(0));
}
ir.close();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java Tue Aug 20 22:47:30 2013
@@ -36,7 +36,7 @@ import org.apache.lucene.util.LuceneTest
/** Tests basic sorting on docvalues fields.
* These are mostly like TestSort's tests, except each test
* indexes the field up-front as docvalues, and checks no fieldcaches were made */
-@SuppressCodecs("Lucene3x")
+@SuppressCodecs({"Lucene3x", "Appending", "Lucene40", "Lucene41", "Lucene42"}) // avoid codecs that don't support "missing"
public class TestSortDocValues extends LuceneTestCase {
@Override
@@ -429,6 +429,70 @@ public class TestSortDocValues extends L
dir.close();
}
+ /** Tests sorting on type int with a missing value */
+ public void testIntMissing() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", -1));
+ doc.add(newStringField("value", "-1", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", 4));
+ doc.add(newStringField("value", "4", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ Sort sort = new Sort(new SortField("value", SortField.Type.INT));
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.totalHits);
+ // null is treated as a 0
+ assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
+ /** Tests sorting on type int, specifying the missing value should be treated as Integer.MAX_VALUE */
+ public void testIntMissingLast() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", -1));
+ doc.add(newStringField("value", "-1", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", 4));
+ doc.add(newStringField("value", "4", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ SortField sortField = new SortField("value", SortField.Type.INT);
+ sortField.setMissingValue(Integer.MAX_VALUE);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.totalHits);
+ // null is treated as a Integer.MAX_VALUE
+ assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
/** Tests sorting on type long */
public void testLong() throws IOException {
Directory dir = newDirectory();
@@ -497,6 +561,70 @@ public class TestSortDocValues extends L
dir.close();
}
+ /** Tests sorting on type long with a missing value */
+ public void testLongMissing() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", -1));
+ doc.add(newStringField("value", "-1", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", 4));
+ doc.add(newStringField("value", "4", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.totalHits);
+ // null is treated as 0
+ assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
+ /** Tests sorting on type long, specifying the missing value should be treated as Long.MAX_VALUE */
+ public void testLongMissingLast() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", -1));
+ doc.add(newStringField("value", "-1", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("value", 4));
+ doc.add(newStringField("value", "4", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ SortField sortField = new SortField("value", SortField.Type.LONG);
+ sortField.setMissingValue(Long.MAX_VALUE);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.totalHits);
+ // null is treated as Long.MAX_VALUE
+ assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
/** Tests sorting on type float */
public void testFloat() throws IOException {
Directory dir = newDirectory();
@@ -565,6 +693,70 @@ public class TestSortDocValues extends L
dir.close();
}
+ /** Tests sorting on type float with a missing value */
+ public void testFloatMissing() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new FloatDocValuesField("value", -1.3F));
+ doc.add(newStringField("value", "-1.3", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new FloatDocValuesField("value", 4.2F));
+ doc.add(newStringField("value", "4.2", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.totalHits);
+ // null is treated as 0
+ assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertEquals("4.2", searcher.doc(td.scoreDocs[2].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
+ /** Tests sorting on type float, specifying the missing value should be treated as Float.MAX_VALUE */
+ public void testFloatMissingLast() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new FloatDocValuesField("value", -1.3F));
+ doc.add(newStringField("value", "-1.3", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new FloatDocValuesField("value", 4.2F));
+ doc.add(newStringField("value", "4.2", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ SortField sortField = new SortField("value", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.MAX_VALUE);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.totalHits);
+ // null is treated as Float.MAX_VALUE
+ assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
/** Tests sorting on type double */
public void testDouble() throws IOException {
Directory dir = newDirectory();
@@ -671,4 +863,78 @@ public class TestSortDocValues extends L
ir.close();
dir.close();
}
+
+ /** Tests sorting on type double with a missing value */
+ public void testDoubleMissing() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("value", -1.3));
+ doc.add(newStringField("value", "-1.3", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("value", 4.2333333333333));
+ doc.add(newStringField("value", "4.2333333333333", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("value", 4.2333333333332));
+ doc.add(newStringField("value", "4.2333333333332", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(4, td.totalHits);
+ // null treated as a 0
+ assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[2].doc).get("value"));
+ assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[3].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
+
+ /** Tests sorting on type double, specifying the missing value should be treated as Double.MAX_VALUE */
+ public void testDoubleMissingLast() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("value", -1.3));
+ doc.add(newStringField("value", "-1.3", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("value", 4.2333333333333));
+ doc.add(newStringField("value", "4.2333333333333", Field.Store.YES));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("value", 4.2333333333332));
+ doc.add(newStringField("value", "4.2333333333332", Field.Store.YES));
+ writer.addDocument(doc);
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ SortField sortField = new SortField("value", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.MAX_VALUE);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(4, td.totalHits);
+ // null treated as Double.MAX_VALUE
+ assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
+ assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value"));
+ assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value"));
+ assertNull(searcher.doc(td.scoreDocs[3].doc).get("value"));
+
+ ir.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java?rev=1516012&r1=1516011&r2=1516012&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java Tue Aug 20 22:47:30 2013
@@ -25,8 +25,8 @@ import org.apache.lucene.codecs.Codec;
// enough to test the basics via Codec
public class TestNamedSPILoader extends LuceneTestCase {
public void testLookup() {
- Codec codec = Codec.forName("Lucene42");
- assertEquals("Lucene42", codec.getName());
+ Codec codec = Codec.forName("Lucene45");
+ assertEquals("Lucene45", codec.getName());
}
// we want an exception if its not found.
@@ -39,6 +39,6 @@ public class TestNamedSPILoader extends
public void testAvailableServices() {
Set<String> codecs = Codec.availableCodecs();
- assertTrue(codecs.contains("Lucene42"));
+ assertTrue(codecs.contains("Lucene45"));
}
}