You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2018/01/31 13:55:03 UTC
[2/3] lucene-solr:master: LUCENE-4198: Give codecs the opportunity to
index impacts.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java
new file mode 100644
index 0000000..cb1f54a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene50;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+
+final class Lucene50ScoreSkipReader extends Lucene50SkipReader {
+ // Skip reader that buffers the impact bytes read at each skip level and decodes them into a max score on demand.
+ private final SimScorer scorer; // turns (freq, norm) pairs into scores
+ private final float[] maxScore; // per-level score bound, reset to globalMaxScore by init()
+ private final byte[][] impacts; // per-level buffer for the raw impact bytes last read from the skip stream
+ private final int[] impactsLength; // valid bytes in impacts[level]; 0 when nothing is waiting to be decoded
+ private final float globalMaxScore; // fallback bound, computed once in the constructor
+ private final ByteArrayDataInput badi = new ByteArrayDataInput(); // reused to decode impact buffers
+
+ public Lucene50ScoreSkipReader(int version, IndexInput skipStream, int maxSkipLevels,
+ boolean hasPos, boolean hasOffsets, boolean hasPayloads, SimScorer scorer) {
+ super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
+ if (version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { // this reader cannot work without indexed impacts
+ throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed");
+ }
+ this.scorer = Objects.requireNonNull(scorer);
+ this.maxScore = new float[maxSkipLevels];
+ this.impacts = new byte[maxSkipLevels][];
+ Arrays.fill(impacts, new byte[0]); // all levels start on one shared empty array; readImpacts swaps in a bigger one when needed
+ this.impactsLength = new int[maxSkipLevels];
+ this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1); // NOTE(review): presumably the highest score this scorer can return - confirm against SimScorer's contract
+ }
+
+ @Override
+ public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException {
+ super.init(skipPointer, docBasePointer, posBasePointer, payBasePointer, df);
+ Arrays.fill(impactsLength, 0); // forget impacts still pending from a previous use
+ Arrays.fill(maxScore, globalMaxScore); // nothing decoded yet, so every level falls back to the global bound
+ }
+
+ /** Upper bound of scores up to {@code upTo} included; falls back to the global bound when no skip level reaches {@code upTo}. */
+ public float getMaxScore(int upTo) throws IOException {
+ for (int level = 0; level < numberOfSkipLevels; ++level) { // lowest level first: use the first level whose skip doc reaches upTo
+ if (upTo <= skipDoc[level]) {
+ return maxScore(level);
+ }
+ }
+ return globalMaxScore;
+ }
+
+ private float maxScore(int level) throws IOException { // returns the cached bound for a level, decoding pending impacts first
+ assert level < numberOfSkipLevels;
+ if (impactsLength[level] > 0) { // impacts were read for this level but not decoded yet
+ badi.reset(impacts[level], 0, impactsLength[level]);
+ maxScore[level] = readImpacts(badi, scorer);
+ impactsLength[level] = 0; // mark as decoded so the cached value is reused on the next call
+ }
+ return maxScore[level];
+ }
+
+ @Override
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ int length = skipStream.readVInt(); // impacts are read as a VInt byte count followed by that many bytes
+ if (impacts[level].length < length) { // grow this level's buffer when it is too small
+ impacts[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
+ }
+ skipStream.readBytes(impacts[level], 0, length);
+ impactsLength[level] = length; // only buffered here; decoding is deferred to maxScore(level)
+ }
+
+ static float readImpacts(ByteArrayDataInput in, SimScorer scorer) throws IOException { // decodes delta-encoded (freq, norm) pairs and returns the highest score among them, 0 if there are none
+ int freq = 0;
+ long norm = 0;
+ float maxScore = 0;
+ while (in.getPosition() < in.length()) {
+ int freqDelta = in.readVInt(); // low bit: an explicit norm delta follows; remaining bits: freq delta minus 1
+ if ((freqDelta & 0x01) != 0) {
+ freq += 1 + (freqDelta >>> 1);
+ norm += 1 + in.readZLong(); // stored norm delta is also offset by 1
+ } else {
+ freq += 1 + (freqDelta >>> 1);
+ norm++; // flag clear: norm advanced by exactly 1
+ }
+ maxScore = Math.max(maxScore, scorer.score(freq, norm));
+ }
+ return maxScore;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
index 8c037c5..b92cd42 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
@@ -52,7 +52,8 @@ import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZ
* Therefore, we'll trim df before passing it to the interface. see trim(int)
*
*/
-final class Lucene50SkipReader extends MultiLevelSkipListReader {
+class Lucene50SkipReader extends MultiLevelSkipListReader {
+ private final int version;
private long docPointer[];
private long posPointer[];
private long payPointer[];
@@ -65,8 +66,11 @@ final class Lucene50SkipReader extends MultiLevelSkipListReader {
private long lastDocPointer;
private int lastPosBufferUpto;
- public Lucene50SkipReader(IndexInput skipStream, int maxSkipLevels, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
+ public Lucene50SkipReader(int version,
+ IndexInput skipStream, int maxSkipLevels,
+ boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
super(skipStream, maxSkipLevels, BLOCK_SIZE, 8);
+ this.version = version;
docPointer = new long[maxSkipLevels];
if (hasPos) {
posPointer = new long[maxSkipLevels];
@@ -192,6 +196,17 @@ final class Lucene50SkipReader extends MultiLevelSkipListReader {
payPointer[level] += skipStream.readVLong();
}
}
+ readImpacts(level, skipStream);
return delta;
}
+
+ // Consumes the impact data that follows the pointer data of a skip entry. The default impl skips impacts
+ // since they are only useful if we have a SimScorer to compute the scores that impacts map to.
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ if (version >= Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { // for older versions nothing is consumed from the stream
+ // Impacts are length-prefixed: read the VInt byte count, then skip that many bytes
+ skipStream.skipBytes(skipStream.readVInt());
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
index a4556c6..cc94ed0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
@@ -19,9 +19,14 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Set;
+import java.util.SortedSet;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
/**
* Write skip lists with multiple levels, and support skip within block ints.
@@ -60,6 +65,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
private long curPayPointer;
private int curPosBufferUpto;
private int curPayloadByteUpto;
+ private CompetitiveFreqNormAccumulator[] curCompetitiveFreqNorms;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
@@ -79,6 +85,10 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
}
lastPayloadByteUpto = new int[maxSkipLevels];
}
+ curCompetitiveFreqNorms = new CompetitiveFreqNormAccumulator[maxSkipLevels];
+ for (int i = 0; i < maxSkipLevels; ++i) {
+ curCompetitiveFreqNorms[i] = new CompetitiveFreqNormAccumulator();
+ }
}
public void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
@@ -105,10 +115,15 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
lastPayFP = payOut.getFilePointer();
}
}
+ if (initialized) {
+ for (CompetitiveFreqNormAccumulator acc : curCompetitiveFreqNorms) {
+ acc.clear();
+ }
+ }
initialized = false;
}
- public void initSkip() {
+ private void initSkip() {
if (!initialized) {
super.resetSkip();
Arrays.fill(lastSkipDoc, 0);
@@ -122,6 +137,11 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
Arrays.fill(lastSkipPayPointer, lastPayFP);
}
}
+ // sets of competitive freq,norm pairs should be empty at this point
+ assert Arrays.stream(curCompetitiveFreqNorms)
+ .map(CompetitiveFreqNormAccumulator::getCompetitiveFreqNormPairs)
+ .mapToInt(Set::size)
+ .sum() == 0;
initialized = true;
}
}
@@ -129,7 +149,8 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
/**
* Sets the values for the current skip data.
*/
- public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
+ public void bufferSkip(int doc, CompetitiveFreqNormAccumulator competitiveFreqNorms,
+ int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
initSkip();
this.curDoc = doc;
this.curDocPointer = docOut.getFilePointer();
@@ -137,11 +158,15 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
this.curPayPointer = payFP;
this.curPosBufferUpto = posBufferUpto;
this.curPayloadByteUpto = payloadByteUpto;
+ this.curCompetitiveFreqNorms[0].addAll(competitiveFreqNorms);
bufferSkip(numDocs);
}
-
+
+ private final RAMOutputStream freqNormOut = new RAMOutputStream();
+
@Override
protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
+
int delta = curDoc - lastSkipDoc[level];
skipBuffer.writeVInt(delta);
@@ -165,5 +190,35 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
lastSkipPayPointer[level] = curPayPointer;
}
}
+
+ CompetitiveFreqNormAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
+ assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
+ if (level + 1 < numberOfSkipLevels) {
+ curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
+ }
+ writeImpacts(competitiveFreqNorms, freqNormOut);
+ skipBuffer.writeVInt(Math.toIntExact(freqNormOut.getFilePointer()));
+ freqNormOut.writeTo(skipBuffer);
+ freqNormOut.reset();
+ competitiveFreqNorms.clear();
+ }
+
+ static void writeImpacts(CompetitiveFreqNormAccumulator acc, IndexOutput out) throws IOException { // writes each competitive (freq, norm) pair as deltas from the previous pair, starting from (0, 0)
+ SortedSet<FreqAndNorm> freqAndNorms = acc.getCompetitiveFreqNormPairs();
+ FreqAndNorm previous = new FreqAndNorm(0, 0);
+ for (FreqAndNorm freqAndNorm : freqAndNorms) {
+ assert freqAndNorm.freq > previous.freq; // freq must strictly increase along the set
+ assert Long.compareUnsigned(freqAndNorm.norm, previous.norm) > 0; // and so must norm, compared as unsigned
+ int freqDelta = freqAndNorm.freq - previous.freq - 1; // the asserted delta is >= 1, so delta - 1 is stored
+ long normDelta = freqAndNorm.norm - previous.norm - 1;
+ if (normDelta == 0) {
+ // most of the time, norm only increases by 1, so we can fold everything into a single VInt whose low bit is 0
+ out.writeVInt(freqDelta << 1);
+ } else {
+ out.writeVInt((freqDelta << 1) | 1); // low bit 1 signals that an explicit norm delta follows
+ out.writeZLong(normDelta);
+ }
+ previous = freqAndNorm;
+ }
 }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
index 24eaf7a..6138896 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
@@ -100,7 +100,13 @@ final class IndexedDISI extends DocIdSetIterator {
private final long cost;
IndexedDISI(IndexInput in, long offset, long length, long cost) throws IOException {
- this.slice = in.slice("docs", offset, length);
+ this(in.slice("docs", offset, length), cost);
+ }
+
+ // This constructor allows passing the slice directly in case it helps reuse,
+ // see e.g. Lucene70 norms producer's merge instance
+ IndexedDISI(IndexInput slice, long cost) throws IOException {
+ this.slice = slice;
this.cost = cost;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index 386655e..7bea274 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
@@ -37,6 +38,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
@@ -1158,6 +1160,11 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { // not supported here: always throws
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public int docFreq() throws IOException {
throw new UnsupportedOperationException();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
index eb7c41a..c7310e8 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
@@ -40,11 +40,14 @@ import org.apache.lucene.util.IOUtils;
/**
* Reader for {@link Lucene70NormsFormat}
*/
-final class Lucene70NormsProducer extends NormsProducer {
+final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
// metadata maps (just file pointers and minimal stuff)
private final Map<Integer,NormsEntry> norms = new HashMap<>();
- private final IndexInput data;
private final int maxDoc;
+ private IndexInput data;
+ private boolean merging;
+ private Map<Integer, IndexInput> disiInputs;
+ private Map<Integer, RandomAccessInput> dataInputs;
Lucene70NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.maxDoc();
@@ -87,6 +90,22 @@ final class Lucene70NormsProducer extends NormsProducer {
}
}
+ @Override
+ public NormsProducer getMergeInstance() throws IOException { // returns a copy with its own data input and per-field slice caching turned on
+ Lucene70NormsProducer clone;
+ try {
+ clone = (Lucene70NormsProducer) super.clone(); // field-by-field copy: the norms metadata map is shared with this instance
+ } catch (CloneNotSupportedException e) {
+ // cannot happen: this class implements Cloneable
+ throw new RuntimeException(e);
+ }
+ clone.data = data.clone(); // the copy reads through its own clone of the data input
+ clone.dataInputs = new HashMap<>(); // cache filled by getDataInput, keyed by field number
+ clone.disiInputs = new HashMap<>(); // cache filled by getDisiInput, keyed by field number
+ clone.merging = true; // makes getDataInput / getDisiInput reuse cached slices
+ return clone;
+ }
+
static class NormsEntry {
byte bytesPerNorm;
long docsWithFieldOffset;
@@ -193,6 +212,34 @@ final class Lucene70NormsProducer extends NormsProducer {
}
}
+ private RandomAccessInput getDataInput(FieldInfo field, NormsEntry entry) throws IOException { // random-access slice over the field's norm values; cached per field number when merging
+ RandomAccessInput slice = null;
+ if (merging) { // the cache is only consulted on a merge instance
+ slice = dataInputs.get(field.number);
+ }
+ if (slice == null) { // not merging, or first request for this field
+ slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm); // cast to long before multiplying so the length is computed in 64 bits
+ if (merging) {
+ dataInputs.put(field.number, slice);
+ }
+ }
+ return slice;
+ }
+
+ private IndexInput getDisiInput(FieldInfo field, NormsEntry entry) throws IOException { // slice over the field's docs-with-field data; cached per field number when merging
+ IndexInput slice = null;
+ if (merging) { // the cache is only consulted on a merge instance
+ slice = disiInputs.get(field.number);
+ }
+ if (slice == null) { // not merging, or first request for this field
+ slice = data.slice("docs", entry.docsWithFieldOffset, entry.docsWithFieldLength);
+ if (merging) {
+ disiInputs.put(field.number, slice);
+ }
+ }
+ return slice;
+ }
+
@Override
public NumericDocValues getNorms(FieldInfo field) throws IOException {
final NormsEntry entry = norms.get(field.number);
@@ -209,7 +256,7 @@ final class Lucene70NormsProducer extends NormsProducer {
}
};
}
- final RandomAccessInput slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm);
+ final RandomAccessInput slice = getDataInput(field, entry);
switch (entry.bytesPerNorm) {
case 1:
return new DenseNormsIterator(maxDoc) {
@@ -245,7 +292,8 @@ final class Lucene70NormsProducer extends NormsProducer {
}
} else {
// sparse
- final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
+ final IndexInput disiInput = getDisiInput(field, entry);
+ final IndexedDISI disi = new IndexedDISI(disiInput, entry.numDocsWithField);
if (entry.bytesPerNorm == 0) {
return new SparseNormsIterator(disi) {
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
index 281b08f..36f0358 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
@@ -34,6 +34,7 @@ import java.util.TreeSet;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
@@ -117,7 +118,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);
// Write postings
@@ -137,7 +138,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
- consumer.write(maskedFields);
+ consumer.write(maskedFields, norms);
}
success = true;
} finally {
@@ -148,7 +149,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
}
@Override
- public void merge(MergeState mergeState) throws IOException {
+ public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null));
// Merge postings
@@ -161,7 +162,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
- consumer.merge(pfMergeState.apply(group.fields));
+ consumer.merge(pfMergeState.apply(group.fields), norms);
}
success = true;
} finally {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index c676568..7dd1aa9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -48,6 +48,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -1598,8 +1599,109 @@ public final class CheckIndex implements Closeable {
}
}
}
+
+ // Test score blocks
+ // We only score on freq to keep things simple and not pull norms
+ SimScorer scorer = new SimScorer(field) {
+ @Override
+ public float score(float freq, long norm) {
+ return freq;
+ }
+ };
+
+ // First check max scores and block uptos
+ int max = -1;
+ float maxScore = 0;
+ ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+ postings = termsEnum.postings(postings, PostingsEnum.FREQS);
+ for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
+ if (postings.nextDoc() != doc) {
+ throw new RuntimeException("Wrong next doc: " + doc + ", expected " + postings.docID());
+ }
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ if (postings.freq() != impacts.freq()) {
+ throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+ }
+ if (doc > max) {
+ max = impacts.advanceShallow(doc);
+ if (max < doc) {
+ throw new RuntimeException("max block doc id " + max + " must be greater than the target: " + doc);
+ }
+ maxScore = impacts.getMaxScore(max);
+ }
+ int max2 = impacts.advanceShallow(doc);
+ if (max != max2) {
+ throw new RuntimeException("max is not stable, initially had " + max + " but now " + max2);
+ }
+ float score = scorer.score(impacts.freq(), 1);
+ if (score > maxScore) {
+ throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+ }
+ }
+
+ // Now check advancing
+ impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+ postings = termsEnum.postings(postings, PostingsEnum.FREQS);
+
+ max = -1;
+ while (true) {
+ int doc = impacts.docID();
+ boolean advance;
+ int target;
+ if (((field.hashCode() + doc) & 1) == 1) {
+ advance = false;
+ target = doc + 1;
+ } else {
+ advance = true;
+ int delta = Math.min(1 + ((31 * field.hashCode() + doc) & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
+ target = impacts.docID() + delta;
+ }
+
+ if (target > max && target % 2 == 1) {
+ int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - target);
+ max = target + delta;
+ int m = impacts.advanceShallow(target);
+ if (m < target) {
+ throw new RuntimeException("Block max doc: " + m + " is less than the target " + target);
+ }
+ maxScore = impacts.getMaxScore(max);
+ }
+
+ if (advance) {
+ doc = impacts.advance(target);
+ } else {
+ doc = impacts.nextDoc();
+ }
+
+ if (postings.advance(target) != doc) {
+ throw new RuntimeException("Impacts do not advance to the same document as postings for target " + target + ", postings: " + postings.docID() + ", impacts: " + doc);
+ }
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ if (postings.freq() != impacts.freq()) {
+ throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+ }
+
+ if (doc >= max) {
+ int delta = Math.min((31 * field.hashCode() + target & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
+ max = doc + delta;
+ int m = impacts.advanceShallow(doc);
+ if (m < doc) {
+ throw new RuntimeException("Block max doc: " + m + " is less than the target " + doc);
+ }
+ maxScore = impacts.getMaxScore(max);
+ }
+
+ float score = scorer.score(impacts.freq(), 1);
+ if (score > maxScore) {
+ throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+ }
+ }
}
-
+
if (minTerm != null && status.termCount + status.delTermCount == 0) {
throw new RuntimeException("field=\"" + field + "\": minTerm is non-null yet we saw no terms: " + minTerm);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
index fd24105..4e05aa6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
@@ -31,6 +31,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.FieldType;
@@ -126,6 +127,7 @@ final class DefaultIndexingChain extends DocConsumer {
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write norms");
}
+ SegmentReadState readState = new SegmentReadState(state.directory, state.segmentInfo, state.fieldInfos, IOContext.READ, state.segmentSuffix);
t0 = System.nanoTime();
writeDocValues(state, sortMap);
@@ -159,7 +161,16 @@ final class DefaultIndexingChain extends DocConsumer {
}
}
- termsHash.flush(fieldsToFlush, state, sortMap);
+ try (NormsProducer norms = readState.fieldInfos.hasNorms()
+ ? state.segmentInfo.getCodec().normsFormat().normsProducer(readState)
+ : null) {
+ NormsProducer normsMergeInstance = null;
+ if (norms != null) {
+ // Use the merge instance in order to reuse the same IndexInput for all terms
+ normsMergeInstance = norms.getMergeInstance();
+ }
+ termsHash.flush(fieldsToFlush, state, sortMap, normsMergeInstance);
+ }
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write postings and finish vectors");
}
@@ -693,6 +704,9 @@ final class DefaultIndexingChain extends DocConsumer {
normValue = 0;
} else {
normValue = similarity.computeNorm(invertState);
+ if (normValue == 0) {
+ throw new IllegalStateException("Similarity " + similarity + " return 0 for non-empty field");
+ }
}
norms.addValue(docState.docID, normValue);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
index 0450038..4a9b660 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -214,6 +215,10 @@ public abstract class FilterLeafReader extends LeafReader {
return in.postings(reuse, flags);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { // pure delegation, arguments passed through unchanged
+ return in.impacts(scorer, flags);
+ }
}
/** Base class for filtering {@link PostingsEnum} implementations. */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
index 6498dc0..411b435 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
/**
@@ -181,7 +182,12 @@ public abstract class FilteredTermsEnum extends TermsEnum {
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return tenum.postings(reuse, flags);
}
-
+
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { // pure delegation to tenum, arguments passed through unchanged
+ return tenum.impacts(scorer, flags);
+ }
+
/** This enum does not support seeking!
* @throws UnsupportedOperationException In general, subclasses do not
* support seeking.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
index fb78a92..c3e7d71 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -273,6 +274,11 @@ class FreqProxFields extends Fields {
return docsEnum;
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { // not supported here: always throws
+ throw new UnsupportedOperationException();
+ }
+
/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
* without re-seeking the term dictionary.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index d953f8d..ac70669 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.IOUtils;
@@ -78,8 +79,9 @@ final class FreqProxTermsWriter extends TermsHash {
}
@Override
- public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
- super.flush(fieldsToFlush, state, sortMap);
+ public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
+ Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
+ super.flush(fieldsToFlush, state, sortMap, norms);
// Gather all fields that saw any postings:
List<FreqProxTermsWriterPerField> allFields = new ArrayList<>();
@@ -105,7 +107,7 @@ final class FreqProxTermsWriter extends TermsHash {
FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
boolean success = false;
try {
- consumer.write(fields);
+ consumer.write(fields, norms);
success = true;
} finally {
if (success) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
new file mode 100644
index 0000000..8deccff
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * Extension of {@link PostingsEnum} which also provides information about the
+ * produced scores.
+ * @lucene.experimental
+ */
+public abstract class ImpactsEnum extends PostingsEnum {
+
+ /** Sole constructor. */
+ protected ImpactsEnum() {}
+
+ /**
+ * Advance to the block of documents that contains {@code target} in order to
+ * get scoring information about this block. This method is implicitly called
+ * by {@link DocIdSetIterator#advance(int)} and
+ * {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
+ * current {@link DocIdSetIterator#docID()}.
+ * It returns a number that is greater than or equal to all doc IDs
+ * contained in the current block, but less than any doc ID of the next block.
+ * {@code target} must be &gt;= {@link #docID()} as well as all targets that
+ * have been passed to {@link #advanceShallow(int)} so far.
+ */
+ public abstract int advanceShallow(int target) throws IOException;
+
+ /**
+ * Return the maximum score of the documents between the last {@code target}
+ * that this iterator was {@link #advanceShallow(int) shallow-advanced} to
+ * (inclusive) and {@code upTo} (inclusive).
+ */
+ public abstract float getMaxScore(int upTo) throws IOException;
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
index 7db838b..7de8427 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -367,6 +368,11 @@ public final class MultiTermsEnum extends TermsEnum {
return docsEnum.reset(subDocs, upto);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
final static class TermsEnumWithSlice {
private final ReaderSlice subSlice;
TermsEnum terms;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
index c67b92d..ad60a94 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -24,6 +24,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.NormsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
@@ -109,10 +110,33 @@ final class SegmentMerger {
final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo,
mergeState.mergeFieldInfos, null, context);
+ final SegmentReadState segmentReadState = new SegmentReadState(directory, mergeState.segmentInfo, mergeState.mergeFieldInfos,
+ IOContext.READ, segmentWriteState.segmentSuffix);
+
+ if (mergeState.mergeFieldInfos.hasNorms()) {
+ if (mergeState.infoStream.isEnabled("SM")) {
+ t0 = System.nanoTime();
+ }
+ mergeNorms(segmentWriteState);
+ if (mergeState.infoStream.isEnabled("SM")) {
+ long t1 = System.nanoTime();
+ mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]");
+ }
+ }
+
if (mergeState.infoStream.isEnabled("SM")) {
t0 = System.nanoTime();
}
- mergeTerms(segmentWriteState);
+ try (NormsProducer norms = mergeState.mergeFieldInfos.hasNorms()
+ ? codec.normsFormat().normsProducer(segmentReadState)
+ : null) {
+ NormsProducer normsMergeInstance = null;
+ if (norms != null) {
+ // Use the merge instance in order to reuse the same IndexInput for all terms
+ normsMergeInstance = norms.getMergeInstance();
+ }
+ mergeTerms(segmentWriteState, normsMergeInstance);
+ }
if (mergeState.infoStream.isEnabled("SM")) {
long t1 = System.nanoTime();
mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge postings [" + numMerged + " docs]");
@@ -139,17 +163,6 @@ final class SegmentMerger {
long t1 = System.nanoTime();
mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge points [" + numMerged + " docs]");
}
-
- if (mergeState.mergeFieldInfos.hasNorms()) {
- if (mergeState.infoStream.isEnabled("SM")) {
- t0 = System.nanoTime();
- }
- mergeNorms(segmentWriteState);
- if (mergeState.infoStream.isEnabled("SM")) {
- long t1 = System.nanoTime();
- mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]");
- }
- }
if (mergeState.mergeFieldInfos.hasVectors()) {
if (mergeState.infoStream.isEnabled("SM")) {
@@ -225,9 +238,9 @@ final class SegmentMerger {
}
}
- private void mergeTerms(SegmentWriteState segmentWriteState) throws IOException {
+ private void mergeTerms(SegmentWriteState segmentWriteState, NormsProducer norms) throws IOException {
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState)) {
- consumer.merge(mergeState);
+ consumer.merge(mergeState, norms);
}
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java
new file mode 100644
index 0000000..9ba27e2
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * {@link ImpactsEnum} that doesn't index impacts but implements the API in a
+ * legal way. This should typically be used for short postings that do not need
+ * skipping.
+ */
+public final class SlowImpactsEnum extends ImpactsEnum {
+
+ private final PostingsEnum delegate;
+ private final float maxScore;
+
+ /** Wrap the given {@link PostingsEnum}. */
+ public SlowImpactsEnum(PostingsEnum delegate, float maxScore) {
+ this.delegate = delegate;
+ this.maxScore = maxScore;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return delegate.nextDoc();
+ }
+
+ @Override
+ public int docID() {
+ return delegate.docID();
+ }
+
+ @Override
+ public long cost() {
+ return delegate.cost();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return delegate.advance(target);
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return delegate.startOffset();
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ return delegate.nextPosition();
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return delegate.getPayload();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return delegate.freq();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return delegate.endOffset();
+ }
+
+ @Override
+ public int advanceShallow(int target) {
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public float getMaxScore(int maxDoc) {
+ return maxScore;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
index ccee7a3..70d4387 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -110,6 +111,11 @@ class SortedDocValuesTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
index eba95c9..9099ac8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
@@ -17,6 +17,7 @@
package org.apache.lucene.index;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -110,6 +111,11 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
index dff808e..054ca50 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.search.DocIdSetIterator;
@@ -37,8 +38,8 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
}
@Override
- void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
- super.flush(fieldsToFlush, state, sortMap);
+ void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
+ super.flush(fieldsToFlush, state, sortMap, norms);
if (tmpDirectory != null) {
if (sortMap == null) {
// we're lucky the index is already sorted, just rename the temporary file and return
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
index 46dc63c..1ac20dd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
@@ -53,7 +54,7 @@ class TermVectorsConsumer extends TermsHash {
}
@Override
- void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
+ void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
if (writer != null) {
int numDocs = state.segmentInfo.maxDoc();
assert numDocs > 0;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
index 4b5755a..7bbb3f7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
@@ -171,6 +172,12 @@ public abstract class TermsEnum implements BytesRefIterator {
public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;
/**
+ * Return an {@link ImpactsEnum} that computes impacts with {@code scorer}.
+ * @see #postings(PostingsEnum, int)
+ */
+ public abstract ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException;
+
+ /**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
* without re-seeking the term dictionary.
* <p>
@@ -228,7 +235,12 @@ public abstract class TermsEnum implements BytesRefIterator {
public PostingsEnum postings(PostingsEnum reuse, int flags) {
throw new IllegalStateException("this method should never be called");
}
-
+
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new IllegalStateException("this method should never be called");
+ }
+
@Override
public BytesRef next() {
return null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHash.java b/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
index bede2f8..f420aca 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
@@ -76,13 +77,14 @@ abstract class TermsHash {
bytePool.reset(false, false);
}
- void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
+ void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
+ Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
if (nextTermsHash != null) {
Map<String,TermsHashPerField> nextChildFields = new HashMap<>();
for (final Map.Entry<String,TermsHashPerField> entry : fieldsToFlush.entrySet()) {
nextChildFields.put(entry.getKey(), entry.getValue().nextPerField);
}
- nextTermsHash.flush(nextChildFields, state, sortMap);
+ nextTermsHash.flush(nextChildFields, state, sortMap, norms);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
index 881c5dd..72f9473 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@@ -17,11 +17,13 @@
package org.apache.lucene.search;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
@@ -273,6 +275,11 @@ public final class FuzzyTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return actualEnum.impacts(scorer, flags);
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
actualEnum.seekExact(term, state);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
index 5de8295..f3dc5ea 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
@@ -40,6 +40,11 @@ public final class LeafSimScorer {
maxScore = needsScores ? scorer.score(maxFreq, 1) : Float.MAX_VALUE;
}
+ /** Return the wrapped {@link SimScorer}. */
+ public SimScorer getSimScorer() {
+ return scorer;
+ }
+
private long getNormValue(int doc) throws IOException {
if (norms != null) {
boolean found = norms.advanceExact(doc);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
index d9335cf..1eba910 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
@@ -29,7 +29,6 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
@@ -208,9 +207,8 @@ public final class SynonymQuery extends Query {
termsEnum.seekExact(terms[i].bytes(), state);
long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq());
totalMaxFreq += termMaxFreq;
- PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq);
- subScorers.add(new TermScorer(this, postings, simScorer));
+ subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer));
}
}
if (subScorers.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index d629acd..f1f4415 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
@@ -46,21 +45,21 @@ public class TermQuery extends Query {
private final Similarity similarity;
private final Similarity.SimScorer simScorer;
private final TermStates termStates;
- private final boolean needsScores;
+ private final ScoreMode scoreMode;
- public TermWeight(IndexSearcher searcher, boolean needsScores,
+ public TermWeight(IndexSearcher searcher, ScoreMode scoreMode,
float boost, TermStates termStates) throws IOException {
super(TermQuery.this);
- if (needsScores && termStates == null) {
+ if (scoreMode.needsScores() && termStates == null) {
throw new IllegalStateException("termStates are required when scores are needed");
}
- this.needsScores = needsScores;
+ this.scoreMode = scoreMode;
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
final CollectionStatistics collectionStats;
final TermStatistics termStats;
- if (needsScores) {
+ if (scoreMode.needsScores()) {
collectionStats = searcher.collectionStatistics(term.field());
termStats = searcher.termStatistics(term, termStates);
} else {
@@ -97,10 +96,9 @@ public class TermQuery extends Query {
.getFieldInfos()
.fieldInfo(getTerm().field())
.getIndexOptions();
- PostingsEnum docs = termsEnum.postings(null, needsScores ? PostingsEnum.FREQS : PostingsEnum.NONE);
- assert docs != null;
float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq());
- return new TermScorer(this, docs, new LeafSimScorer(simScorer, context.reader(), needsScores, maxFreq));
+ LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq);
+ return new TermScorer(this, termsEnum, scoreMode, scorer);
}
private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) {
@@ -198,7 +196,7 @@ public class TermQuery extends Query {
termState = this.perReaderTermState;
}
- return new TermWeight(searcher, scoreMode.needsScores(), boost, termState);
+ return new TermWeight(searcher, scoreMode, boost, termState);
}
/** Prints a user-readable version of this query. */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 653a60e..fc426da 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -19,29 +19,92 @@ package org.apache.lucene.search;
import java.io.IOException;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.TermsEnum;
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
*/
final class TermScorer extends Scorer {
private final PostingsEnum postingsEnum;
+ private final DocIdSetIterator iterator;
private final LeafSimScorer docScorer;
+ private float minCompetitiveScore;
/**
* Construct a <code>TermScorer</code>.
*
* @param weight
* The weight of the <code>Term</code> in the query.
- * @param td
- * An iterator over the documents matching the <code>Term</code>.
+ * @param te
+ * A {@link TermsEnum} positioned on the expected term.
* @param docScorer
- * The <code>Similarity.SimScorer</code> implementation
- * to be used for score computations.
+ * A {@link LeafSimScorer} for the appropriate field.
*/
- TermScorer(Weight weight, PostingsEnum td, LeafSimScorer docScorer) {
+ TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
- this.postingsEnum = td;
+ if (scoreMode == ScoreMode.TOP_SCORES) {
+ ImpactsEnum impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS);
+ postingsEnum = impactsEnum;
+ iterator = new DocIdSetIterator() {
+
+ int upTo = -1;
+ float maxScore;
+
+ private int advanceTarget(int target) throws IOException {
+ if (minCompetitiveScore == 0) {
+ // no potential for skipping
+ return target;
+ }
+
+ if (target > upTo) {
+ upTo = impactsEnum.advanceShallow(target);
+ maxScore = impactsEnum.getMaxScore(upTo);
+ }
+
+ while (true) {
+ assert upTo >= target;
+
+ if (maxScore >= minCompetitiveScore) {
+ return target;
+ }
+
+ if (upTo == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+
+ target = upTo + 1;
+
+ upTo = impactsEnum.advanceShallow(target);
+ maxScore = impactsEnum.getMaxScore(upTo);
+ }
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return impactsEnum.advance(advanceTarget(target));
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(impactsEnum.docID() + 1);
+ }
+
+ @Override
+ public int docID() {
+ return impactsEnum.docID();
+ }
+
+ @Override
+ public long cost() {
+ return impactsEnum.cost();
+ }
+ };
+ } else {
+ postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
+ iterator = postingsEnum;
+ }
}
@Override
@@ -55,7 +118,7 @@ final class TermScorer extends Scorer {
@Override
public DocIdSetIterator iterator() {
- return postingsEnum;
+ return iterator;
}
@Override
@@ -69,6 +132,11 @@ final class TermScorer extends Scorer {
return docScorer.maxScore();
}
+ @Override
+ public void setMinCompetitiveScore(float minScore) {
+ this.minCompetitiveScore = minScore;
+ }
+
/** Returns a string representation of this <code>TermScorer</code>. */
@Override
public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java
new file mode 100644
index 0000000..5743e64
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestCompetitiveFreqNormAccumulator extends LuceneTestCase {
+
+ public void testBasics() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+ Set<FreqAndNorm> expected = new HashSet<>();
+
+ acc.add(3, 5);
+ expected.add(new FreqAndNorm(3, 5));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(6, 11);
+ expected.add(new FreqAndNorm(6, 11));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(10, 13);
+ expected.add(new FreqAndNorm(10, 13));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(1, 2);
+ expected.add(new FreqAndNorm(1, 2));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(7, 9);
+ expected.remove(new FreqAndNorm(6, 11));
+ expected.add(new FreqAndNorm(7, 9));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(8, 2);
+ expected.clear();
+ expected.add(new FreqAndNorm(10, 13));
+ expected.add(new FreqAndNorm(8, 2));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+ }
+
+ public void testExtremeNorms() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+ Set<FreqAndNorm> expected = new HashSet<>();
+
+ acc.add(3, 5);
+ expected.add(new FreqAndNorm(3, 5));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(10, 10000);
+ expected.add(new FreqAndNorm(10, 10000));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(5, 200);
+ expected.add(new FreqAndNorm(5, 200));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(20, -100);
+ expected.add(new FreqAndNorm(20, -100));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(30, -3);
+ expected.add(new FreqAndNorm(30, -3));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+ }
+
+ public void testOmitFreqs() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+
+ acc.add(1, 5);
+ acc.add(1, 7);
+ acc.add(1, 4);
+
+ assertEquals(Collections.singleton(new FreqAndNorm(1, 4)), acc.getCompetitiveFreqNormPairs());
+ }
+
+ public void testOmitNorms() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+
+ acc.add(5, 1);
+ acc.add(7, 1);
+ acc.add(4, 1);
+
+ assertEquals(Collections.singleton(new FreqAndNorm(7, 1)), acc.getCompetitiveFreqNormPairs());
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
index f2ed86c..d507b7b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
@@ -17,8 +17,11 @@
package org.apache.lucene.codecs.lucene50;
+import java.io.IOException;
+
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.document.Document;
@@ -27,7 +30,12 @@ import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.TestUtil;
/**
@@ -78,4 +86,56 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
shouldFail(10, -1);
shouldFail(10, 12);
}
+
+ public void testImpactSerialization() throws IOException {
+ // omit norms and omit freqs
+ doTestImpactSerialization(new int[] { 1 }, new long[] { 1L });
+
+ // omit freqs
+ doTestImpactSerialization(new int[] { 1 }, new long[] { 42L });
+ // omit freqs with very large norms
+ doTestImpactSerialization(new int[] { 1 }, new long[] { -100L });
+
+ // omit norms
+ doTestImpactSerialization(new int[] { 30 }, new long[] { 1L });
+ // omit norms with large freq
+ doTestImpactSerialization(new int[] { 500 }, new long[] { 1L });
+
+ // freqs and norms, basic
+ doTestImpactSerialization(
+ new int[] { 1, 3, 7, 15, 20, 28 },
+ new long[] { 7L, 9L, 10L, 11L, 13L, 14L });
+
+ // freqs and norms, high values
+ doTestImpactSerialization(
+ new int[] { 2, 10, 12, 50, 1000, 1005 },
+ new long[] { 2L, 10L, 50L, -100L, -80L, -3L });
+ }
+
+ /**
+  * Accumulates the given pairs, writes them with {@code Lucene50SkipWriter.writeImpacts},
+  * reads the bytes back through {@code Lucene50ScoreSkipReader.readImpacts}, and checks
+  * that the scorer is handed each pair, in order, exactly once.
+  *
+  * @param freqs frequencies to add; {@code freqs[i]} is paired with {@code norms[i]}
+  * @param norms norms to add, same length as {@code freqs}
+  */
+ private void doTestImpactSerialization(int[] freqs, long[] norms) throws IOException {
+   CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+   for (int i = 0; i < freqs.length; ++i) {
+     acc.add(freqs[i], norms[i]);
+   }
+   // One-element array so the anonymous scorer can report how many pairs it received.
+   final int[] seen = new int[1];
+   try (Directory dir = newDirectory()) {
+     try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
+       Lucene50SkipWriter.writeImpacts(acc, out);
+     }
+     try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
+       byte[] b = new byte[Math.toIntExact(in.length())];
+       in.readBytes(b, 0, b.length);
+       Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new SimScorer("") {
+         @Override
+         public float score(float freq, long norm) {
+           final int i = seen[0]++;
+           // JUnit assertions rather than `assert`, so the check cannot be disabled by JVM flags.
+           assertTrue("unexpected extra impact: freq=" + freq + " norm=" + norm, i < freqs.length);
+           assertEquals(freqs[i], freq, 0f);
+           assertEquals(norms[i], norm);
+           return 0;
+         }
+       });
+     }
+   }
+   // Without this, a reader that stops early would go unnoticed.
+   assertEquals("number of impacts read back", freqs.length, seen[0]);
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
index 804f507..84544bc 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -407,17 +408,17 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
final FieldsConsumer consumer = delegate.fieldsConsumer(state);
return new FieldsConsumer() {
@Override
- public void write(Fields fields) throws IOException {
- consumer.write(fields);
+ public void write(Fields fields, NormsProducer norms) throws IOException {
+ consumer.write(fields, norms);
}
@Override
- public void merge(MergeState mergeState) throws IOException {
+ public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
nbMergeCalls++;
for (FieldInfo fi : mergeState.mergeFieldInfos) {
fieldNames.add(fi.name);
}
- consumer.merge(mergeState);
+ consumer.merge(mergeState, norms);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index 4625f73..efe4587 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -17,6 +17,7 @@
package org.apache.lucene.index;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -28,10 +29,12 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -676,6 +679,10 @@ public class TestCodecs extends LuceneTestCase {
return new DataPostingsEnum(fieldData.terms[upto]);
}
+ /**
+  * Not implemented by this test enum; always throws.
+  *
+  * @throws UnsupportedOperationException on every call
+  */
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+   // Carry a message so a stray call is easy to diagnose from the stack trace.
+   throw new UnsupportedOperationException("impacts is not implemented by this test enum");
+ }
}
private static class DataPostingsEnum extends PostingsEnum {
@@ -752,9 +759,65 @@ public class TestCodecs extends LuceneTestCase {
Arrays.sort(fields);
FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
+ // Stand-in norms producer: longValue() is always 1 and iteration ends at si.maxDoc().
+ NormsProducer fakeNorms = new NormsProducer() {
+
+   @Override
+   public NumericDocValues getNorms(FieldInfo field) throws IOException {
+     return new NumericDocValues() {
+
+       // Current position; -1 until the iterator is first moved.
+       int doc = -1;
+
+       @Override
+       public int docID() {
+         return doc;
+       }
+
+       @Override
+       public int nextDoc() throws IOException {
+         return advance(doc + 1);
+       }
+
+       @Override
+       public int advance(int target) throws IOException {
+         doc = target < si.maxDoc() ? target : NO_MORE_DOCS;
+         return doc;
+       }
+
+       @Override
+       public boolean advanceExact(int target) throws IOException {
+         // Every target is reported as having a value.
+         doc = target;
+         return true;
+       }
+
+       @Override
+       public long longValue() throws IOException {
+         return 1L;
+       }
+
+       @Override
+       public long cost() {
+         return si.maxDoc();
+       }
+     };
+   }
+
+   @Override
+   public void checkIntegrity() throws IOException {}
+
+   @Override
+   public void close() throws IOException {}
+
+   @Override
+   public long ramBytesUsed() {
+     return 0;
+   }
+ };
boolean success = false;
try {
- consumer.write(new DataFields(fields));
+ consumer.write(new DataFields(fields), fakeNorms);
success = true;
} finally {
if (success) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
index 2758c96..73d3e6a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
@@ -24,12 +24,18 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -202,4 +208,57 @@ public class TestTermScorer extends LuceneTestCase {
// should not fail this time since norms are not necessary
weight2.scorer(forbiddenNorms.getContext()).iterator().nextDoc();
}
+
+ /**
+  * Indexes random documents into field "foo", then for each term value 0..14 checks that a
+  * collector created with {@code true} (COMPLETE) and one created with {@code false}
+  * (TOP_SCORES) return the same top docs, both for the plain term query and for the same
+  * query combined with a random FILTER clause.
+  */
+ public void testRandomTopDocs() throws IOException {
+   Directory directory = newDirectory();
+   IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig());
+   final int docCount = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data
+   for (int docIndex = 0; docIndex < docCount; ++docIndex) {
+     Document document = new Document();
+     final int shift = random().nextInt(5);
+     final int valueCount = random().nextInt(1 << shift);
+     final int firstValue = random().nextInt(10);
+     for (int offset = 0; offset < valueCount; ++offset) {
+       document.add(new StringField("foo", Integer.toString(firstValue + offset), Store.NO));
+     }
+     writer.addDocument(document);
+   }
+   IndexReader reader = DirectoryReader.open(writer);
+   writer.close();
+   IndexSearcher searcher = newSearcher(reader);
+
+   int termValue = 0;
+   while (termValue < 15) {
+     Query termQuery = new TermQuery(new Term("foo", Integer.toString(termValue)));
+
+     TopScoreDocCollector complete = TopScoreDocCollector.create(10, null, true); // COMPLETE
+     TopScoreDocCollector topScores = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
+
+     searcher.search(termQuery, complete);
+     searcher.search(termQuery, topScores);
+     assertTopDocsEquals(complete.topDocs(), topScores.topDocs());
+
+     final int filterValue = random().nextInt(15);
+     BooleanQuery.Builder builder = new BooleanQuery.Builder();
+     builder.add(termQuery, Occur.MUST);
+     builder.add(new TermQuery(new Term("foo", Integer.toString(filterValue))), Occur.FILTER);
+     Query filteredQuery = builder.build();
+
+     complete = TopScoreDocCollector.create(10, null, true); // COMPLETE
+     topScores = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
+     searcher.search(filteredQuery, complete);
+     searcher.search(filteredQuery, topScores);
+     assertTopDocsEquals(complete.topDocs(), topScores.topDocs());
+
+     ++termValue;
+   }
+   reader.close();
+   directory.close();
+ }
+
+ private static void assertTopDocsEquals(TopDocs td1, TopDocs td2) {
+ assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
+ for (int i = 0; i < td1.scoreDocs.length; ++i) {
+ ScoreDoc sd1 = td1.scoreDocs[i];
+ ScoreDoc sd2 = td2.scoreDocs[i];
+ assertEquals(sd1.doc, sd2.doc);
+ assertEquals(sd1.score, sd2.score, 0f);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
----------------------------------------------------------------------
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 0d8d949..4014e8c 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -42,6 +42,7 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@@ -1425,6 +1426,11 @@ public class MemoryIndex {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+   // Wrap the regular postings; the second argument is the score at (Float.MAX_VALUE, 1L).
+   final PostingsEnum delegate = postings(null, flags);
+   final float scoreAtMaxFreq = scorer.score(Float.MAX_VALUE, 1L);
+   return new SlowImpactsEnum(delegate, scoreAtMaxFreq);
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null;
this.seekExact(((OrdTermState)state).ord);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
index 54f4aa4..4203e07 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
@@ -23,7 +23,9 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
@@ -88,6 +90,11 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
}
@Override
+ public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
+   // Always rejects the call; the message names where impacts are implemented instead.
+   final String reason = "Should never be called, IDVersionSegmentTermsEnum implements impacts directly";
+   throw new UnsupportedOperationException(reason);
+ }
+
+ @Override
public long ramBytesUsed() {
return 0;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
index fc643d2..30e1980 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
@@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@@ -78,7 +79,7 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
}
@Override
- public void startTerm() {
+ public void startTerm(NumericDocValues norms) {
lastDocID = -1;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f410df81/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
index 0af64d9..d5f51e0 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
@@ -20,9 +20,12 @@ import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -1006,6 +1009,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+   // Only one posting, the slow impl is fine
+   // We could make this throw UOE but then CheckIndex is angry
+   final PostingsEnum delegate = postings(null, flags);
+   final float scoreAtMaxFreq = scorer.score(Float.MAX_VALUE, 1);
+   return new SlowImpactsEnum(delegate, scoreAtMaxFreq);
+ }
+
+ @Override
public void seekExact(BytesRef target, TermState otherState) {
// if (DEBUG) {
// System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);