You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by za...@apache.org on 2021/07/13 06:34:58 UTC
[lucene] branch main updated: LUCENE-9959: Add non thread local based API for term vector reader usage (#180)
This is an automated email from the ASF dual-hosted git repository.
zacharymorn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 180cfa2 LUCENE-9959: Add non thread local based API for term vector reader usage (#180)
180cfa2 is described below
commit 180cfa241b133c75c31daf3628db4979c949f7f6
Author: zacharymorn <za...@yahoo.com>
AuthorDate: Mon Jul 12 23:34:52 2021 -0700
LUCENE-9959: Add non thread local based API for term vector reader usage (#180)
---
lucene/CHANGES.txt | 3 ++
.../apache/lucene/codecs/TermVectorsReader.java | 12 ++-----
.../apache/lucene/index/BaseCompositeReader.java | 27 +++++++++++---
.../java/org/apache/lucene/index/CodecReader.java | 13 ++-----
.../apache/lucene/index/DocValuesLeafReader.java | 2 +-
.../org/apache/lucene/index/FilterLeafReader.java | 5 ++-
.../java/org/apache/lucene/index/IndexReader.java | 15 +++++++-
.../apache/lucene/index/MergeReaderWrapper.java | 19 ++++++----
.../apache/lucene/index/ParallelLeafReader.java | 31 +++++++++-------
.../apache/lucene/index/SegmentCoreReaders.java | 17 +++------
.../org/apache/lucene/index/SegmentReader.java | 16 +++++----
.../TermVectors.java} | 41 +++-------------------
.../lucene/index/TestExitableDirectoryReader.java | 9 ++++-
.../apache/lucene/index/TestFilterCodecReader.java | 13 ++++---
.../lucene/index/TestSegmentToThreadMapping.java | 3 +-
.../lucene/search/TestMultiThreadTermVectors.java | 12 +++++--
.../search/highlight/TermVectorLeafReader.java | 16 ++++++---
.../PostingsWithTermVectorsOffsetStrategy.java | 18 ++++++++--
.../uhighlight/TermVectorOffsetStrategy.java | 13 ++++++-
.../search/uhighlight/UnifiedHighlighter.java | 27 +++++++++-----
.../uhighlight/TestUnifiedHighlighterTermVec.java | 30 +++++++++-------
.../apache/lucene/index/memory/MemoryIndex.java | 23 +++++++-----
.../apache/lucene/index/AssertingLeafReader.java | 6 ----
.../apache/lucene/index/FieldFilterLeafReader.java | 23 +++++++-----
.../java/org/apache/lucene/search/QueryUtils.java | 4 +--
25 files changed, 227 insertions(+), 171 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c55f399..3fe2043 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -124,6 +124,9 @@ API Changes
* LUCENE-9998: Remove unused parameter fis in StoredFieldsWriter.finish() and TermVectorsWriter.finish(),
including those subclasses. (kkewwei)
+* LUCENE-9959: Add non thread local based API for term vector reader usage. (Zach Chen, Adrien Grand,
+ David Smiley, Robert Muir, Mike Drob)
+
Improvements
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
index 257621a..0ad39dd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
@@ -18,27 +18,19 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // javadocs
-import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.TermVectors;
/**
* Codec API for reading term vectors:
*
* @lucene.experimental
*/
-public abstract class TermVectorsReader implements Cloneable, Closeable {
+public abstract class TermVectorsReader extends TermVectors implements Cloneable, Closeable {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected TermVectorsReader() {}
/**
- * Returns term vectors for this document, or null if term vectors were not indexed. If offsets
- * are available they are in an {@link OffsetAttribute} available from the {@link
- * org.apache.lucene.index.PostingsEnum}.
- */
- public abstract Fields get(int doc) throws IOException;
-
- /**
* Checks consistency of this reader.
*
* <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
index 2a7d6e7..3f16f98 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
@@ -112,10 +112,29 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
}
@Override
- public final Fields getTermVectors(int docID) throws IOException {
- ensureOpen();
- final int i = readerIndex(docID); // find subreader num
- return subReaders[i].getTermVectors(docID - starts[i]); // dispatch to subreader
+ public final TermVectors getTermVectorsReader() {
+ TermVectors[] termVectors = new TermVectors[subReaders.length];
+
+ return new TermVectors() {
+ @Override
+ public Fields get(int doc) throws IOException {
+ ensureOpen();
+ final int i = readerIndex(doc); // find subreader num
+
+ if (termVectors[i] != null) {
+ return termVectors[i].get(doc - starts[i]); // dispatch to subreader
+ } else {
+ TermVectors reader = subReaders[i].getTermVectorsReader();
+ if (reader != null) {
+ // the getTermVectorsReader would clone a new instance, hence saving it into an array
+ // to avoid re-cloning from direct subReaders[i].getTermVectorsReader() call
+ termVectors[i] = reader;
+ return reader.get(doc - starts[i]);
+ }
+ return null;
+ }
+ }
+ };
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/CodecReader.java b/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
index e46d4bd..ab49534 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
@@ -41,10 +41,11 @@ public abstract class CodecReader extends LeafReader {
public abstract StoredFieldsReader getFieldsReader();
/**
- * Expert: retrieve thread-private TermVectorsReader
+ * Expert: retrieve TermVectorsReader
*
* @lucene.internal
*/
+ @Override
public abstract TermVectorsReader getTermVectorsReader();
/**
@@ -88,16 +89,6 @@ public abstract class CodecReader extends LeafReader {
getFieldsReader().visitDocument(docID, visitor);
}
- @Override
- public final Fields getTermVectors(int docID) throws IOException {
- TermVectorsReader termVectorsReader = getTermVectorsReader();
- if (termVectorsReader == null) {
- return null;
- }
- checkBounds(docID);
- return termVectorsReader.get(docID);
- }
-
private void checkBounds(int docID) {
Objects.checkIndex(docID, maxDoc());
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java
index 02b48e8..cf4f87a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java
@@ -69,7 +69,7 @@ abstract class DocValuesLeafReader extends LeafReader {
}
@Override
- public final Fields getTermVectors(int docID) throws IOException {
+ public TermVectors getTermVectorsReader() {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
index d591ff2..5d00c76 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
@@ -351,9 +351,8 @@ public abstract class FilterLeafReader extends LeafReader {
}
@Override
- public Fields getTermVectors(int docID) throws IOException {
- ensureOpen();
- return in.getTermVectors(docID);
+ public TermVectors getTermVectorsReader() {
+ return in.getTermVectorsReader();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
index 4f60f90..1956364 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
@@ -307,8 +307,21 @@ public abstract class IndexReader implements Closeable {
/**
* Retrieve term vectors for this document, or null if term vectors were not indexed. The returned
* Fields instance acts like a single-document inverted index (the docID will be 0).
+ *
+ * @deprecated Use {@link IndexReader#getTermVectorsReader} instead.
*/
- public abstract Fields getTermVectors(int docID) throws IOException;
+ @Deprecated
+ public final Fields getTermVectors(int docID) throws IOException {
+ TermVectors termVectors = getTermVectorsReader();
+ if (termVectors != null) {
+ return termVectors.get(docID);
+ }
+ return null;
+ }
+ ;
+
+ /** Get TermVectors from this index, or null if term vectors were not indexed. */
+ public abstract TermVectors getTermVectorsReader();
/**
* Retrieve term vector for this document and field, or null if term vectors were not indexed. The
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
index 8413cff..8926946 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
@@ -184,13 +184,18 @@ class MergeReaderWrapper extends LeafReader {
}
@Override
- public Fields getTermVectors(int docID) throws IOException {
- ensureOpen();
- checkBounds(docID);
- if (vectors == null) {
- return null;
- }
- return vectors.get(docID);
+ public TermVectors getTermVectorsReader() {
+ return new TermVectors() {
+ @Override
+ public Fields get(int docID) throws IOException {
+ ensureOpen();
+ checkBounds(docID);
+ if (vectors == null) {
+ return null;
+ }
+ return vectors.get(docID);
+ }
+ };
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
index ff70da2..df73fab 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
@@ -300,21 +300,26 @@ public class ParallelLeafReader extends LeafReader {
}
@Override
- public Fields getTermVectors(int docID) throws IOException {
- ensureOpen();
- ParallelFields fields = null;
- for (Map.Entry<String, LeafReader> ent : tvFieldToReader.entrySet()) {
- String fieldName = ent.getKey();
- Terms vector = ent.getValue().getTermVector(docID, fieldName);
- if (vector != null) {
- if (fields == null) {
- fields = new ParallelFields();
+ public TermVectors getTermVectorsReader() {
+ return new TermVectors() {
+ @Override
+ public Fields get(int doc) throws IOException {
+ ensureOpen();
+ ParallelFields fields = null;
+ for (Map.Entry<String, LeafReader> ent : tvFieldToReader.entrySet()) {
+ String fieldName = ent.getKey();
+ Terms vector = ent.getValue().getTermVector(doc, fieldName);
+ if (vector != null) {
+ if (fields == null) {
+ fields = new ParallelFields();
+ }
+ fields.addField(fieldName, vector);
+ }
}
- fields.addField(fieldName, vector);
- }
- }
- return fields;
+ return fields;
+ }
+ };
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
index 7f03941..3f81147 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
@@ -57,7 +57,7 @@ final class SegmentCoreReaders {
final NormsProducer normsProducer;
final StoredFieldsReader fieldsReaderOrig;
- final TermVectorsReader termVectorsReaderOrig;
+ final TermVectorsReader termVectorsReader;
final PointsReader pointsReader;
final VectorReader vectorReader;
final CompoundDirectory cfsReader;
@@ -80,14 +80,6 @@ final class SegmentCoreReaders {
}
};
- final CloseableThreadLocal<TermVectorsReader> termVectorsLocal =
- new CloseableThreadLocal<TermVectorsReader>() {
- @Override
- protected TermVectorsReader initialValue() {
- return (termVectorsReaderOrig == null) ? null : termVectorsReaderOrig.clone();
- }
- };
-
private final Set<IndexReader.ClosedListener> coreClosedListeners =
Collections.synchronizedSet(new LinkedHashSet<IndexReader.ClosedListener>());
@@ -134,13 +126,13 @@ final class SegmentCoreReaders {
.fieldsReader(cfsDir, si.info, coreFieldInfos, context);
if (coreFieldInfos.hasVectors()) { // open term vector files only as needed
- termVectorsReaderOrig =
+ termVectorsReader =
si.info
.getCodec()
.termVectorsFormat()
.vectorsReader(cfsDir, si.info, coreFieldInfos, context);
} else {
- termVectorsReaderOrig = null;
+ termVectorsReader = null;
}
if (coreFieldInfos.hasPointValues()) {
@@ -186,10 +178,9 @@ final class SegmentCoreReaders {
if (ref.decrementAndGet() == 0) {
try (Closeable finalizer = this::notifyCoreClosedListeners) {
IOUtils.close(
- termVectorsLocal,
fieldsReaderLocal,
fields,
- termVectorsReaderOrig,
+ termVectorsReader,
fieldsReaderOrig,
cfsReader,
normsProducer,
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
index 0e98e8b..d2d0a6d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
@@ -243,12 +243,6 @@ public final class SegmentReader extends CodecReader {
}
@Override
- public TermVectorsReader getTermVectorsReader() {
- ensureOpen();
- return core.termVectorsLocal.get();
- }
-
- @Override
public StoredFieldsReader getFieldsReader() {
ensureOpen();
return core.fieldsReaderLocal.get();
@@ -311,6 +305,16 @@ public final class SegmentReader extends CodecReader {
private final Set<ClosedListener> readerClosedListeners = new CopyOnWriteArraySet<>();
@Override
+ public TermVectorsReader getTermVectorsReader() {
+ ensureOpen();
+ if (core.termVectorsReader != null) {
+ return core.termVectorsReader.clone();
+ } else {
+ return null;
+ }
+ }
+
+ @Override
void notifyReaderClosedListeners() throws IOException {
synchronized (readerClosedListeners) {
IOUtils.applyToAll(readerClosedListeners, l -> l.onClose(readerCacheHelper.getKey()));
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/index/TermVectors.java
similarity index 56%
copy from lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
copy to lucene/core/src/java/org/apache/lucene/index/TermVectors.java
index 257621a..4bb69ab 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermVectors.java
@@ -14,22 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs;
+package org.apache.lucene.index;
-import java.io.Closeable;
import java.io.IOException;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // javadocs
-import org.apache.lucene.index.Fields;
-
-/**
- * Codec API for reading term vectors:
- *
- * @lucene.experimental
- */
-public abstract class TermVectorsReader implements Cloneable, Closeable {
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+/** Index API to access TermVectors */
+public abstract class TermVectors {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
- protected TermVectorsReader() {}
+ protected TermVectors() {}
/**
* Returns term vectors for this document, or null if term vectors were not indexed. If offsets
@@ -37,28 +30,4 @@ public abstract class TermVectorsReader implements Cloneable, Closeable {
* org.apache.lucene.index.PostingsEnum}.
*/
public abstract Fields get(int doc) throws IOException;
-
- /**
- * Checks consistency of this reader.
- *
- * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
- * against large data files.
- *
- * @lucene.internal
- */
- public abstract void checkIntegrity() throws IOException;
-
- /** Create a clone that one caller at a time may use to read term vectors. */
- @Override
- public abstract TermVectorsReader clone();
-
- /**
- * Returns an instance optimized for merging. This instance may only be consumed in the thread
- * that called {@link #getMergeInstance()}.
- *
- * <p>The default implementation returns {@code this}
- */
- public TermVectorsReader getMergeInstance() {
- return this;
- }
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java
index 195d65e..595ba6a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java
@@ -19,7 +19,14 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.*;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.ExitableDirectoryReader.ExitingReaderException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFilterCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestFilterCodecReader.java
index 5e0563c..4a959b4 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFilterCodecReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFilterCodecReader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
+import java.util.Objects;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -56,10 +57,10 @@ public class TestFilterCodecReader extends LuceneTestCase {
final Method subClassMethod =
subClass.getDeclaredMethod(
superClassMethod.getName(), superClassMethod.getParameterTypes());
- assertEquals(
- "getReturnType() difference",
- superClassMethod.getReturnType(),
- subClassMethod.getReturnType());
+ assertTrue(
+ "getReturnType() difference and not compatible",
+ isTypeEqualOrAssignable(
+ superClassMethod.getReturnType(), subClassMethod.getReturnType()));
} catch (
@SuppressWarnings("unused")
NoSuchMethodException e) {
@@ -67,4 +68,8 @@ public class TestFilterCodecReader extends LuceneTestCase {
}
}
}
+
+ private boolean isTypeEqualOrAssignable(Class<?> superClass, Class<?> subClass) {
+ return Objects.equals(subClass, superClass) || superClass.isAssignableFrom(subClass);
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java
index 54b9e5f..46f6dac 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java
@@ -24,6 +24,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -66,7 +67,7 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
}
@Override
- public Fields getTermVectors(int doc) {
+ public TermVectorsReader getTermVectorsReader() {
return null;
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
index dfd3d2b..da57136 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
@@ -18,11 +18,15 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
@@ -123,11 +127,13 @@ public class TestMultiThreadTermVectors extends LuceneTestCase {
private void testTermVectors() throws Exception {
// check:
int numDocs = reader.numDocs();
+ TermVectors termVectors = reader.getTermVectorsReader();
for (int docId = 0; docId < numDocs; docId++) {
- Fields vectors = reader.getTermVectors(docId);
+ // reader is StandardDirectoryReader, method impl from BaseCompositeReader
+ Fields vectors = termVectors.get(docId);
// verify vectors result
verifyVectors(vectors, docId);
- Terms vector = reader.getTermVectors(docId).terms("field");
+ Terms vector = termVectors.get(docId).terms("field");
verifyVector(vector.iterator(), docId);
}
}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
index cc1470a..c7f898c 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
@@ -33,6 +33,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
@@ -168,11 +169,16 @@ public class TermVectorLeafReader extends LeafReader {
public void checkIntegrity() throws IOException {}
@Override
- public Fields getTermVectors(int docID) throws IOException {
- if (docID != 0) {
- return null;
- }
- return fields;
+ public TermVectors getTermVectorsReader() {
+ return new TermVectors() {
+ @Override
+ public Fields get(int docID) {
+ if (docID != 0) {
+ return null;
+ }
+ return fields;
+ }
+ };
}
@Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
index ae9405b..b1583c7 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
@@ -17,7 +17,9 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
/**
@@ -35,11 +37,21 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
@Override
public OffsetsEnum getOffsetsEnum(LeafReader leafReader, int docId, String content)
throws IOException {
- Terms docTerms = leafReader.getTermVector(docId, getField());
- if (docTerms == null) {
+ Terms tvTerms = null;
+
+ TermVectors termVectors = leafReader.getTermVectorsReader();
+ if (termVectors != null) {
+ Fields vectors = termVectors.get(docId);
+ if (vectors != null) {
+ tvTerms = vectors.terms(getField());
+ }
+ }
+
+ if (tvTerms == null) {
return OffsetsEnum.EMPTY;
}
- leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms, getField());
+
+ leafReader = new TermVectorFilteredLeafReader(leafReader, tvTerms, getField());
return createOffsetsEnumFromReader(leafReader, docId);
}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
index 84c0120..76c0db3 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
@@ -17,7 +17,9 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.highlight.TermVectorLeafReader;
@@ -40,7 +42,16 @@ public class TermVectorOffsetStrategy extends FieldOffsetStrategy {
@Override
public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content)
throws IOException {
- Terms tvTerms = reader.getTermVector(docId, getField());
+ Terms tvTerms = null;
+
+ TermVectors termVectors = reader.getTermVectorsReader();
+ if (termVectors != null) {
+ Fields vectors = termVectors.get(docId);
+ if (vectors != null) {
+ tvTerms = vectors.terms(getField());
+ }
+ }
+
if (tvTerms == null) {
return OffsetsEnum.EMPTY;
}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index b84cd02..c71d186 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -48,6 +48,7 @@ import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermVectors;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@@ -1102,11 +1103,10 @@ public class UnifiedHighlighter {
return this.values;
}
}
-
/**
- * Wraps an IndexReader that remembers/caches the last call to {@link
- * LeafReader#getTermVectors(int)} so that if the next call has the same ID, then it is reused. If
- * TV's were column-stride (like doc-values), there would be no need for this.
+ * Wraps an IndexReader that remembers/caches the last call to {@link TermVectors#get(int)} so
+ * that if the next call has the same ID, then it is reused. If TV's were column-stride (like
+ * doc-values), there would be no need for this.
*/
private static class TermVectorReusingLeafReader extends FilterLeafReader {
@@ -1136,12 +1136,21 @@ public class UnifiedHighlighter {
}
@Override
- public Fields getTermVectors(int docID) throws IOException {
- if (docID != lastDocId) {
- lastDocId = docID;
- tvFields = in.getTermVectors(docID);
+ public TermVectors getTermVectorsReader() {
+ if (in.getTermVectorsReader() == null) {
+ return null;
}
- return tvFields;
+
+ return new TermVectors() {
+ @Override
+ public Fields get(int docID) throws IOException {
+ if (docID != lastDocId) {
+ lastDocId = docID;
+ tvFields = in.getTermVectorsReader().get(docID);
+ }
+ return tvFields;
+ }
+ };
}
@Override
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java
index 002855e..fbe80de 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java
@@ -40,6 +40,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.ParallelLeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermVectors;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -133,20 +134,23 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
@Override
public LeafReader wrap(LeafReader reader) {
return new FilterLeafReader(reader) {
- BitSet seenDocIDs = new BitSet();
-
@Override
- public Fields getTermVectors(int docID) throws IOException {
- // if we're invoked by ParallelLeafReader then we can't do our assertion. TODO see
- // LUCENE-6868
- if (callStackContains(ParallelLeafReader.class) == false
- && callStackContains(CheckIndex.class) == false) {
- assertFalse(
- "Should not request TVs for doc more than once.", seenDocIDs.get(docID));
- seenDocIDs.set(docID);
- }
-
- return super.getTermVectors(docID);
+ public TermVectors getTermVectorsReader() {
+ BitSet seenDocIDs = new BitSet();
+ return new TermVectors() {
+ @Override
+ public Fields get(int docID) throws IOException {
+ // if we're invoked by ParallelLeafReader then we can't do our assertion. TODO
+ // see LUCENE-6868
+ if (callStackContains(ParallelLeafReader.class) == false
+ && callStackContains(CheckIndex.class) == false) {
+ assertFalse(
+ "Should not request TVs for doc more than once.", seenDocIDs.get(docID));
+ seenDocIDs.set(docID);
+ }
+ return reader.getTermVectorsReader().get(docID);
+ }
+ };
}
@Override
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 660ecd8..f22bc8f 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -1235,6 +1235,20 @@ public class MemoryIndex {
fieldInfos = new FieldInfos(fieldInfosArr);
}
+ @Override
+ public TermVectors getTermVectorsReader() {
+ return new TermVectors() {
+ @Override
+ public Fields get(int docID) {
+ if (docID == 0) {
+ return memoryFields;
+ } else {
+ return null;
+ }
+ }
+ };
+ }
+
private Info getInfoForExpectedDocValuesType(String fieldName, DocValuesType expectedType) {
if (expectedType == DocValuesType.NONE) {
return null;
@@ -1723,15 +1737,6 @@ public class MemoryIndex {
}
@Override
- public Fields getTermVectors(int docID) {
- if (docID == 0) {
- return memoryFields;
- } else {
- return null;
- }
- }
-
- @Override
public int numDocs() {
if (DEBUG) System.err.println("MemoryIndexReader.numDocs");
return 1;
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
index 636603f..7f6f7a3 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
@@ -86,12 +86,6 @@ public class AssertingLeafReader extends FilterLeafReader {
return terms == null ? null : new AssertingTerms(terms);
}
- @Override
- public Fields getTermVectors(int docID) throws IOException {
- Fields fields = super.getTermVectors(docID);
- return fields == null ? null : new AssertingFields(fields);
- }
-
/** Wraps a Fields but with additional asserts */
public static class AssertingFields extends FilterFields {
public AssertingFields(Fields in) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java
index 57c92e8..1161b7d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java
@@ -56,15 +56,20 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
}
@Override
- public Fields getTermVectors(int docID) throws IOException {
- Fields f = super.getTermVectors(docID);
- if (f == null) {
- return null;
- }
- f = new FieldFilterFields(f);
- // we need to check for emptyness, so we can return
- // null:
- return f.iterator().hasNext() ? f : null;
+ public TermVectors getTermVectorsReader() {
+ return new TermVectors() {
+ @Override
+ public Fields get(int docID) throws IOException {
+ Fields f = in.getTermVectorsReader().get(docID);
+ if (f == null) {
+ return null;
+ }
+ f = new FieldFilterFields(f);
+ // we need to check for emptyness, so we can return
+ // null:
+ return f.iterator().hasNext() ? f : null;
+ }
+ };
}
@Override
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index cbeb9b3..b74066d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -23,9 +23,9 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.List;
import java.util.Random;
+import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
@@ -241,7 +241,7 @@ public class QueryUtils {
public void checkIntegrity() throws IOException {}
@Override
- public Fields getTermVectors(int docID) throws IOException {
+ public TermVectorsReader getTermVectorsReader() {
return null;
}