You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2019/02/15 16:33:02 UTC
[lucene-solr] branch master updated: LUCENE-8292: Make TermsEnum
fully abstract (#574)
This is an automated email from the ASF dual-hosted git repository.
simonw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 4a513fa LUCENE-8292: Make TermsEnum fully abstract (#574)
4a513fa is described below
commit 4a513fa99f638cb65e0cae59bfdf7af410c0327a
Author: Simon Willnauer <si...@apache.org>
AuthorDate: Fri Feb 15 17:32:55 2019 +0100
LUCENE-8292: Make TermsEnum fully abstract (#574)
---
lucene/CHANGES.txt | 2 +
lucene/MIGRATE.txt | 10 +--
.../codecs/lucene70/Lucene70DocValuesProducer.java | 8 +--
.../lucene/codecs/blockterms/BlockTermsReader.java | 8 +--
.../blocktreeords/OrdsIntersectTermsEnum.java | 4 +-
.../codecs/blocktreeords/OrdsSegmentTermsEnum.java | 4 +-
.../codecs/bloom/BloomFilteringPostingsFormat.java | 3 +-
.../lucene/codecs/memory/DirectPostingsFormat.java | 9 +--
.../lucene/codecs/memory/FSTOrdTermsReader.java | 7 +-
.../lucene/codecs/memory/FSTTermsReader.java | 7 +-
.../codecs/simpletext/SimpleTextFieldsReader.java | 3 +-
.../simpletext/SimpleTextTermVectorsReader.java | 8 +--
.../codecs/blocktree/IntersectTermsEnum.java | 4 +-
.../lucene/codecs/blocktree/SegmentTermsEnum.java | 4 +-
.../compressing/CompressingTermVectorsReader.java | 8 +--
.../codecs/lucene80/Lucene80DocValuesProducer.java | 8 +--
.../org/apache/lucene/index/BaseTermsEnum.java | 75 ++++++++++++++++++++++
.../org/apache/lucene/index/FilterLeafReader.java | 10 +++
.../org/apache/lucene/index/FreqProxFields.java | 7 +-
.../org/apache/lucene/index/MultiTermsEnum.java | 2 +-
.../lucene/index/SortedDocValuesTermsEnum.java | 2 +-
.../lucene/index/SortedSetDocValuesTermsEnum.java | 2 +-
.../java/org/apache/lucene/index/TermsEnum.java | 37 ++---------
.../org/apache/lucene/search/FuzzyTermsEnum.java | 3 +-
.../test/org/apache/lucene/index/TestCodecs.java | 7 +-
.../apache/lucene/index/memory/MemoryIndex.java | 2 +-
.../idversion/IDVersionSegmentTermsEnum.java | 4 +-
.../codecs/ramonly/RAMOnlyPostingsFormat.java | 8 +--
.../apache/lucene/index/RandomPostingsTester.java | 7 +-
.../java/org/apache/solr/query/SolrRangeQuery.java | 3 +-
.../org/apache/solr/uninverting/DocTermOrds.java | 8 +--
31 files changed, 142 insertions(+), 132 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 76cf703..f7f6009 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -133,6 +133,8 @@ API Changes
* LUCENE-8609: Remove IndexWriter#numDocs() and IndexWriter#maxDoc() in favor
of IndexWriter#getDocStats(). (Simon Willnauer)
+* LUCENE-8292: Make TermsEnum fully abstract. (Simon Willnauer)
+
Changes in Runtime Behavior
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index 16e8101..4e4a16a 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -1,11 +1,11 @@
# Apache Lucene Migration Guide
-## TermsEnum.seekExact(BytesRef) is abstract (LUCENE-8662) ##
+## TermsEnum is now fully abstract (LUCENE-8292) ##
-TermsEnum.seekExact has been changed to abstract, so non-abstract subclass must implement it.
-The default implementation can be seekCeil(text) == SeekStatus.FOUND.
-This method is performance critical, so subclass SHOULD have its own implementation
-if possible instead of using the default implementation.
+TermsEnum has been changed to be fully abstract, so non-abstract subclasses must implement all of its methods.
+Non-performance-critical TermsEnums can use BaseTermsEnum as a base class instead. The change was motivated
+by several performance issues with FilterTermsEnum that caused significant slowdowns and massive memory consumption due
+to not delegating all methods from TermsEnum. See LUCENE-8292 and LUCENE-8662.
## RAMDirectory, RAMFile, RAMInputStream, RAMOutputStream removed ##
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index f5d5780..113a957 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -23,6 +23,7 @@ import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
@@ -984,7 +985,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
}
}
- private static class TermsDict extends TermsEnum {
+ private static class TermsDict extends BaseTermsEnum {
final TermsDictEntry entry;
final LongValues blockAddresses;
@@ -1032,11 +1033,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) throws IOException {
if (ord < 0 || ord >= entry.termsDictSize) {
throw new IndexOutOfBoundsException();
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
index 9ac0e34..964f616 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
@@ -29,6 +29,7 @@ import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
@@ -286,7 +287,7 @@ public class BlockTermsReader extends FieldsProducer {
}
// Iterates through terms in this field
- private final class SegmentTermsEnum extends TermsEnum {
+ private final class SegmentTermsEnum extends BaseTermsEnum {
private final IndexInput in;
private final BlockTermState state;
private final boolean doOrd;
@@ -686,11 +687,6 @@ public class BlockTermsReader extends FieldsProducer {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) throws IOException {
//System.out.println("BTR.seek by ord ord=" + ord);
if (indexEnum == null) {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
index a892549..9434ca8 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
@@ -20,10 +20,10 @@ package org.apache.lucene.codecs.blocktreeords;
import java.io.IOException;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -34,7 +34,7 @@ import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.fst.FST;
// NOTE: cannot seek!
-final class OrdsIntersectTermsEnum extends TermsEnum {
+final class OrdsIntersectTermsEnum extends BaseTermsEnum {
final IndexInput in;
private OrdsIntersectTermsEnumFrame[] stack;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
index bd67adc..f1d930e 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
@@ -25,10 +25,10 @@ import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
/** Iterates through terms in this field. */
-public final class OrdsSegmentTermsEnum extends TermsEnum {
+public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// Lazy init:
IndexInput in;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
index 28febf3..b9a2399 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
@@ -32,6 +32,7 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
@@ -291,7 +292,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
}
}
- static final class BloomFilteredTermsEnum extends TermsEnum {
+ static final class BloomFilteredTermsEnum extends BaseTermsEnum {
private Terms delegateTerms;
private TermsEnum delegateTermsEnum;
private final FuzzySet filter;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
index 50ab83d..d9590e1 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
@@ -27,6 +27,7 @@ import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
@@ -699,7 +700,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
return hasPayloads;
}
- private final class DirectTermsEnum extends TermsEnum {
+ private final class DirectTermsEnum extends BaseTermsEnum {
private final BytesRef scratch = new BytesRef();
private int termOrd;
@@ -944,7 +945,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
}
- private final class DirectIntersectTermsEnum extends TermsEnum {
+ private final class DirectIntersectTermsEnum extends BaseTermsEnum {
private final RunAutomaton runAutomaton;
private final CompiledAutomaton compiledAutomaton;
private int termOrd;
@@ -1508,10 +1509,6 @@ public final class DirectPostingsFormat extends PostingsFormat {
throw new UnsupportedOperationException();
}
- @Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
}
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
index 12f51f9..12110d9 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
@@ -305,7 +305,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
// Only wraps common operations for PBF interact
- abstract class BaseTermsEnum extends TermsEnum {
+ abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
/* Current term's ord, starts from 0 */
long ord;
@@ -627,11 +627,6 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
index a8cd0ff..43528ce 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
@@ -259,7 +259,7 @@ public class FSTTermsReader extends FieldsProducer {
}
// Only wraps common operations for PBF interact
- abstract class BaseTermsEnum extends TermsEnum {
+ abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
/* Current term stats + decoded metadata (customized by PBF) */
final BlockTermState state;
@@ -520,11 +520,6 @@ public class FSTTermsReader extends FieldsProducer {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
decoded = false;
doSeekCeil(target);
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
index 743dc4f..1dec0c8 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
@@ -27,6 +27,7 @@ import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
@@ -111,7 +112,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
}
- private class SimpleTextTermsEnum extends TermsEnum {
+ private class SimpleTextTermsEnum extends BaseTermsEnum {
private final IndexOptions indexOptions;
private int docFreq;
private long totalTermFreq;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
index ecbf5cb..d859a05 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
@@ -25,6 +25,7 @@ import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
@@ -338,7 +339,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private BytesRef payloads[];
}
- private static class SimpleTVTermsEnum extends TermsEnum {
+ private static class SimpleTVTermsEnum extends BaseTermsEnum {
SortedMap<BytesRef,SimpleTVPostings> terms;
Iterator<Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings>> iterator;
Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings> current;
@@ -359,11 +360,6 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
index 934b5f6..848bb0b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
@@ -19,11 +19,11 @@ package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -44,7 +44,7 @@ import org.apache.lucene.util.fst.Outputs;
* Likewise, in next it scans until it finds a term that matches the
* current automaton transition. */
-final class IntersectTermsEnum extends TermsEnum {
+final class IntersectTermsEnum extends BaseTermsEnum {
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
index 8e01275..c9d0ddf 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
@@ -21,10 +21,10 @@ import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -36,7 +36,7 @@ import org.apache.lucene.util.fst.Util;
/** Iterates through terms in this field. */
-final class SegmentTermsEnum extends TermsEnum {
+final class SegmentTermsEnum extends BaseTermsEnum {
// Lazy init:
IndexInput in;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
index 5dd3a64..866899c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
@@ -26,6 +26,7 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
@@ -825,7 +826,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
}
- private static class TVTermsEnum extends TermsEnum {
+ private static class TVTermsEnum extends BaseTermsEnum {
private int numTerms, startPos, ord;
private int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
@@ -907,11 +908,6 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java
index bbef30a..f660cbc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java
@@ -23,6 +23,7 @@ import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
@@ -926,7 +927,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
}
}
- private static class TermsDict extends TermsEnum {
+ private static class TermsDict extends BaseTermsEnum {
final TermsDictEntry entry;
final LongValues blockAddresses;
@@ -974,11 +975,6 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) throws IOException {
if (ord < 0 || ord >= entry.termsDictSize) {
throw new IndexOutOfBoundsException();
diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/BaseTermsEnum.java
new file mode 100644
index 0000000..0b0d094
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/BaseTermsEnum.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A base TermsEnum that adds default implementations for
+ * <ul>
+ * <li>{@link #attributes()}</li>
+ * <li>{@link #termState()}</li>
+ * <li>{@link #seekExact(BytesRef)}</li>
+ * <li>{@link #seekExact(BytesRef, TermState)}</li>
+ * </ul>
+ *
+ * In some cases, the default implementations may be slow and consume a lot of memory, so subclasses SHOULD provide
+ * their own implementations if possible.
+ */
+public abstract class BaseTermsEnum extends TermsEnum {
+
+ private AttributeSource atts = null;
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected BaseTermsEnum() {
+ super();
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ return new TermState() {
+ @Override
+ public void copyFrom(TermState other) {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+
+ @Override
+ public boolean seekExact(BytesRef text) throws IOException {
+ return seekCeil(text) == SeekStatus.FOUND;
+ }
+
+ @Override
+ public void seekExact(BytesRef term, TermState state) throws IOException {
+ if (!seekExact(term)) {
+ throw new IllegalArgumentException("term=" + term + " does not exist");
+ }
+ }
+
+ public AttributeSource attributes() {
+ if (atts == null) {
+ atts = new AttributeSource();
+ }
+ return atts;
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
index 5d12766..1d26d17 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
@@ -223,6 +223,16 @@ public abstract class FilterLeafReader extends LeafReader {
public ImpactsEnum impacts(int flags) throws IOException {
return in.impacts(flags);
}
+
+ @Override
+ public void seekExact(BytesRef term, TermState state) throws IOException {
+ in.seekExact(term, state);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ return in.termState();
+ }
}
/** Base class for filtering {@link PostingsEnum} implementations. */
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
index b1615c3..4ec9fd5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
@@ -124,7 +124,7 @@ class FreqProxFields extends Fields {
}
}
- private static class FreqProxTermsEnum extends TermsEnum {
+ private static class FreqProxTermsEnum extends BaseTermsEnum {
final FreqProxTermsWriterPerField terms;
final int[] sortedTermIDs;
final FreqProxPostingsArray postingsArray;
@@ -180,11 +180,6 @@ class FreqProxFields extends Fields {
}
}
- @Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
public void seekExact(long ord) {
this.ord = (int) ord;
int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]];
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
index d20c6c1..57eb42f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
@@ -32,7 +32,7 @@ import org.apache.lucene.util.PriorityQueue;
*
* @lucene.experimental
*/
-public final class MultiTermsEnum extends TermsEnum {
+public final class MultiTermsEnum extends BaseTermsEnum {
private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR = new Comparator<TermsEnumWithSlice>() {
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
index 5fe9a0d..2a06de6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
@@ -25,7 +25,7 @@ import org.apache.lucene.util.BytesRefBuilder;
/** Implements a {@link TermsEnum} wrapping a provided
* {@link SortedDocValues}. */
-class SortedDocValuesTermsEnum extends TermsEnum {
+class SortedDocValuesTermsEnum extends BaseTermsEnum {
private final SortedDocValues values;
private int currentOrd = -1;
private final BytesRefBuilder scratch;
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
index bbeb5c2..a07af60 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
@@ -25,7 +25,7 @@ import java.io.IOException;
/** Implements a {@link TermsEnum} wrapping a provided
* {@link SortedSetDocValues}. */
-class SortedSetDocValuesTermsEnum extends TermsEnum {
+class SortedSetDocValuesTermsEnum extends BaseTermsEnum {
private final SortedSetDocValues values;
private long currentOrd = -1;
private final BytesRefBuilder scratch;
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
index 7750bbb..2cca1df 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
@@ -42,21 +42,16 @@ import org.apache.lucene.util.BytesRefIterator;
* @lucene.experimental */
public abstract class TermsEnum implements BytesRefIterator {
- private AttributeSource atts = null;
-
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected TermsEnum() {
}
/** Returns the related attributes. */
- public AttributeSource attributes() {
- if (atts == null) atts = new AttributeSource();
- return atts;
- }
+ public abstract AttributeSource attributes();
/** Represents returned result from {@link #seekCeil}. */
- public static enum SeekStatus {
+ public enum SeekStatus {
/** The term was not found, and the end of iteration was hit. */
END,
/** The precise term was found. */
@@ -70,15 +65,11 @@ public abstract class TermsEnum implements BytesRefIterator {
* unpositioned. For some codecs, seekExact may be substantially faster than {@link #seekCeil}.
* <p>
*
- * The default implementation can be <code>seekCeil(text) == SeekStatus.FOUND; </code><br>
- * But this method is performance critical. In some cases, the default implementation may be slow and consume huge memory,
- * so subclass SHOULD have its own implementation if possible.
- *
+ *
* @return true if the term is found; return false if the enum is unpositioned.
*/
public abstract boolean seekExact(BytesRef text) throws IOException;
-
/** Seeks to the specified term, if it exists, or to the
* next (ceiling) term. Returns SeekStatus to
* indicate whether exact term was found, a different
@@ -114,11 +105,7 @@ public abstract class TermsEnum implements BytesRefIterator {
* @param term the term the TermState corresponds to
* @param state the {@link TermState}
* */
- public void seekExact(BytesRef term, TermState state) throws IOException {
- if (!seekExact(term)) {
- throw new IllegalArgumentException("term=" + term + " does not exist");
- }
- }
+ public abstract void seekExact(BytesRef term, TermState state) throws IOException;
/** Returns current term. Do not call this when the enum
* is unpositioned. */
@@ -192,14 +179,7 @@ public abstract class TermsEnum implements BytesRefIterator {
* @see TermState
* @see #seekExact(BytesRef, TermState)
*/
- public TermState termState() throws IOException {
- return new TermState() {
- @Override
- public void copyFrom(TermState other) {
- throw new UnsupportedOperationException();
- }
- };
- }
+ public abstract TermState termState() throws IOException;
/** An empty TermsEnum for quickly returning an empty instance e.g.
* in {@link org.apache.lucene.search.MultiTermQuery}
@@ -208,16 +188,11 @@ public abstract class TermsEnum implements BytesRefIterator {
* This should not be a problem, as the enum is always empty and
* the existence of unused Attributes does not matter.
*/
- public static final TermsEnum EMPTY = new TermsEnum() {
+ public static final TermsEnum EMPTY = new BaseTermsEnum() {
@Override
public SeekStatus seekCeil(BytesRef term) { return SeekStatus.END; }
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) {}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
index 375d3c2..a6d56e7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@@ -17,6 +17,7 @@
package org.apache.lucene.search;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
@@ -44,7 +45,7 @@ import java.util.Arrays;
* {@link BytesRef#compareTo}. Each term in the enumeration is
* greater than all that precede it.</p>
*/
-public final class FuzzyTermsEnum extends TermsEnum {
+public final class FuzzyTermsEnum extends BaseTermsEnum {
// NOTE: we can't subclass FilteredTermsEnum here because we need to sometimes change actualEnum:
private TermsEnum actualEnum;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index 6955d65..6094ce6 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -613,7 +613,7 @@ public class TestCodecs extends LuceneTestCase {
}
}
- private static class DataTermsEnum extends TermsEnum {
+ private static class DataTermsEnum extends BaseTermsEnum {
final FieldData fieldData;
private int upto = -1;
@@ -654,11 +654,6 @@ public class TestCodecs extends LuceneTestCase {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) {
throw new UnsupportedOperationException();
}
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index d0dae23..4337276 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -1333,7 +1333,7 @@ public class MemoryIndex {
}
}
- private class MemoryTermsEnum extends TermsEnum {
+ private class MemoryTermsEnum extends BaseTermsEnum {
private final Info info;
private final BytesRef br = new BytesRef();
int termUpto = -1;
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
index 1f1b7ec..7f43b3f 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
@@ -20,11 +20,11 @@ import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -39,7 +39,7 @@ import org.apache.lucene.util.fst.Util;
* can cast it to call {@link #seekExact(BytesRef, long)} for
* optimistic-concurrency, and also {@link #getVersion} to get the
* version of the currently seek'd term. */
-public final class IDVersionSegmentTermsEnum extends TermsEnum {
+public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// Lazy init:
IndexInput in;
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
index e519ce0..0e46b3b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
@@ -34,6 +34,7 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
@@ -403,7 +404,7 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
}
}
- static class RAMTermsEnum extends TermsEnum {
+ static class RAMTermsEnum extends BaseTermsEnum {
Iterator<String> it;
String current;
private final RAMField ramField;
@@ -445,11 +446,6 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) {
throw new UnsupportedOperationException();
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
index e4f1e4c..d5cd53c 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
@@ -519,7 +519,7 @@ public class RandomPostingsTester {
}
}
- private static class SeedTermsEnum extends TermsEnum {
+ private static class SeedTermsEnum extends BaseTermsEnum {
final SortedMap<BytesRef,SeedAndOrd> terms;
final IndexOptions maxAllowed;
final boolean allowPayloads;
@@ -565,11 +565,6 @@ public class RandomPostingsTester {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long ord) {
throw new UnsupportedOperationException();
}
diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
index 21ccf03..be58bbc 100644
--- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
@@ -176,7 +177,7 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
}
- private class RangeTermsEnum extends TermsEnum {
+ private class RangeTermsEnum extends BaseTermsEnum {
TermsEnum te;
BytesRef curr;
diff --git a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
index 4ce085c..8b5cd5c 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
@@ -23,6 +23,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@@ -589,7 +590,7 @@ public class DocTermOrds implements Accountable {
* "wrap" our own terms index around the original IndexReader.
* Only valid if there are terms for this field from the original reader
*/
- private final class OrdWrappedTermsEnum extends TermsEnum {
+ private final class OrdWrappedTermsEnum extends BaseTermsEnum {
private final TermsEnum termsEnum;
private BytesRef term;
private long ord = -indexInterval-1; // force "real" seek
@@ -704,11 +705,6 @@ public class DocTermOrds implements Accountable {
}
@Override
- public boolean seekExact(BytesRef text) throws IOException {
- return seekCeil(text) == SeekStatus.FOUND;
- }
-
- @Override
public void seekExact(long targetOrd) throws IOException {
int delta = (int) (targetOrd - ordBase - ord);
//System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);