Posted to commits@lucene.apache.org by dw...@apache.org on 2020/12/23 11:44:33 UTC
[lucene-solr] 04/11: :lucene:core - src/**
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch jira/LUCENE-9570
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 5bbe1f01811efbc11d984bcbddda47fe84febaac
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Wed Dec 23 11:17:15 2020 +0100
:lucene:core - src/**
---
gradle/validation/spotless.gradle | 18 +-
.../lucene50/Lucene50SkipWriter.java | 3 +-
.../org/apache/lucene/codecs/BlockTermState.java | 20 +-
.../src/java/org/apache/lucene/codecs/Codec.java | 104 +-
.../java/org/apache/lucene/codecs/CodecUtil.java | 504 +++++----
.../codecs/CompetitiveImpactAccumulator.java | 41 +-
.../apache/lucene/codecs/CompoundDirectory.java | 35 +-
.../org/apache/lucene/codecs/CompoundFormat.java | 18 +-
.../apache/lucene/codecs/DocValuesConsumer.java | 1035 ++++++++---------
.../org/apache/lucene/codecs/DocValuesFormat.java | 106 +-
.../apache/lucene/codecs/DocValuesProducer.java | 74 +-
.../org/apache/lucene/codecs/FieldInfosFormat.java | 26 +-
.../org/apache/lucene/codecs/FieldsConsumer.java | 78 +-
.../org/apache/lucene/codecs/FieldsProducer.java | 37 +-
.../java/org/apache/lucene/codecs/FilterCodec.java | 28 +-
.../org/apache/lucene/codecs/LiveDocsFormat.java | 31 +-
.../lucene/codecs/MultiLevelSkipListReader.java | 123 +-
.../lucene/codecs/MultiLevelSkipListWriter.java | 78 +-
.../apache/lucene/codecs/MutablePointValues.java | 10 +-
.../org/apache/lucene/codecs/NormsConsumer.java | 228 ++--
.../java/org/apache/lucene/codecs/NormsFormat.java | 30 +-
.../org/apache/lucene/codecs/NormsProducer.java | 41 +-
.../org/apache/lucene/codecs/PointsFormat.java | 80 +-
.../org/apache/lucene/codecs/PointsReader.java | 25 +-
.../org/apache/lucene/codecs/PointsWriter.java | 89 +-
.../org/apache/lucene/codecs/PostingsFormat.java | 105 +-
.../apache/lucene/codecs/PostingsReaderBase.java | 63 +-
.../apache/lucene/codecs/PostingsWriterBase.java | 61 +-
.../lucene/codecs/PushPostingsWriterBase.java | 78 +-
.../apache/lucene/codecs/SegmentInfoFormat.java | 25 +-
.../apache/lucene/codecs/StoredFieldsFormat.java | 24 +-
.../apache/lucene/codecs/StoredFieldsReader.java | 38 +-
.../apache/lucene/codecs/StoredFieldsWriter.java | 99 +-
.../java/org/apache/lucene/codecs/TermStats.java | 11 +-
.../apache/lucene/codecs/TermVectorsFormat.java | 25 +-
.../apache/lucene/codecs/TermVectorsReader.java | 48 +-
.../apache/lucene/codecs/TermVectorsWriter.java | 184 +--
.../org/apache/lucene/codecs/VectorFormat.java | 57 +-
.../org/apache/lucene/codecs/VectorReader.java | 22 +-
.../org/apache/lucene/codecs/VectorWriter.java | 49 +-
.../codecs/blocktree/BlockTreeTermsReader.java | 175 ++-
.../codecs/blocktree/BlockTreeTermsWriter.java | 559 +++++----
.../codecs/blocktree/CompressionAlgorithm.java | 15 +-
.../lucene/codecs/blocktree/FieldReader.java | 82 +-
.../codecs/blocktree/IntersectTermsEnum.java | 104 +-
.../codecs/blocktree/IntersectTermsEnumFrame.java | 21 +-
.../lucene/codecs/blocktree/SegmentTermsEnum.java | 462 +++++---
.../codecs/blocktree/SegmentTermsEnumFrame.java | 180 +--
.../org/apache/lucene/codecs/blocktree/Stats.java | 137 ++-
.../lucene/codecs/blocktree/package-info.java | 20 +-
.../compressing/CompressingStoredFieldsFormat.java | 141 ++-
.../compressing/CompressingStoredFieldsReader.java | 267 +++--
.../compressing/CompressingStoredFieldsWriter.java | 323 +++---
.../compressing/CompressingTermVectorsFormat.java | 79 +-
.../compressing/CompressingTermVectorsReader.java | 375 ++++--
.../compressing/CompressingTermVectorsWriter.java | 249 ++--
.../lucene/codecs/compressing/CompressionMode.java | 209 ++--
.../lucene/codecs/compressing/Compressor.java | 13 +-
.../lucene/codecs/compressing/Decompressor.java | 21 +-
.../lucene/codecs/compressing/FieldsIndex.java | 2 -
.../codecs/compressing/FieldsIndexReader.java | 46 +-
.../codecs/compressing/FieldsIndexWriter.java | 45 +-
.../compressing/LegacyFieldsIndexReader.java | 38 +-
.../lucene/codecs/compressing/MatchingReaders.java | 23 +-
.../lucene/codecs/compressing/package-info.java | 6 +-
.../codecs/lucene50/Lucene50CompoundFormat.java | 76 +-
.../codecs/lucene50/Lucene50CompoundReader.java | 101 +-
.../codecs/lucene50/Lucene50LiveDocsFormat.java | 81 +-
.../codecs/lucene50/Lucene50TermVectorsFormat.java | 208 ++--
.../lucene/codecs/lucene50/package-info.java | 5 +-
.../codecs/lucene60/Lucene60FieldInfosFormat.java | 304 ++---
.../lucene/codecs/lucene60/package-info.java | 4 +-
.../apache/lucene/codecs/lucene80/IndexedDISI.java | 369 +++---
.../codecs/lucene80/Lucene80DocValuesConsumer.java | 319 ++++--
.../codecs/lucene80/Lucene80DocValuesFormat.java | 177 +--
.../codecs/lucene80/Lucene80DocValuesProducer.java | 327 ++++--
.../codecs/lucene80/Lucene80NormsConsumer.java | 37 +-
.../codecs/lucene80/Lucene80NormsFormat.java | 95 +-
.../codecs/lucene80/Lucene80NormsProducer.java | 103 +-
.../lucene/codecs/lucene80/package-info.java | 5 +-
.../lucene/codecs/lucene84/ForDeltaUtil.java | 21 +-
.../org/apache/lucene/codecs/lucene84/ForUtil.java | 1197 ++++++++++----------
.../codecs/lucene84/Lucene84PostingsFormat.java | 635 +++++------
.../codecs/lucene84/Lucene84PostingsReader.java | 486 ++++----
.../codecs/lucene84/Lucene84PostingsWriter.java | 128 ++-
.../codecs/lucene84/Lucene84ScoreSkipReader.java | 59 +-
.../lucene/codecs/lucene84/Lucene84SkipReader.java | 66 +-
.../lucene/codecs/lucene84/Lucene84SkipWriter.java | 69 +-
.../apache/lucene/codecs/lucene84/PForUtil.java | 35 +-
.../lucene/codecs/lucene84/package-info.java | 4 +-
.../codecs/lucene86/Lucene86PointsFormat.java | 29 +-
.../codecs/lucene86/Lucene86PointsReader.java | 49 +-
.../codecs/lucene86/Lucene86PointsWriter.java | 156 +--
.../codecs/lucene86/Lucene86SegmentInfoFormat.java | 107 +-
.../lucene/codecs/lucene86/package-info.java | 4 +-
.../codecs/lucene87/BugfixDeflater_JDK8252739.java | 216 ++--
.../DeflateWithPresetDictCompressionMode.java | 32 +-
.../lucene87/LZ4WithPresetDictCompressionMode.java | 15 +-
.../lucene87/Lucene87StoredFieldsFormat.java | 146 +--
.../lucene/codecs/lucene87/package-info.java | 4 +-
.../lucene/codecs/lucene90/Lucene90Codec.java | 83 +-
.../codecs/lucene90/Lucene90FieldInfosFormat.java | 329 +++---
.../codecs/lucene90/Lucene90VectorFormat.java | 5 +-
.../codecs/lucene90/Lucene90VectorReader.java | 126 ++-
.../codecs/lucene90/Lucene90VectorWriter.java | 81 +-
.../lucene/codecs/lucene90/package-info.java | 526 +++++----
.../org/apache/lucene/codecs/package-info.java | 74 +-
.../codecs/perfield/PerFieldDocValuesFormat.java | 124 +-
.../lucene/codecs/perfield/PerFieldMergeState.java | 46 +-
.../codecs/perfield/PerFieldPostingsFormat.java | 155 +--
.../lucene/codecs/perfield/package-info.java | 4 +-
.../lucene/codecs/TestCodecLoadingDeadlock.java | 167 +--
.../org/apache/lucene/codecs/TestCodecUtil.java | 275 +++--
.../codecs/TestCompetitiveFreqNormAccumulator.java | 11 +-
.../compressing/AbstractTestCompressionMode.java | 40 +-
.../compressing/TestFastCompressionMode.java | 1 -
.../compressing/TestFastDecompressionMode.java | 1 -
.../compressing/TestHighCompressionMode.java | 2 -
.../lucene50/TestLucene50CompoundFormat.java | 3 +-
.../lucene50/TestLucene50LiveDocsFormat.java | 1 -
.../lucene50/TestLucene50TermVectorsFormat.java | 1 -
.../lucene50/TestLucene60FieldInfoFormat.java | 5 +-
.../BaseLucene80DocValuesFormatTestCase.java | 176 +--
...TestBestCompressionLucene80DocValuesFormat.java | 12 +-
.../TestBestSpeedLucene80DocValuesFormat.java | 9 +-
.../lucene/codecs/lucene80/TestIndexedDISI.java | 236 ++--
.../codecs/lucene80/TestLucene80NormsFormat.java | 7 +-
.../TestLucene80NormsFormatMergeInstance.java | 5 +-
.../lucene/codecs/lucene84/TestForDeltaUtil.java | 16 +-
.../apache/lucene/codecs/lucene84/TestForUtil.java | 15 +-
.../lucene84/TestLucene84PostingsFormat.java | 23 +-
.../lucene/codecs/lucene84/TestPForUtil.java | 15 +-
.../codecs/lucene86/TestLucene86PointsFormat.java | 240 ++--
.../lucene86/TestLucene86SegmentInfoFormat.java | 3 +-
...tLucene87StoredFieldsFormatHighCompression.java | 31 +-
...estLucene87StoredFieldsFormatMergeInstance.java | 5 +-
.../perfield/TestPerFieldDocValuesFormat.java | 106 +-
.../perfield/TestPerFieldPostingsFormat.java | 6 +-
.../perfield/TestPerFieldPostingsFormat2.java | 154 +--
139 files changed, 9022 insertions(+), 7109 deletions(-)
diff --git a/gradle/validation/spotless.gradle b/gradle/validation/spotless.gradle
index bc8e10f..318a2c9 100644
--- a/gradle/validation/spotless.gradle
+++ b/gradle/validation/spotless.gradle
@@ -39,22 +39,15 @@ allprojects { prj ->
// These modules are complete - all sources scanned.
case ":lucene:highlighter":
target "src/**"
- targetExclude "**/resources/**", "**/CambridgeMA.utf8", "**/overview.html"
+ targetExclude "**/resources/**", "**/overview.html", "**/CambridgeMA.utf8"
break
// Partially complete.
case ":lucene:core":
- target "src/**/org/apache/lucene/*.java",
- "src/**/org/apache/lucene/analysis/**",
- // "src/**/org/apache/lucene/codecs/**",
- "src/**/org/apache/lucene/document/**",
- "src/**/org/apache/lucene/geo/**",
- "src/**/org/apache/lucene/index/**",
- "src/**/org/apache/lucene/search/**",
- "src/**/org/apache/lucene/store/**",
- "src/**/org/apache/lucene/util/**"
-
+ target "src/java/**", "src/test/**"
targetExclude "**/resources/**",
+ "**/overview.html",
+ "**/META-INF/**",
"**/StandardTokenizerImpl.jflex",
"**/StandardTokenizerImpl.java",
"**/createLevAutomata.py",
@@ -63,7 +56,8 @@ allprojects { prj ->
"**/gen_Packed64SingleBlock.py",
"**/makeEuroparlLineFile.py",
"**/wordliststopwords.txt",
- "**/wordliststopwords_nocomment.txt"
+ "**/wordliststopwords_nocomment.txt",
+ "**/gen_ForUtil.py"
break
case ":lucene:analysis:common":
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipWriter.java
index 5316f78..fec3be5 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipWriter.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipWriter.java
@@ -32,10 +32,11 @@ import org.apache.lucene.store.IndexOutput;
* Write skip lists with multiple levels, and support skip within block ints.
*
* Assume that docFreq = 28, skipInterval = blockSize = 12
- *
+ * <pre>
* | block#0 | | block#1 | |vInts|
* d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
* ^ ^ (level 0 skip point)
+ * </pre>
*
* Note that skipWriter will ignore first document in block#0, since
* it is useless as a skip point. Also, we'll never skip into the vInts
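Worked through for the example above: with docFreq = 28 and blockSize = 12, blocks #0 and #1 hold the first 24 postings and the remaining 4 are written as vInts; the two carets mark the level-0 skip points recorded after each full block.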
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
index c317668..06508e7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
@@ -20,9 +20,8 @@ import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
/**
- * Holds all state required for {@link PostingsReaderBase}
- * to produce a {@link org.apache.lucene.index.PostingsEnum} without re-seeking the
- * terms dict.
+ * Holds all state required for {@link PostingsReaderBase} to produce a {@link
+ * org.apache.lucene.index.PostingsEnum} without re-seeking the terms dict.
*
* @lucene.internal
*/
@@ -38,10 +37,8 @@ public class BlockTermState extends OrdTermState {
// TODO: update BTR to nuke this
public long blockFilePointer;
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected BlockTermState() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected BlockTermState() {}
@Override
public void copyFrom(TermState _other) {
@@ -56,6 +53,13 @@ public class BlockTermState extends OrdTermState {
@Override
public String toString() {
- return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer;
+ return "docFreq="
+ + docFreq
+ + " totalTermFreq="
+ + totalTermFreq
+ + " termBlockOrd="
+ + termBlockOrd
+ + " blockFP="
+ + blockFilePointer;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index 3a2bc3f..24abcb4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -16,46 +16,45 @@
*/
package org.apache.lucene.codecs;
-
import java.util.Objects;
import java.util.ServiceLoader; // javadocs
import java.util.Set;
-
import org.apache.lucene.index.IndexWriterConfig; // javadocs
import org.apache.lucene.util.NamedSPILoader;
/**
* Encodes/decodes an inverted index segment.
- * <p>
- * Note, when extending this class, the name ({@link #getName}) is
- * written into the index. In order for the segment to be read, the
- * name must resolve to your implementation via {@link #forName(String)}.
- * This method uses Java's
- * {@link ServiceLoader Service Provider Interface} (SPI) to resolve codec names.
- * <p>
- * If you implement your own codec, make sure that it has a no-arg constructor
- * so SPI can load it.
+ *
+ * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In
+ * order for the segment to be read, the name must resolve to your implementation via {@link
+ * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} (SPI)
+ * to resolve codec names.
+ *
+ * <p>If you implement your own codec, make sure that it has a no-arg constructor so SPI can load
+ * it.
+ *
* @see ServiceLoader
*/
public abstract class Codec implements NamedSPILoader.NamedSPI {
/**
- * This static holder class prevents classloading deadlock by delaying
- * init of default codecs and available codecs until needed.
+ * This static holder class prevents classloading deadlock by delaying init of default codecs and
+ * available codecs until needed.
*/
private static final class Holder {
private static final NamedSPILoader<Codec> LOADER = new NamedSPILoader<>(Codec.class);
-
+
private Holder() {}
-
+
static NamedSPILoader<Codec> getLoader() {
if (LOADER == null) {
- throw new IllegalStateException("You tried to lookup a Codec by name before all Codecs could be initialized. "+
- "This likely happens if you call Codec#forName from a Codec's ctor.");
+ throw new IllegalStateException(
+ "You tried to lookup a Codec by name before all Codecs could be initialized. "
+ + "This likely happens if you call Codec#forName from a Codec's ctor.");
}
return LOADER;
}
-
+
static Codec defaultCodec = LOADER.lookup("Lucene90");
}
@@ -63,47 +62,48 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
/**
* Creates a new codec.
- * <p>
- * The provided name will be written into the index segment: in order to
- * for the segment to be read this class should be registered with Java's
- * SPI mechanism (registered in META-INF/ of your jar file, etc).
+ *
+ * <p>The provided name will be written into the index segment: in order to for the segment to be
+ * read this class should be registered with Java's SPI mechanism (registered in META-INF/ of your
+ * jar file, etc).
+ *
* @param name must be all ascii alphanumeric, and less than 128 characters in length.
*/
protected Codec(String name) {
NamedSPILoader.checkServiceName(name);
this.name = name;
}
-
+
/** Returns this codec's name */
@Override
public final String getName() {
return name;
}
-
+
/** Encodes/decodes postings */
public abstract PostingsFormat postingsFormat();
/** Encodes/decodes docvalues */
public abstract DocValuesFormat docValuesFormat();
-
+
/** Encodes/decodes stored fields */
public abstract StoredFieldsFormat storedFieldsFormat();
-
+
/** Encodes/decodes term vectors */
public abstract TermVectorsFormat termVectorsFormat();
-
+
/** Encodes/decodes field infos file */
public abstract FieldInfosFormat fieldInfosFormat();
-
+
/** Encodes/decodes segment info file */
public abstract SegmentInfoFormat segmentInfoFormat();
-
+
/** Encodes/decodes document normalization values */
public abstract NormsFormat normsFormat();
/** Encodes/decodes live docs */
public abstract LiveDocsFormat liveDocsFormat();
-
+
/** Encodes/decodes compound files */
public abstract CompoundFormat compoundFormat();
@@ -112,53 +112,47 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
/** Encodes/decodes numeric vector fields */
public abstract VectorFormat vectorFormat();
-
+
/** looks up a codec by name */
public static Codec forName(String name) {
return Holder.getLoader().lookup(name);
}
-
+
/** returns a list of all available codec names */
public static Set<String> availableCodecs() {
return Holder.getLoader().availableServices();
}
-
- /**
- * Reloads the codec list from the given {@link ClassLoader}.
- * Changes to the codecs are visible after the method ends, all
- * iterators ({@link #availableCodecs()},...) stay consistent.
- *
- * <p><b>NOTE:</b> Only new codecs are added, existing ones are
- * never removed or replaced.
- *
- * <p><em>This method is expensive and should only be called for discovery
- * of new codecs on the given classpath/classloader!</em>
+
+ /**
+ * Reloads the codec list from the given {@link ClassLoader}. Changes to the codecs are visible
+ * after the method ends, all iterators ({@link #availableCodecs()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new codecs are added, existing ones are never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery of new codecs on the
+ * given classpath/classloader!</em>
*/
public static void reloadCodecs(ClassLoader classloader) {
Holder.getLoader().reload(classloader);
}
-
- /** expert: returns the default codec used for newly created
- * {@link IndexWriterConfig}s.
- */
+
+ /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */
public static Codec getDefault() {
if (Holder.defaultCodec == null) {
- throw new IllegalStateException("You tried to lookup the default Codec before all Codecs could be initialized. "+
- "This likely happens if you try to get it from a Codec's ctor.");
+ throw new IllegalStateException(
+ "You tried to lookup the default Codec before all Codecs could be initialized. "
+ + "This likely happens if you try to get it from a Codec's ctor.");
}
return Holder.defaultCodec;
}
-
- /** expert: sets the default codec used for newly created
- * {@link IndexWriterConfig}s.
- */
+
+ /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */
public static void setDefault(Codec codec) {
Holder.defaultCodec = Objects.requireNonNull(codec);
}
/**
- * returns the codec's name. Subclasses can override to provide
- * more detail (such as parameters).
+ * returns the codec's name. Subclasses can override to provide more detail (such as parameters).
*/
@Override
public String toString() {
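A minimal sketch of the SPI pattern the javadoc above describes (class and codec names here are hypothetical, not part of this diff):

    import org.apache.lucene.codecs.FilterCodec;
    import org.apache.lucene.codecs.lucene90.Lucene90Codec;

    // Delegates every format to Lucene90Codec; only the name differs.
    public final class MyCodec extends FilterCodec {
      public MyCodec() {                       // no-arg ctor so SPI can instantiate it
        super("MyCodec", new Lucene90Codec()); // this name is written into the index
      }
    }

Registration is a provider file META-INF/services/org.apache.lucene.codecs.Codec listing the fully qualified class name, after which Codec.forName("MyCodec") resolves it.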
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 8c40e2a..3dc46de 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -16,11 +16,9 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
-
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
@@ -36,97 +34,91 @@ import org.apache.lucene.util.StringHelper;
/**
* Utility class for reading and writing versioned headers.
- * <p>
- * Writing codec headers is useful to ensure that a file is in
- * the format you think it is.
- *
+ *
+ * <p>Writing codec headers is useful to ensure that a file is in the format you think it is.
+ *
* @lucene.experimental
*/
-
public final class CodecUtil {
private CodecUtil() {} // no instance
- /**
- * Constant to identify the start of a codec header.
- */
- public final static int CODEC_MAGIC = 0x3fd76c17;
- /**
- * Constant to identify the start of a codec footer.
- */
- public final static int FOOTER_MAGIC = ~CODEC_MAGIC;
+ /** Constant to identify the start of a codec header. */
+ public static final int CODEC_MAGIC = 0x3fd76c17;
+ /** Constant to identify the start of a codec footer. */
+ public static final int FOOTER_MAGIC = ~CODEC_MAGIC;
/**
- * Writes a codec header, which records both a string to
- * identify the file and a version number. This header can
- * be parsed and validated with
- * {@link #checkHeader(DataInput, String, int, int) checkHeader()}.
- * <p>
- * CodecHeader --> Magic,CodecName,Version
+ * Writes a codec header, which records both a string to identify the file and a version number.
+ * This header can be parsed and validated with {@link #checkHeader(DataInput, String, int, int)
+ * checkHeader()}.
+ *
+ * <p>CodecHeader --> Magic,CodecName,Version
+ *
* <ul>
- * <li>Magic --> {@link DataOutput#writeInt Uint32}. This
- * identifies the start of the header. It is always {@value #CODEC_MAGIC}.
- * <li>CodecName --> {@link DataOutput#writeString String}. This
- * is a string to identify this file.
- * <li>Version --> {@link DataOutput#writeInt Uint32}. Records
- * the version of the file.
+ * <li>Magic --> {@link DataOutput#writeInt Uint32}. This identifies the start of the header.
+ * It is always {@value #CODEC_MAGIC}.
+ * <li>CodecName --> {@link DataOutput#writeString String}. This is a string to identify this
+ * file.
+ * <li>Version --> {@link DataOutput#writeInt Uint32}. Records the version of the file.
* </ul>
- * <p>
- * Note that the length of a codec header depends only upon the
- * name of the codec, so this length can be computed at any time
- * with {@link #headerLength(String)}.
- *
+ *
+ * <p>Note that the length of a codec header depends only upon the name of the codec, so this
+ * length can be computed at any time with {@link #headerLength(String)}.
+ *
* @param out Output stream
- * @param codec String to identify this file. It should be simple ASCII,
- * less than 128 characters in length.
+ * @param codec String to identify this file. It should be simple ASCII, less than 128 characters
+ * in length.
* @param version Version number
* @throws IOException If there is an I/O error writing to the underlying medium.
- * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length
+ * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127
+ * characters in length
*/
public static void writeHeader(DataOutput out, String codec, int version) throws IOException {
BytesRef bytes = new BytesRef(codec);
if (bytes.length != codec.length() || bytes.length >= 128) {
- throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]");
+ throw new IllegalArgumentException(
+ "codec must be simple ASCII, less than 128 characters in length [got " + codec + "]");
}
out.writeInt(CODEC_MAGIC);
out.writeString(codec);
out.writeInt(version);
}
-
+
/**
- * Writes a codec header for an index file, which records both a string to
- * identify the format of the file, a version number, and data to identify
- * the file instance (ID and auxiliary suffix such as generation).
- * <p>
- * This header can be parsed and validated with
- * {@link #checkIndexHeader(DataInput, String, int, int, byte[], String) checkIndexHeader()}.
- * <p>
- * IndexHeader --> CodecHeader,ObjectID,ObjectSuffix
+ * Writes a codec header for an index file, which records both a string to identify the format of
+ * the file, a version number, and data to identify the file instance (ID and auxiliary suffix
+ * such as generation).
+ *
+ * <p>This header can be parsed and validated with {@link #checkIndexHeader(DataInput, String,
+ * int, int, byte[], String) checkIndexHeader()}.
+ *
+ * <p>IndexHeader --> CodecHeader,ObjectID,ObjectSuffix
+ *
* <ul>
- * <li>CodecHeader --> {@link #writeHeader}
- * <li>ObjectID --> {@link DataOutput#writeByte byte}<sup>16</sup>
- * <li>ObjectSuffix --> SuffixLength,SuffixBytes
- * <li>SuffixLength --> {@link DataOutput#writeByte byte}
- * <li>SuffixBytes --> {@link DataOutput#writeByte byte}<sup>SuffixLength</sup>
+ * <li>CodecHeader --> {@link #writeHeader}
+ * <li>ObjectID --> {@link DataOutput#writeByte byte}<sup>16</sup>
+ * <li>ObjectSuffix --> SuffixLength,SuffixBytes
+ * <li>SuffixLength --> {@link DataOutput#writeByte byte}
+ * <li>SuffixBytes --> {@link DataOutput#writeByte byte}<sup>SuffixLength</sup>
* </ul>
- * <p>
- * Note that the length of an index header depends only upon the
- * name of the codec and suffix, so this length can be computed at any time
- * with {@link #indexHeaderLength(String,String)}.
- *
+ *
+ * <p>Note that the length of an index header depends only upon the name of the codec and suffix,
+ * so this length can be computed at any time with {@link #indexHeaderLength(String,String)}.
+ *
* @param out Output stream
- * @param codec String to identify the format of this file. It should be simple ASCII,
- * less than 128 characters in length.
+ * @param codec String to identify the format of this file. It should be simple ASCII, less than
+ * 128 characters in length.
* @param id Unique identifier for this particular file instance.
- * @param suffix auxiliary suffix information for the file. It should be simple ASCII,
- * less than 256 characters in length.
+ * @param suffix auxiliary suffix information for the file. It should be simple ASCII, less than
+ * 256 characters in length.
* @param version Version number
* @throws IOException If there is an I/O error writing to the underlying medium.
- * @throws IllegalArgumentException If the codec name is not simple ASCII, or
- * is more than 127 characters in length, or if id is invalid,
- * or if the suffix is not simple ASCII, or more than 255 characters
- * in length.
+ * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127
+ * characters in length, or if id is invalid, or if the suffix is not simple ASCII, or more
+ * than 255 characters in length.
*/
- public static void writeIndexHeader(DataOutput out, String codec, int version, byte[] id, String suffix) throws IOException {
+ public static void writeIndexHeader(
+ DataOutput out, String codec, int version, byte[] id, String suffix) throws IOException {
if (id.length != StringHelper.ID_LENGTH) {
throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(id));
}
@@ -134,7 +126,8 @@ public final class CodecUtil {
out.writeBytes(id, 0, id.length);
BytesRef suffixBytes = new BytesRef(suffix);
if (suffixBytes.length != suffix.length() || suffixBytes.length >= 256) {
- throw new IllegalArgumentException("suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
+ throw new IllegalArgumentException(
+ "suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
}
out.writeByte((byte) suffixBytes.length);
out.writeBytes(suffixBytes.bytes, suffixBytes.offset, suffixBytes.length);
@@ -142,18 +135,18 @@ public final class CodecUtil {
/**
* Computes the length of a codec header.
- *
+ *
* @param codec Codec name.
* @return length of the entire codec header.
* @see #writeHeader(DataOutput, String, int)
*/
public static int headerLength(String codec) {
- return 9+codec.length();
+ return 9 + codec.length();
}
-
+
/**
* Computes the length of an index header.
- *
+ *
* @param codec Codec name.
* @return length of the entire index header.
* @see #writeIndexHeader(DataOutput, String, int, byte[], String)
@@ -163,49 +156,53 @@ public final class CodecUtil {
}
/**
- * Reads and validates a header previously written with
- * {@link #writeHeader(DataOutput, String, int)}.
- * <p>
- * When reading a file, supply the expected <code>codec</code> and
- * an expected version range (<code>minVersion to maxVersion</code>).
- *
- * @param in Input stream, positioned at the point where the
- * header was previously written. Typically this is located
- * at the beginning of the file.
+ * Reads and validates a header previously written with {@link #writeHeader(DataOutput, String,
+ * int)}.
+ *
+ * <p>When reading a file, supply the expected <code>codec</code> and an expected version range (
+ * <code>minVersion to maxVersion</code>).
+ *
+ * @param in Input stream, positioned at the point where the header was previously written.
+ * Typically this is located at the beginning of the file.
* @param codec The expected codec name.
* @param minVersion The minimum supported expected version number.
* @param maxVersion The maximum supported expected version number.
- * @return The actual version found, when a valid header is found
- * that matches <code>codec</code>, with an actual version
- * where {@code minVersion <= actual <= maxVersion}.
- * Otherwise an exception is thrown.
- * @throws CorruptIndexException If the first four bytes are not
- * {@link #CODEC_MAGIC}, or if the actual codec found is
- * not <code>codec</code>.
- * @throws IndexFormatTooOldException If the actual version is less
- * than <code>minVersion</code>.
- * @throws IndexFormatTooNewException If the actual version is greater
- * than <code>maxVersion</code>.
+ * @return The actual version found, when a valid header is found that matches <code>codec</code>,
+ * with an actual version where {@code minVersion <= actual <= maxVersion}. Otherwise an
+ * exception is thrown.
+ * @throws CorruptIndexException If the first four bytes are not {@link #CODEC_MAGIC}, or if the
+ * actual codec found is not <code>codec</code>.
+ * @throws IndexFormatTooOldException If the actual version is less than <code>minVersion</code>.
+ * @throws IndexFormatTooNewException If the actual version is greater than <code>maxVersion
+ * </code>.
* @throws IOException If there is an I/O error reading from the underlying medium.
* @see #writeHeader(DataOutput, String, int)
*/
- public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion) throws IOException {
+ public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion)
+ throws IOException {
// Safety to guard against reading a bogus string:
final int actualHeader = in.readInt();
if (actualHeader != CODEC_MAGIC) {
- throw new CorruptIndexException("codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CODEC_MAGIC, in);
+ throw new CorruptIndexException(
+ "codec header mismatch: actual header="
+ + actualHeader
+ + " vs expected header="
+ + CODEC_MAGIC,
+ in);
}
return checkHeaderNoMagic(in, codec, minVersion, maxVersion);
}
- /** Like {@link
- * #checkHeader(DataInput,String,int,int)} except this
- * version assumes the first int has already been read
- * and validated from the input. */
- public static int checkHeaderNoMagic(DataInput in, String codec, int minVersion, int maxVersion) throws IOException {
+ /**
+ * Like {@link #checkHeader(DataInput,String,int,int)} except this version assumes the first int
+ * has already been read and validated from the input.
+ */
+ public static int checkHeaderNoMagic(DataInput in, String codec, int minVersion, int maxVersion)
+ throws IOException {
final String actualCodec = in.readString();
if (!actualCodec.equals(codec)) {
- throw new CorruptIndexException("codec mismatch: actual codec=" + actualCodec + " vs expected codec=" + codec, in);
+ throw new CorruptIndexException(
+ "codec mismatch: actual codec=" + actualCodec + " vs expected codec=" + codec, in);
}
final int actualVersion = in.readInt();
@@ -218,40 +215,41 @@ public final class CodecUtil {
return actualVersion;
}
-
+
/**
- * Reads and validates a header previously written with
- * {@link #writeIndexHeader(DataOutput, String, int, byte[], String)}.
- * <p>
- * When reading a file, supply the expected <code>codec</code>,
- * expected version range (<code>minVersion to maxVersion</code>),
- * and object ID and suffix.
- *
- * @param in Input stream, positioned at the point where the
- * header was previously written. Typically this is located
- * at the beginning of the file.
+ * Reads and validates a header previously written with {@link #writeIndexHeader(DataOutput,
+ * String, int, byte[], String)}.
+ *
+ * <p>When reading a file, supply the expected <code>codec</code>, expected version range (<code>
+ * minVersion to maxVersion</code>), and object ID and suffix.
+ *
+ * @param in Input stream, positioned at the point where the header was previously written.
+ * Typically this is located at the beginning of the file.
* @param codec The expected codec name.
* @param minVersion The minimum supported expected version number.
* @param maxVersion The maximum supported expected version number.
* @param expectedID The expected object identifier for this file.
* @param expectedSuffix The expected auxiliary suffix for this file.
- * @return The actual version found, when a valid header is found
- * that matches <code>codec</code>, with an actual version
- * where {@code minVersion <= actual <= maxVersion},
- * and matching <code>expectedID</code> and <code>expectedSuffix</code>
- * Otherwise an exception is thrown.
- * @throws CorruptIndexException If the first four bytes are not
- * {@link #CODEC_MAGIC}, or if the actual codec found is
- * not <code>codec</code>, or if the <code>expectedID</code>
- * or <code>expectedSuffix</code> do not match.
- * @throws IndexFormatTooOldException If the actual version is less
- * than <code>minVersion</code>.
- * @throws IndexFormatTooNewException If the actual version is greater
- * than <code>maxVersion</code>.
+ * @return The actual version found, when a valid header is found that matches <code>codec</code>,
+ * with an actual version where {@code minVersion <= actual <= maxVersion}, and matching
+ * <code>expectedID</code> and <code>expectedSuffix</code> Otherwise an exception is thrown.
+ * @throws CorruptIndexException If the first four bytes are not {@link #CODEC_MAGIC}, or if the
+ * actual codec found is not <code>codec</code>, or if the <code>expectedID</code> or <code>
+ * expectedSuffix</code> do not match.
+ * @throws IndexFormatTooOldException If the actual version is less than <code>minVersion</code>.
+ * @throws IndexFormatTooNewException If the actual version is greater than <code>maxVersion
+ * </code>.
* @throws IOException If there is an I/O error reading from the underlying medium.
* @see #writeIndexHeader(DataOutput, String, int, byte[],String)
*/
- public static int checkIndexHeader(DataInput in, String codec, int minVersion, int maxVersion, byte[] expectedID, String expectedSuffix) throws IOException {
+ public static int checkIndexHeader(
+ DataInput in,
+ String codec,
+ int minVersion,
+ int maxVersion,
+ byte[] expectedID,
+ String expectedSuffix)
+ throws IOException {
int version = checkHeader(in, codec, minVersion, maxVersion);
checkIndexHeaderID(in, expectedID);
checkIndexHeaderSuffix(in, expectedSuffix);
@@ -259,32 +257,38 @@ public final class CodecUtil {
}
/**
- * Expert: verifies the incoming {@link IndexInput} has an index header
- * and that its segment ID matches the expected one, and then copies
- * that index header into the provided {@link DataOutput}. This is
- * useful when building compound files.
+ * Expert: verifies the incoming {@link IndexInput} has an index header and that its segment ID
+ * matches the expected one, and then copies that index header into the provided {@link
+ * DataOutput}. This is useful when building compound files.
*
- * @param in Input stream, positioned at the point where the
- * index header was previously written. Typically this is located
- * at the beginning of the file.
+ * @param in Input stream, positioned at the point where the index header was previously written.
+ * Typically this is located at the beginning of the file.
* @param out Output stream, where the header will be copied to.
* @param expectedID Expected segment ID
- * @throws CorruptIndexException If the first four bytes are not
- * {@link #CODEC_MAGIC}, or if the <code>expectedID</code>
- * does not match.
+ * @throws CorruptIndexException If the first four bytes are not {@link #CODEC_MAGIC}, or if the
+ * <code>expectedID</code> does not match.
* @throws IOException If there is an I/O error reading from the underlying medium.
- *
- * @lucene.internal
+ * @lucene.internal
*/
- public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
+ public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID)
+ throws IOException {
// make sure it's large enough to have a header and footer
if (in.length() < footerLength() + headerLength("")) {
- throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
+ throw new CorruptIndexException(
+ "compound sub-files must have a valid codec header and footer: file is too small ("
+ + in.length()
+ + " bytes)",
+ in);
}
int actualHeader = in.readInt();
if (actualHeader != CODEC_MAGIC) {
- throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
+ throw new CorruptIndexException(
+ "compound sub-files must have a valid codec header and footer: codec header mismatch: actual header="
+ + actualHeader
+ + " vs expected header="
+ + CodecUtil.CODEC_MAGIC,
+ in);
}
// we can't verify these, so we pass-through:
@@ -308,15 +312,20 @@ public final class CodecUtil {
out.writeBytes(suffixBytes, 0, suffixLength);
}
-
- /** Retrieves the full index header from the provided {@link IndexInput}.
- * This throws {@link CorruptIndexException} if this file does
- * not appear to be an index file. */
+ /**
+ * Retrieves the full index header from the provided {@link IndexInput}. This throws {@link
+ * CorruptIndexException} if this file does not appear to be an index file.
+ */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
in.seek(0);
final int actualHeader = in.readInt();
if (actualHeader != CODEC_MAGIC) {
- throw new CorruptIndexException("codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CODEC_MAGIC, in);
+ throw new CorruptIndexException(
+ "codec header mismatch: actual header="
+ + actualHeader
+ + " vs expected header="
+ + CODEC_MAGIC,
+ in);
}
String codec = in.readString();
in.readInt();
@@ -328,11 +337,18 @@ public final class CodecUtil {
return bytes;
}
- /** Retrieves the full footer from the provided {@link IndexInput}. This throws
- * {@link CorruptIndexException} if this file does not have a valid footer. */
+ /**
+ * Retrieves the full footer from the provided {@link IndexInput}. This throws {@link
+ * CorruptIndexException} if this file does not have a valid footer.
+ */
public static byte[] readFooter(IndexInput in) throws IOException {
if (in.length() < footerLength()) {
- throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), in);
+ throw new CorruptIndexException(
+ "misplaced codec footer (file truncated?): length="
+ + in.length()
+ + " but footerLength=="
+ + footerLength(),
+ in);
}
in.seek(in.length() - footerLength());
validateFooter(in);
@@ -341,49 +357,51 @@ public final class CodecUtil {
in.readBytes(bytes, 0, bytes.length);
return bytes;
}
-
+
/** Expert: just reads and verifies the object ID of an index header */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
byte id[] = new byte[StringHelper.ID_LENGTH];
in.readBytes(id, 0, id.length);
if (!Arrays.equals(id, expectedID)) {
- throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID)
- + ", got=" + StringHelper.idToString(id), in);
+ throw new CorruptIndexException(
+ "file mismatch, expected id="
+ + StringHelper.idToString(expectedID)
+ + ", got="
+ + StringHelper.idToString(id),
+ in);
}
return id;
}
-
+
/** Expert: just reads and verifies the suffix of an index header */
- public static String checkIndexHeaderSuffix(DataInput in, String expectedSuffix) throws IOException {
+ public static String checkIndexHeaderSuffix(DataInput in, String expectedSuffix)
+ throws IOException {
int suffixLength = in.readByte() & 0xFF;
byte suffixBytes[] = new byte[suffixLength];
in.readBytes(suffixBytes, 0, suffixBytes.length);
String suffix = new String(suffixBytes, 0, suffixBytes.length, StandardCharsets.UTF_8);
if (!suffix.equals(expectedSuffix)) {
- throw new CorruptIndexException("file mismatch, expected suffix=" + expectedSuffix
- + ", got=" + suffix, in);
+ throw new CorruptIndexException(
+ "file mismatch, expected suffix=" + expectedSuffix + ", got=" + suffix, in);
}
return suffix;
}
-
+
/**
- * Writes a codec footer, which records both a checksum
- * algorithm ID and a checksum. This footer can
- * be parsed and validated with
- * {@link #checkFooter(ChecksumIndexInput) checkFooter()}.
- * <p>
- * CodecFooter --> Magic,AlgorithmID,Checksum
+ * Writes a codec footer, which records both a checksum algorithm ID and a checksum. This footer
+ * can be parsed and validated with {@link #checkFooter(ChecksumIndexInput) checkFooter()}.
+ *
+ * <p>CodecFooter --> Magic,AlgorithmID,Checksum
+ *
* <ul>
- * <li>Magic --> {@link DataOutput#writeInt Uint32}. This
- * identifies the start of the footer. It is always {@value #FOOTER_MAGIC}.
- * <li>AlgorithmID --> {@link DataOutput#writeInt Uint32}. This
- * indicates the checksum algorithm used. Currently this is always 0,
- * for zlib-crc32.
- * <li>Checksum --> {@link DataOutput#writeLong Uint64}. The
- * actual checksum value for all previous bytes in the stream, including
- * the bytes from Magic and AlgorithmID.
+ * <li>Magic --> {@link DataOutput#writeInt Uint32}. This identifies the start of the footer.
+ * It is always {@value #FOOTER_MAGIC}.
+ * <li>AlgorithmID --> {@link DataOutput#writeInt Uint32}. This indicates the checksum
+ * algorithm used. Currently this is always 0, for zlib-crc32.
+ * <li>Checksum --> {@link DataOutput#writeLong Uint64}. The actual checksum value for all
+ * previous bytes in the stream, including the bytes from Magic and AlgorithmID.
* </ul>
- *
+ *
* @param out Output stream
* @throws IOException If there is an I/O error writing to the underlying medium.
*/
@@ -392,49 +410,54 @@ public final class CodecUtil {
out.writeInt(0);
writeCRC(out);
}
-
+
/**
* Computes the length of a codec footer.
- *
+ *
* @return length of the entire codec footer.
* @see #writeFooter(IndexOutput)
*/
public static int footerLength() {
return 16;
}
-
- /**
- * Validates the codec footer previously written by {@link #writeFooter}.
+
+ /**
+ * Validates the codec footer previously written by {@link #writeFooter}.
+ *
* @return actual checksum value
- * @throws IOException if the footer is invalid, if the checksum does not match,
- * or if {@code in} is not properly positioned before the footer
- * at the end of the stream.
+ * @throws IOException if the footer is invalid, if the checksum does not match, or if {@code in}
+ * is not properly positioned before the footer at the end of the stream.
*/
public static long checkFooter(ChecksumIndexInput in) throws IOException {
validateFooter(in);
long actualChecksum = in.getChecksum();
long expectedChecksum = readCRC(in);
if (expectedChecksum != actualChecksum) {
- throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(expectedChecksum) +
- " actual=" + Long.toHexString(actualChecksum), in);
+ throw new CorruptIndexException(
+ "checksum failed (hardware problem?) : expected="
+ + Long.toHexString(expectedChecksum)
+ + " actual="
+ + Long.toHexString(actualChecksum),
+ in);
}
return actualChecksum;
}
-
- /**
- * Validates the codec footer previously written by {@link #writeFooter}, optionally
- * passing an unexpected exception that has already occurred.
- * <p>
- * When a {@code priorException} is provided, this method will add a suppressed exception
- * indicating whether the checksum for the stream passes, fails, or cannot be computed, and
+
+ /**
+ * Validates the codec footer previously written by {@link #writeFooter}, optionally passing an
+ * unexpected exception that has already occurred.
+ *
+ * <p>When a {@code priorException} is provided, this method will add a suppressed exception
+ * indicating whether the checksum for the stream passes, fails, or cannot be computed, and
* rethrow it. Otherwise it behaves the same as {@link #checkFooter(ChecksumIndexInput)}.
- * <p>
- * Example usage:
+ *
+ * <p>Example usage:
+ *
* <pre class="prettyprint">
* try (ChecksumIndexInput input = ...) {
* Throwable priorE = null;
* try {
- * // ... read a bunch of stuff ...
+ * // ... read a bunch of stuff ...
* } catch (Throwable exception) {
* priorE = exception;
* } finally {
@@ -443,7 +466,8 @@ public final class CodecUtil {
* }
* </pre>
*/
- public static void checkFooter(ChecksumIndexInput in, Throwable priorException) throws IOException {
+ public static void checkFooter(ChecksumIndexInput in, Throwable priorException)
+ throws IOException {
if (priorException == null) {
checkFooter(in);
} else {
@@ -455,44 +479,60 @@ public final class CodecUtil {
long remaining = in.length() - in.getFilePointer();
if (remaining < footerLength()) {
// corruption caused us to read into the checksum footer already: we can't proceed
- throw new CorruptIndexException("checksum status indeterminate: remaining=" + remaining +
- "; please run checkindex for more details", in);
+ throw new CorruptIndexException(
+ "checksum status indeterminate: remaining="
+ + remaining
+ + "; please run checkindex for more details",
+ in);
} else {
// otherwise, skip any unread bytes.
in.skipBytes(remaining - footerLength());
-
+
// now check the footer
long checksum = checkFooter(in);
- priorException.addSuppressed(new CorruptIndexException("checksum passed (" + Long.toHexString(checksum) +
- "). possibly transient resource issue, or a Lucene or JVM bug", in));
+ priorException.addSuppressed(
+ new CorruptIndexException(
+ "checksum passed ("
+ + Long.toHexString(checksum)
+ + "). possibly transient resource issue, or a Lucene or JVM bug",
+ in));
}
} catch (CorruptIndexException corruptException) {
corruptException.addSuppressed(priorException);
throw corruptException;
} catch (Throwable t) {
// catch-all for things that shouldn't go wrong (e.g. OOM during readInt) but could...
- priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: unexpected exception", in, t));
+ priorException.addSuppressed(
+ new CorruptIndexException(
+ "checksum status indeterminate: unexpected exception", in, t));
}
throw IOUtils.rethrowAlways(priorException);
}
}
-
- /**
+
+ /**
* Returns (but does not validate) the checksum previously written by {@link #checkFooter}.
+ *
* @return actual checksum value
* @throws IOException if the footer is invalid
*/
public static long retrieveChecksum(IndexInput in) throws IOException {
if (in.length() < footerLength()) {
- throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), in);
+ throw new CorruptIndexException(
+ "misplaced codec footer (file truncated?): length="
+ + in.length()
+ + " but footerLength=="
+ + footerLength(),
+ in);
}
in.seek(in.length() - footerLength());
validateFooter(in);
return readCRC(in);
}
- /**
+ /**
* Returns (but does not validate) the checksum previously written by {@link #checkFooter}.
+ *
* @return actual checksum value
* @throws IOException if the footer is invalid
*/
@@ -501,9 +541,11 @@ public final class CodecUtil {
throw new IllegalArgumentException("expectedLength cannot be less than the footer length");
}
if (in.length() < expectedLength) {
- throw new CorruptIndexException("truncated file: length=" + in.length() + " but expectedLength==" + expectedLength, in);
+ throw new CorruptIndexException(
+ "truncated file: length=" + in.length() + " but expectedLength==" + expectedLength, in);
} else if (in.length() > expectedLength) {
- throw new CorruptIndexException("file too long: length=" + in.length() + " but expectedLength==" + expectedLength, in);
+ throw new CorruptIndexException(
+ "file too long: length=" + in.length() + " but expectedLength==" + expectedLength, in);
}
return retrieveChecksum(in);
@@ -513,27 +555,47 @@ public final class CodecUtil {
long remaining = in.length() - in.getFilePointer();
long expected = footerLength();
if (remaining < expected) {
- throw new CorruptIndexException("misplaced codec footer (file truncated?): remaining=" + remaining + ", expected=" + expected + ", fp=" + in.getFilePointer(), in);
+ throw new CorruptIndexException(
+ "misplaced codec footer (file truncated?): remaining="
+ + remaining
+ + ", expected="
+ + expected
+ + ", fp="
+ + in.getFilePointer(),
+ in);
} else if (remaining > expected) {
- throw new CorruptIndexException("misplaced codec footer (file extended?): remaining=" + remaining + ", expected=" + expected + ", fp=" + in.getFilePointer(), in);
+ throw new CorruptIndexException(
+ "misplaced codec footer (file extended?): remaining="
+ + remaining
+ + ", expected="
+ + expected
+ + ", fp="
+ + in.getFilePointer(),
+ in);
}
-
+
final int magic = in.readInt();
if (magic != FOOTER_MAGIC) {
- throw new CorruptIndexException("codec footer mismatch (file truncated?): actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC, in);
+ throw new CorruptIndexException(
+ "codec footer mismatch (file truncated?): actual footer="
+ + magic
+ + " vs expected footer="
+ + FOOTER_MAGIC,
+ in);
}
-
+
final int algorithmID = in.readInt();
if (algorithmID != 0) {
- throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID, in);
+ throw new CorruptIndexException(
+ "codec footer mismatch: unknown algorithmID: " + algorithmID, in);
}
}
-
- /**
- * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}
- * <p>
- * Note that this method may be slow, as it must process the entire file.
- * If you just need to extract the checksum value, call {@link #retrieveChecksum}.
+
+ /**
+ * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}
+ *
+ * <p>Note that this method may be slow, as it must process the entire file. If you just need to
+ * extract the checksum value, call {@link #retrieveChecksum}.
*/
public static long checksumEntireFile(IndexInput input) throws IOException {
IndexInput clone = input.clone();
@@ -541,14 +603,20 @@ public final class CodecUtil {
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
assert in.getFilePointer() == 0;
if (in.length() < footerLength()) {
- throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), input);
+ throw new CorruptIndexException(
+ "misplaced codec footer (file truncated?): length="
+ + in.length()
+ + " but footerLength=="
+ + footerLength(),
+ input);
}
in.seek(in.length() - footerLength());
return checkFooter(in);
}
-
+
/**
* Reads CRC32 value as a 64-bit long from the input.
+ *
* @throws CorruptIndexException if CRC is formatted incorrectly (wrong bits set)
* @throws IOException if an i/o error occurs
*/
@@ -559,16 +627,18 @@ public final class CodecUtil {
}
return value;
}
-
+
/**
* Writes CRC32 value as a 64-bit long to the output.
+ *
* @throws IllegalStateException if CRC is formatted incorrectly (wrong bits set)
* @throws IOException if an i/o error occurs
*/
static void writeCRC(IndexOutput output) throws IOException {
long value = output.getChecksum();
if ((value & 0xFFFFFFFF00000000L) != 0) {
- throw new IllegalStateException("Illegal CRC-32 checksum: " + value + " (resource=" + output + ")");
+ throw new IllegalStateException(
+ "Illegal CRC-32 checksum: " + value + " (resource=" + output + ")");
}
output.writeLong(value);
}
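A round-trip sketch of the header/footer protocol documented above (file name, codec name, and version bounds are illustrative, not part of this diff):

    import java.io.IOException;
    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexOutput;

    public final class CodecUtilDemo {
      static void roundTrip(Directory dir) throws IOException {
        // Write: header first, payload, checksum footer last.
        try (IndexOutput out = dir.createOutput("demo.dat", IOContext.DEFAULT)) {
          CodecUtil.writeHeader(out, "DemoCodec", 1); // magic + name + version
          out.writeVInt(42);                          // payload
          CodecUtil.writeFooter(out);                 // magic + algorithm id + CRC-32
        }
        // Read: validate the header, consume the payload, then verify the checksum.
        try (ChecksumIndexInput in = dir.openChecksumInput("demo.dat", IOContext.READ)) {
          int version = CodecUtil.checkHeader(in, "DemoCodec", 1, 1);
          int payload = in.readVInt();
          CodecUtil.checkFooter(in); // throws CorruptIndexException on mismatch
        }
      }
    }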
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveImpactAccumulator.java b/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveImpactAccumulator.java
index fca6455..247cd17 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveImpactAccumulator.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveImpactAccumulator.java
@@ -24,12 +24,9 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
-
import org.apache.lucene.index.Impact;
-/**
- * This class accumulates the (freq, norm) pairs that may produce competitive scores.
- */
+/** This class accumulates the (freq, norm) pairs that may produce competitive scores. */
public final class CompetitiveImpactAccumulator {
// We speed up accumulation for common norm values with this array that maps
@@ -44,18 +41,19 @@ public final class CompetitiveImpactAccumulator {
/** Sole constructor. */
public CompetitiveImpactAccumulator() {
maxFreqs = new int[256];
- Comparator<Impact> comparator = new Comparator<Impact>() {
- @Override
- public int compare(Impact o1, Impact o2) {
- // greater freqs compare greater
- int cmp = Integer.compare(o1.freq, o2.freq);
- if (cmp == 0) {
- // greater norms compare lower
- cmp = Long.compareUnsigned(o2.norm, o1.norm);
- }
- return cmp;
- }
- };
+ Comparator<Impact> comparator =
+ new Comparator<Impact>() {
+ @Override
+ public int compare(Impact o1, Impact o2) {
+ // greater freqs compare greater
+ int cmp = Integer.compare(o1.freq, o2.freq);
+ if (cmp == 0) {
+ // greater norms compare lower
+ cmp = Long.compareUnsigned(o2.norm, o1.norm);
+ }
+ return cmp;
+ }
+ };
otherFreqNormPairs = new TreeSet<>(comparator);
}
@@ -66,12 +64,14 @@ public final class CompetitiveImpactAccumulator {
assert assertConsistent();
}
- /** Accumulate a (freq,norm) pair, updating this structure if there is no
- * equivalent or more competitive entry already. */
+ /**
+ * Accumulate a (freq,norm) pair, updating this structure if there is no equivalent or more
+ * competitive entry already.
+ */
public void add(int freq, long norm) {
if (norm >= Byte.MIN_VALUE && norm <= Byte.MAX_VALUE) {
int index = Byte.toUnsignedInt((byte) norm);
- maxFreqs[index] = Math.max(maxFreqs[index], freq);
+ maxFreqs[index] = Math.max(maxFreqs[index], freq);
} else {
add(new Impact(freq, norm), otherFreqNormPairs);
}
@@ -131,7 +131,8 @@ public final class CompetitiveImpactAccumulator {
freqNormPairs.add(newEntry);
}
- for (Iterator<Impact> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
+ for (Iterator<Impact> it = freqNormPairs.headSet(newEntry, false).descendingIterator();
+ it.hasNext(); ) {
Impact entry = it.next();
if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) {
// less competitive
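A small usage sketch of the two accumulation paths above (values are illustrative):

    import org.apache.lucene.codecs.CompetitiveImpactAccumulator;

    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
    acc.add(3, 7L);   // norm fits in a byte: fast path through the maxFreqs array
    acc.add(5, 300L); // norm outside byte range: kept in the sorted TreeSet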
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompoundDirectory.java b/lucene/core/src/java/org/apache/lucene/codecs/CompoundDirectory.java
index f063a12..362b5b3 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CompoundDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CompoundDirectory.java
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.Collection;
-
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
@@ -26,6 +25,7 @@ import org.apache.lucene.store.Lock;
/**
* A read-only {@link Directory} that consists of a view over a compound file.
+ *
* @see CompoundFormat
* @lucene.experimental
*/
@@ -36,29 +36,34 @@ public abstract class CompoundDirectory extends Directory {
/**
* Checks consistency of this directory.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
*/
public abstract void checkIntegrity() throws IOException;
- /** Not implemented
- * @throws UnsupportedOperationException always: not supported by CFS */
+ /**
+ * Not implemented
+ *
+ * @throws UnsupportedOperationException always: not supported by CFS
+ */
@Override
public final void deleteFile(String name) {
throw new UnsupportedOperationException();
}
-
- /** Not implemented
- * @throws UnsupportedOperationException always: not supported by CFS */
+
+ /**
+ * Not implemented
+ *
+ * @throws UnsupportedOperationException always: not supported by CFS
+ */
@Override
public final void rename(String from, String to) {
throw new UnsupportedOperationException();
}
@Override
- public final void syncMetaData() {
- }
+ public final void syncMetaData() {}
@Override
public final IndexOutput createOutput(String name, IOContext context) throws IOException {
@@ -66,18 +71,18 @@ public abstract class CompoundDirectory extends Directory {
}
@Override
- public final IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
+ public final IndexOutput createTempOutput(String prefix, String suffix, IOContext context)
+ throws IOException {
throw new UnsupportedOperationException();
}
-
+
@Override
public final void sync(Collection<String> names) {
throw new UnsupportedOperationException();
}
-
+
@Override
public final Lock obtainLock(String name) {
throw new UnsupportedOperationException();
}
-
}
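
As a sketch of the read-only contract above (the cfsDir instance and file
name are assumed), every mutating operation fails fast:

    try {
      cfsDir.deleteFile("_0.fdt"); // cfsDir is a CompoundDirectory
    } catch (UnsupportedOperationException expected) {
      // compound file directories are read-only views
    }
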
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
index d74eced..371e192 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
@@ -16,15 +16,14 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Encodes/decodes compound files
+ *
* @lucene.experimental
*/
public abstract class CompoundFormat {
@@ -34,16 +33,15 @@ public abstract class CompoundFormat {
// TODO: this is very minimal. If we need more methods,
// we can add 'producer' classes.
-
- /**
- * Returns a Directory view (read-only) for the compound files in this segment
- */
- public abstract CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException;
+
+ /** Returns a Directory view (read-only) for the compound files in this segment */
+ public abstract CompoundDirectory getCompoundReader(
+ Directory dir, SegmentInfo si, IOContext context) throws IOException;
/**
- * Packs the provided segment's files into a compound format. All files referenced
- * by the provided {@link SegmentInfo} must have {@link CodecUtil#writeIndexHeader}
- * and {@link CodecUtil#writeFooter}.
+ * Packs the provided segment's files into a compound format. All files referenced by the provided
+ * {@link SegmentInfo} must have {@link CodecUtil#writeIndexHeader} and {@link
+ * CodecUtil#writeFooter}.
*/
public abstract void write(Directory dir, SegmentInfo si, IOContext context) throws IOException;
}
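
A hedged sketch of how this format is typically driven (codec, dir and si
are assumed to be in scope; IOContext.READ is the stock read context):

    CompoundFormat format = codec.compoundFormat();
    try (CompoundDirectory cfsDir = format.getCompoundReader(dir, si, IOContext.READ)) {
      for (String file : cfsDir.listAll()) {
        // each entry is a file packed into this segment's compound file
      }
      cfsDir.checkIntegrity(); // may be costly: can checksum large files
    }
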
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index c4bae5c..6320f97 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -16,12 +16,13 @@
*/
package org.apache.lucene.codecs;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.DocValues;
@@ -47,86 +48,88 @@ import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
-/**
- * Abstract API that consumes numeric, binary and
- * sorted docvalues. Concrete implementations of this
- * actually do "something" with the docvalues (write it into
- * the index in a specific format).
- * <p>
- * The lifecycle is:
+/**
+ * Abstract API that consumes numeric, binary and sorted docvalues. Concrete implementations of this
+ * actually do "something" with the docvalues (write it into the index in a specific format).
+ *
+ * <p>The lifecycle is:
+ *
* <ol>
- * <li>DocValuesConsumer is created by
- * {@link NormsFormat#normsConsumer(SegmentWriteState)}.
- * <li>{@link #addNumericField}, {@link #addBinaryField},
- * {@link #addSortedField}, {@link #addSortedSetField},
- * or {@link #addSortedNumericField} are called for each Numeric,
- * Binary, Sorted, SortedSet, or SortedNumeric docvalues field.
- * The API is a "pull" rather than "push", and the implementation
- * is free to iterate over the values multiple times
- * ({@link Iterable#iterator()}).
+ *   <li>DocValuesConsumer is created by {@link
+ *       DocValuesFormat#fieldsConsumer(SegmentWriteState)}.
+ * <li>{@link #addNumericField}, {@link #addBinaryField}, {@link #addSortedField}, {@link
+ * #addSortedSetField}, or {@link #addSortedNumericField} are called for each Numeric, Binary,
+ * Sorted, SortedSet, or SortedNumeric docvalues field. The API is a "pull" rather than
+ * "push", and the implementation is free to iterate over the values multiple times ({@link
+ * Iterable#iterator()}).
* <li>After all fields are added, the consumer is {@link #close}d.
* </ol>
*
* @lucene.experimental
*/
public abstract class DocValuesConsumer implements Closeable {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
+
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected DocValuesConsumer() {}
/**
* Writes numeric docvalues for a field.
+ *
* @param field field information
* @param valuesProducer Numeric values to write.
* @throws IOException if an I/O error occurred.
*/
- public abstract void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException;
+ public abstract void addNumericField(FieldInfo field, DocValuesProducer valuesProducer)
+ throws IOException;
/**
* Writes binary docvalues for a field.
+ *
* @param field field information
* @param valuesProducer Binary values to write.
* @throws IOException if an I/O error occurred.
*/
- public abstract void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException;
+ public abstract void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer)
+ throws IOException;
/**
* Writes pre-sorted binary docvalues for a field.
+ *
* @param field field information
* @param valuesProducer produces the values and ordinals to write
* @throws IOException if an I/O error occurred.
*/
- public abstract void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException;
-
+ public abstract void addSortedField(FieldInfo field, DocValuesProducer valuesProducer)
+ throws IOException;
+
/**
 * Writes pre-sorted numeric docvalues for a field.
+ *
* @param field field information
* @param valuesProducer produces the values to write
* @throws IOException if an I/O error occurred.
*/
- public abstract void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException;
+ public abstract void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer)
+ throws IOException;
/**
 * Writes pre-sorted set docvalues for a field.
+ *
* @param field field information
* @param valuesProducer produces the values to write
* @throws IOException if an I/O error occurred.
*/
- public abstract void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException;
-
- /** Merges in the fields from the readers in
- * <code>mergeState</code>. The default implementation
- * calls {@link #mergeNumericField}, {@link #mergeBinaryField},
- * {@link #mergeSortedField}, {@link #mergeSortedSetField},
- * or {@link #mergeSortedNumericField} for each field,
- * depending on its type.
- * Implementations can override this method
- * for more sophisticated merging (bulk-byte copying, etc). */
+ public abstract void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer)
+ throws IOException;
+
+ /**
+ * Merges in the fields from the readers in <code>mergeState</code>. The default implementation
+ * calls {@link #mergeNumericField}, {@link #mergeBinaryField}, {@link #mergeSortedField}, {@link
+ * #mergeSortedSetField}, or {@link #mergeSortedNumericField} for each field, depending on its
+ * type. Implementations can override this method for more sophisticated merging (bulk-byte
+ * copying, etc).
+ */
public void merge(MergeState mergeState) throws IOException {
- for(DocValuesProducer docValuesProducer : mergeState.docValuesProducers) {
+ for (DocValuesProducer docValuesProducer : mergeState.docValuesProducers) {
if (docValuesProducer != null) {
docValuesProducer.checkIntegrity();
}
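
The documented lifecycle, condensed into a sketch (format, writeState,
fieldInfo and valuesProducer are assumed):

    DocValuesConsumer consumer = format.fieldsConsumer(writeState);
    try {
      // one add*Field call per docvalues field; the API is pull-style,
      // so the consumer may iterate the values more than once
      consumer.addNumericField(fieldInfo, valuesProducer);
    } finally {
      consumer.close(); // after all fields are added
    }
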
@@ -168,88 +171,92 @@ public abstract class DocValuesConsumer implements Closeable {
return values.nextDoc();
}
}
-
+
/**
* Merges the numeric docvalues from <code>MergeState</code>.
- * <p>
- * The default implementation calls {@link #addNumericField}, passing
- * a DocValuesProducer that merges and filters deleted documents on the fly.
+ *
+ * <p>The default implementation calls {@link #addNumericField}, passing a DocValuesProducer that
+ * merges and filters deleted documents on the fly.
*/
- public void mergeNumericField(final FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
- addNumericField(mergeFieldInfo,
- new EmptyDocValuesProducer() {
- @Override
- public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException {
- if (fieldInfo != mergeFieldInfo) {
- throw new IllegalArgumentException("wrong fieldInfo");
- }
-
- List<NumericDocValuesSub> subs = new ArrayList<>();
- assert mergeState.docMaps.length == mergeState.docValuesProducers.length;
- long cost = 0;
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
- NumericDocValues values = null;
- DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
- if (docValuesProducer != null) {
- FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
- if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.NUMERIC) {
- values = docValuesProducer.getNumeric(readerFieldInfo);
- }
- }
- if (values != null) {
- cost += values.cost();
- subs.add(new NumericDocValuesSub(mergeState.docMaps[i], values));
- }
- }
-
- final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
-
- final long finalCost = cost;
-
- return new NumericDocValues() {
- private int docID = -1;
- private NumericDocValuesSub current;
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- current = docIDMerger.next();
- if (current == null) {
- docID = NO_MORE_DOCS;
- } else {
- docID = current.mappedDocID;
- }
- return docID;
- }
-
- @Override
- public int advance(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long cost() {
- return finalCost;
- }
-
- @Override
- public long longValue() throws IOException {
- return current.values.longValue();
- }
- };
- }
- });
+ public void mergeNumericField(final FieldInfo mergeFieldInfo, final MergeState mergeState)
+ throws IOException {
+ addNumericField(
+ mergeFieldInfo,
+ new EmptyDocValuesProducer() {
+ @Override
+ public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException {
+ if (fieldInfo != mergeFieldInfo) {
+ throw new IllegalArgumentException("wrong fieldInfo");
+ }
+
+ List<NumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == mergeState.docValuesProducers.length;
+ long cost = 0;
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
+ NumericDocValues values = null;
+ DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
+ if (docValuesProducer != null) {
+ FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
+ if (readerFieldInfo != null
+ && readerFieldInfo.getDocValuesType() == DocValuesType.NUMERIC) {
+ values = docValuesProducer.getNumeric(readerFieldInfo);
+ }
+ }
+ if (values != null) {
+ cost += values.cost();
+ subs.add(new NumericDocValuesSub(mergeState.docMaps[i], values));
+ }
+ }
+
+ final DocIDMerger<NumericDocValuesSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
+
+ final long finalCost = cost;
+
+ return new NumericDocValues() {
+ private int docID = -1;
+ private NumericDocValuesSub current;
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ current = docIDMerger.next();
+ if (current == null) {
+ docID = NO_MORE_DOCS;
+ } else {
+ docID = current.mappedDocID;
+ }
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return finalCost;
+ }
+
+ @Override
+ public long longValue() throws IOException {
+ return current.values.longValue();
+ }
+ };
+ }
+ });
}
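
A sketch of consuming the merging view built above; advance() and
advanceExact() are unsupported on it, so the only legal access pattern is
nextDoc() (producer is assumed; NO_MORE_DOCS comes from DocIdSetIterator):

    NumericDocValues merged = producer.getNumeric(mergeFieldInfo);
    for (int doc = merged.nextDoc(); doc != NO_MORE_DOCS; doc = merged.nextDoc()) {
      long value = merged.longValue(); // value for the current mapped doc
      // write value ...
    }
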
-
+
/** Tracks state of one binary sub-reader that we are merging */
private static class BinaryDocValuesSub extends DocIDMerger.Sub {
@@ -269,82 +276,86 @@ public abstract class DocValuesConsumer implements Closeable {
/**
* Merges the binary docvalues from <code>MergeState</code>.
- * <p>
- * The default implementation calls {@link #addBinaryField}, passing
- * a DocValuesProducer that merges and filters deleted documents on the fly.
+ *
+ * <p>The default implementation calls {@link #addBinaryField}, passing a DocValuesProducer that
+ * merges and filters deleted documents on the fly.
*/
- public void mergeBinaryField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
- addBinaryField(mergeFieldInfo,
- new EmptyDocValuesProducer() {
- @Override
- public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
- if (fieldInfo != mergeFieldInfo) {
- throw new IllegalArgumentException("wrong fieldInfo");
- }
-
- List<BinaryDocValuesSub> subs = new ArrayList<>();
-
- long cost = 0;
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
- BinaryDocValues values = null;
- DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
- if (docValuesProducer != null) {
- FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
- if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.BINARY) {
- values = docValuesProducer.getBinary(readerFieldInfo);
- }
- }
- if (values != null) {
- cost += values.cost();
- subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], values));
- }
- }
-
- final DocIDMerger<BinaryDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
- final long finalCost = cost;
-
- return new BinaryDocValues() {
- private BinaryDocValuesSub current;
- private int docID = -1;
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- current = docIDMerger.next();
- if (current == null) {
- docID = NO_MORE_DOCS;
- } else {
- docID = current.mappedDocID;
- }
- return docID;
- }
-
- @Override
- public int advance(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long cost() {
- return finalCost;
- }
-
- @Override
- public BytesRef binaryValue() throws IOException {
- return current.values.binaryValue();
- }
- };
- }
- });
+ public void mergeBinaryField(FieldInfo mergeFieldInfo, final MergeState mergeState)
+ throws IOException {
+ addBinaryField(
+ mergeFieldInfo,
+ new EmptyDocValuesProducer() {
+ @Override
+ public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
+ if (fieldInfo != mergeFieldInfo) {
+ throw new IllegalArgumentException("wrong fieldInfo");
+ }
+
+ List<BinaryDocValuesSub> subs = new ArrayList<>();
+
+ long cost = 0;
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
+ BinaryDocValues values = null;
+ DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
+ if (docValuesProducer != null) {
+ FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
+ if (readerFieldInfo != null
+ && readerFieldInfo.getDocValuesType() == DocValuesType.BINARY) {
+ values = docValuesProducer.getBinary(readerFieldInfo);
+ }
+ }
+ if (values != null) {
+ cost += values.cost();
+ subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], values));
+ }
+ }
+
+ final DocIDMerger<BinaryDocValuesSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
+ final long finalCost = cost;
+
+ return new BinaryDocValues() {
+ private BinaryDocValuesSub current;
+ private int docID = -1;
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ current = docIDMerger.next();
+ if (current == null) {
+ docID = NO_MORE_DOCS;
+ } else {
+ docID = current.mappedDocID;
+ }
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return finalCost;
+ }
+
+ @Override
+ public BytesRef binaryValue() throws IOException {
+ return current.values.binaryValue();
+ }
+ };
+ }
+ });
}
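
The same nextDoc()-only pattern applies to the binary merging view
(producer assumed):

    BinaryDocValues merged = producer.getBinary(mergeFieldInfo);
    while (merged.nextDoc() != NO_MORE_DOCS) {
      BytesRef bytes = merged.binaryValue();
      // write bytes ...
    }
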
/** Tracks state of one sorted numeric sub-reader that we are merging */
@@ -366,98 +377,102 @@ public abstract class DocValuesConsumer implements Closeable {
/**
 * Merges the sorted numeric docvalues from <code>MergeState</code>.
- * <p>
- * The default implementation calls {@link #addSortedNumericField}, passing
- * iterables that filter deleted documents.
+ *
+ * <p>The default implementation calls {@link #addSortedNumericField}, passing iterables that
+ * filter deleted documents.
*/
- public void mergeSortedNumericField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
-
- addSortedNumericField(mergeFieldInfo,
- new EmptyDocValuesProducer() {
- @Override
- public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException {
- if (fieldInfo != mergeFieldInfo) {
- throw new IllegalArgumentException("wrong FieldInfo");
- }
-
- // We must make new iterators + DocIDMerger for each iterator:
- List<SortedNumericDocValuesSub> subs = new ArrayList<>();
- long cost = 0;
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
- DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
- SortedNumericDocValues values = null;
- if (docValuesProducer != null) {
- FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
- if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC) {
- values = docValuesProducer.getSortedNumeric(readerFieldInfo);
- }
- }
- if (values == null) {
- values = DocValues.emptySortedNumeric();
- }
- cost += values.cost();
- subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], values));
- }
-
- final long finalCost = cost;
-
- final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
-
- return new SortedNumericDocValues() {
-
- private int docID = -1;
- private SortedNumericDocValuesSub currentSub;
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- currentSub = docIDMerger.next();
- if (currentSub == null) {
- docID = NO_MORE_DOCS;
- } else {
- docID = currentSub.mappedDocID;
- }
-
- return docID;
- }
-
- @Override
- public int advance(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int docValueCount() {
- return currentSub.values.docValueCount();
- }
-
- @Override
- public long cost() {
- return finalCost;
- }
-
- @Override
- public long nextValue() throws IOException {
- return currentSub.values.nextValue();
- }
- };
- }
- });
+ public void mergeSortedNumericField(FieldInfo mergeFieldInfo, final MergeState mergeState)
+ throws IOException {
+
+ addSortedNumericField(
+ mergeFieldInfo,
+ new EmptyDocValuesProducer() {
+ @Override
+ public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException {
+ if (fieldInfo != mergeFieldInfo) {
+ throw new IllegalArgumentException("wrong FieldInfo");
+ }
+
+ // We must make new iterators + DocIDMerger for each iterator:
+ List<SortedNumericDocValuesSub> subs = new ArrayList<>();
+ long cost = 0;
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
+ DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
+ SortedNumericDocValues values = null;
+ if (docValuesProducer != null) {
+ FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
+ if (readerFieldInfo != null
+ && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC) {
+ values = docValuesProducer.getSortedNumeric(readerFieldInfo);
+ }
+ }
+ if (values == null) {
+ values = DocValues.emptySortedNumeric();
+ }
+ cost += values.cost();
+ subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], values));
+ }
+
+ final long finalCost = cost;
+
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
+
+ return new SortedNumericDocValues() {
+
+ private int docID = -1;
+ private SortedNumericDocValuesSub currentSub;
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ currentSub = docIDMerger.next();
+ if (currentSub == null) {
+ docID = NO_MORE_DOCS;
+ } else {
+ docID = currentSub.mappedDocID;
+ }
+
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int docValueCount() {
+ return currentSub.values.docValueCount();
+ }
+
+ @Override
+ public long cost() {
+ return finalCost;
+ }
+
+ @Override
+ public long nextValue() throws IOException {
+ return currentSub.values.nextValue();
+ }
+ };
+ }
+ });
}
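
Per-document multi-values from the merged sorted-numeric view would be
drained like this (a sketch; producer assumed):

    SortedNumericDocValues merged = producer.getSortedNumeric(mergeFieldInfo);
    while (merged.nextDoc() != NO_MORE_DOCS) {
      for (int i = 0; i < merged.docValueCount(); i++) {
        long value = merged.nextValue(); // values come back in sorted order
      }
    }
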
/**
- * A merged {@link TermsEnum}. This helps avoid relying on the default terms enum,
- * which calls {@link SortedDocValues#lookupOrd(int)} or
- * {@link SortedSetDocValues#lookupOrd(long)} on every call to {@link TermsEnum#next()}.
+ * A merged {@link TermsEnum}. This helps avoid relying on the default terms enum, which calls
+ * {@link SortedDocValues#lookupOrd(int)} or {@link SortedSetDocValues#lookupOrd(long)} on every
+ * call to {@link TermsEnum#next()}.
*/
private static class MergedTermsEnum extends TermsEnum {
@@ -547,7 +562,6 @@ public abstract class DocValuesConsumer implements Closeable {
public TermState termState() throws IOException {
throw new UnsupportedOperationException();
}
-
}
/** Tracks state of one sorted sub-reader that we are merging */
@@ -555,7 +569,7 @@ public abstract class DocValuesConsumer implements Closeable {
final SortedDocValues values;
final LongValues map;
-
+
public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, LongValues map) {
super(docMap);
this.values = values;
@@ -571,13 +585,14 @@ public abstract class DocValuesConsumer implements Closeable {
/**
 * Merges the sorted docvalues from <code>MergeState</code>.
- * <p>
- * The default implementation calls {@link #addSortedField}, passing
- * an Iterable that merges ordinals and values and filters deleted documents .
+ *
+ * <p>The default implementation calls {@link #addSortedField}, passing an Iterable that merges
+ * ordinals and values and filters deleted documents.
*/
- public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
+ public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState)
+ throws IOException {
List<SortedDocValues> toMerge = new ArrayList<>();
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
SortedDocValues values = null;
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
if (docValuesProducer != null) {
@@ -594,11 +609,11 @@ public abstract class DocValuesConsumer implements Closeable {
final int numReaders = toMerge.size();
final SortedDocValues dvs[] = toMerge.toArray(new SortedDocValues[numReaders]);
-
+
// step 1: iterate thru each sub and mark terms still in use
TermsEnum liveTerms[] = new TermsEnum[dvs.length];
long[] weights = new long[liveTerms.length];
- for (int sub=0;sub<numReaders;sub++) {
+ for (int sub = 0; sub < numReaders; sub++) {
SortedDocValues dv = dvs[sub];
Bits liveDocs = mergeState.liveDocs[sub];
if (liveDocs == null) {
@@ -619,118 +634,122 @@ public abstract class DocValuesConsumer implements Closeable {
weights[sub] = bitset.cardinality();
}
}
-
+
// step 2: create ordinal map (this conceptually does the "merging")
final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
-
+
// step 3: add field
- addSortedField(fieldInfo,
- new EmptyDocValuesProducer() {
- @Override
- public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
- if (fieldInfoIn != fieldInfo) {
- throw new IllegalArgumentException("wrong FieldInfo");
- }
-
- // We must make new iterators + DocIDMerger for each iterator:
-
- List<SortedDocValuesSub> subs = new ArrayList<>();
- long cost = 0;
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
- SortedDocValues values = null;
- DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
- if (docValuesProducer != null) {
- FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
- if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
- values = docValuesProducer.getSorted(readerFieldInfo);
- }
- }
- if (values == null) {
- values = DocValues.emptySorted();
- }
- cost += values.cost();
-
- subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
- }
-
- final long finalCost = cost;
-
- final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
-
- return new SortedDocValues() {
- private int docID = -1;
- private int ord;
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- SortedDocValuesSub sub = docIDMerger.next();
- if (sub == null) {
- return docID = NO_MORE_DOCS;
- }
- int subOrd = sub.values.ordValue();
- assert subOrd != -1;
- ord = (int) sub.map.get(subOrd);
- docID = sub.mappedDocID;
- return docID;
- }
-
- @Override
- public int ordValue() {
- return ord;
- }
-
- @Override
- public int advance(int target) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long cost() {
- return finalCost;
- }
-
- @Override
- public int getValueCount() {
- return (int) map.getValueCount();
- }
-
- @Override
- public BytesRef lookupOrd(int ord) throws IOException {
- int segmentNumber = map.getFirstSegmentNumber(ord);
- int segmentOrd = (int) map.getFirstSegmentOrd(ord);
- return dvs[segmentNumber].lookupOrd(segmentOrd);
- }
-
- @Override
- public TermsEnum termsEnum() throws IOException {
- TermsEnum[] subs = new TermsEnum[toMerge.size()];
- for (int sub = 0; sub < subs.length; ++sub) {
- subs[sub] = toMerge.get(sub).termsEnum();
- }
- return new MergedTermsEnum(map, subs);
- }
- };
- }
- });
+ addSortedField(
+ fieldInfo,
+ new EmptyDocValuesProducer() {
+ @Override
+ public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
+ if (fieldInfoIn != fieldInfo) {
+ throw new IllegalArgumentException("wrong FieldInfo");
+ }
+
+ // We must make new iterators + DocIDMerger for each iterator:
+
+ List<SortedDocValuesSub> subs = new ArrayList<>();
+ long cost = 0;
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
+ SortedDocValues values = null;
+ DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
+ if (docValuesProducer != null) {
+ FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
+ if (readerFieldInfo != null
+ && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
+ values = docValuesProducer.getSorted(readerFieldInfo);
+ }
+ }
+ if (values == null) {
+ values = DocValues.emptySorted();
+ }
+ cost += values.cost();
+
+ subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
+ }
+
+ final long finalCost = cost;
+
+ final DocIDMerger<SortedDocValuesSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
+
+ return new SortedDocValues() {
+ private int docID = -1;
+ private int ord;
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ SortedDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return docID = NO_MORE_DOCS;
+ }
+ int subOrd = sub.values.ordValue();
+ assert subOrd != -1;
+ ord = (int) sub.map.get(subOrd);
+ docID = sub.mappedDocID;
+ return docID;
+ }
+
+ @Override
+ public int ordValue() {
+ return ord;
+ }
+
+ @Override
+ public int advance(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return finalCost;
+ }
+
+ @Override
+ public int getValueCount() {
+ return (int) map.getValueCount();
+ }
+
+ @Override
+ public BytesRef lookupOrd(int ord) throws IOException {
+ int segmentNumber = map.getFirstSegmentNumber(ord);
+ int segmentOrd = (int) map.getFirstSegmentOrd(ord);
+ return dvs[segmentNumber].lookupOrd(segmentOrd);
+ }
+
+ @Override
+ public TermsEnum termsEnum() throws IOException {
+ TermsEnum[] subs = new TermsEnum[toMerge.size()];
+ for (int sub = 0; sub < subs.length; ++sub) {
+ subs[sub] = toMerge.get(sub).termsEnum();
+ }
+ return new MergedTermsEnum(map, subs);
+ }
+ };
+ }
+ });
}
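
The heart of the three-step merge above is ordinal remapping: each
per-segment ord is translated into the merged (global) term space through
the OrdinalMap. A minimal sketch (segmentIndex and segmentValues assumed):

    LongValues toGlobal = map.getGlobalOrds(segmentIndex);
    int segmentOrd = segmentValues.ordValue(); // ord within one segment
    long globalOrd = toGlobal.get(segmentOrd); // ord in the merged term space
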
-
+
/** Tracks state of one sorted set sub-reader that we are merging */
private static class SortedSetDocValuesSub extends DocIDMerger.Sub {
final SortedSetDocValues values;
final LongValues map;
-
- public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, LongValues map) {
+
+ public SortedSetDocValuesSub(
+ MergeState.DocMap docMap, SortedSetDocValues values, LongValues map) {
super(docMap);
this.values = values;
this.map = map;
@@ -750,14 +769,15 @@ public abstract class DocValuesConsumer implements Closeable {
/**
 * Merges the sortedset docvalues from <code>MergeState</code>.
- * <p>
- * The default implementation calls {@link #addSortedSetField}, passing
- * an Iterable that merges ordinals and values and filters deleted documents .
+ *
+ * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges
+ * ordinals and values and filters deleted documents.
*/
- public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
+ public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState)
+ throws IOException {
List<SortedSetDocValues> toMerge = new ArrayList<>();
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
SortedSetDocValues values = null;
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
if (docValuesProducer != null) {
@@ -796,118 +816,122 @@ public abstract class DocValuesConsumer implements Closeable {
weights[sub] = bitset.cardinality();
}
}
-
+
// step 2: create ordinal map (this conceptually does the "merging")
final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
-
+
// step 3: add field
- addSortedSetField(mergeFieldInfo,
- new EmptyDocValuesProducer() {
- @Override
- public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
- if (fieldInfo != mergeFieldInfo) {
- throw new IllegalArgumentException("wrong FieldInfo");
- }
-
- // We must make new iterators + DocIDMerger for each iterator:
- List<SortedSetDocValuesSub> subs = new ArrayList<>();
-
- long cost = 0;
-
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
- SortedSetDocValues values = null;
- DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
- if (docValuesProducer != null) {
- FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
- if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
- values = docValuesProducer.getSortedSet(readerFieldInfo);
- }
- }
- if (values == null) {
- values = DocValues.emptySortedSet();
- }
- cost += values.cost();
- subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
- }
-
- final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
-
- final long finalCost = cost;
-
- return new SortedSetDocValues() {
- private int docID = -1;
- private SortedSetDocValuesSub currentSub;
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- currentSub = docIDMerger.next();
- if (currentSub == null) {
- docID = NO_MORE_DOCS;
- } else {
- docID = currentSub.mappedDocID;
- }
-
- return docID;
- }
-
- @Override
- public int advance(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long nextOrd() throws IOException {
- long subOrd = currentSub.values.nextOrd();
- if (subOrd == NO_MORE_ORDS) {
- return NO_MORE_ORDS;
- }
- return currentSub.map.get(subOrd);
- }
-
- @Override
- public long cost() {
- return finalCost;
- }
-
- @Override
- public BytesRef lookupOrd(long ord) throws IOException {
- int segmentNumber = map.getFirstSegmentNumber(ord);
- long segmentOrd = map.getFirstSegmentOrd(ord);
- return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
- }
-
- @Override
- public long getValueCount() {
- return map.getValueCount();
- }
-
- @Override
- public TermsEnum termsEnum() throws IOException {
- TermsEnum[] subs = new TermsEnum[toMerge.size()];
- for (int sub = 0; sub < subs.length; ++sub) {
- subs[sub] = toMerge.get(sub).termsEnum();
- }
- return new MergedTermsEnum(map, subs);
- }
- };
- }
- });
+ addSortedSetField(
+ mergeFieldInfo,
+ new EmptyDocValuesProducer() {
+ @Override
+ public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
+ if (fieldInfo != mergeFieldInfo) {
+ throw new IllegalArgumentException("wrong FieldInfo");
+ }
+
+ // We must make new iterators + DocIDMerger for each iterator:
+ List<SortedSetDocValuesSub> subs = new ArrayList<>();
+
+ long cost = 0;
+
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
+ SortedSetDocValues values = null;
+ DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
+ if (docValuesProducer != null) {
+ FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
+ if (readerFieldInfo != null
+ && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
+ values = docValuesProducer.getSortedSet(readerFieldInfo);
+ }
+ }
+ if (values == null) {
+ values = DocValues.emptySortedSet();
+ }
+ cost += values.cost();
+ subs.add(
+ new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
+ }
+
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
+
+ final long finalCost = cost;
+
+ return new SortedSetDocValues() {
+ private int docID = -1;
+ private SortedSetDocValuesSub currentSub;
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ currentSub = docIDMerger.next();
+ if (currentSub == null) {
+ docID = NO_MORE_DOCS;
+ } else {
+ docID = currentSub.mappedDocID;
+ }
+
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long nextOrd() throws IOException {
+ long subOrd = currentSub.values.nextOrd();
+ if (subOrd == NO_MORE_ORDS) {
+ return NO_MORE_ORDS;
+ }
+ return currentSub.map.get(subOrd);
+ }
+
+ @Override
+ public long cost() {
+ return finalCost;
+ }
+
+ @Override
+ public BytesRef lookupOrd(long ord) throws IOException {
+ int segmentNumber = map.getFirstSegmentNumber(ord);
+ long segmentOrd = map.getFirstSegmentOrd(ord);
+ return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
+ }
+
+ @Override
+ public long getValueCount() {
+ return map.getValueCount();
+ }
+
+ @Override
+ public TermsEnum termsEnum() throws IOException {
+ TermsEnum[] subs = new TermsEnum[toMerge.size()];
+ for (int sub = 0; sub < subs.length; ++sub) {
+ subs[sub] = toMerge.get(sub).termsEnum();
+ }
+ return new MergedTermsEnum(map, subs);
+ }
+ };
+ }
+ });
}
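
Draining the merged sorted-set view follows the per-document ord stream
protocol (a sketch; producer assumed):

    SortedSetDocValues merged = producer.getSortedSet(mergeFieldInfo);
    while (merged.nextDoc() != NO_MORE_DOCS) {
      for (long ord = merged.nextOrd();
          ord != SortedSetDocValues.NO_MORE_ORDS;
          ord = merged.nextOrd()) {
        BytesRef term = merged.lookupOrd(ord); // global ord -> term bytes
      }
    }
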
-
+
// TODO: seek-by-ord to nextSetBit
static class BitsFilteredTermsEnum extends FilteredTermsEnum {
final LongBitSet liveTerms;
-
+
BitsFilteredTermsEnum(TermsEnum in, LongBitSet liveTerms) {
super(in, false); // <-- not passing false here wasted about 3 hours of my time!!!!!!!!!!!!!
assert liveTerms != null;
@@ -923,7 +947,7 @@ public abstract class DocValuesConsumer implements Closeable {
}
}
}
-
+
 /** Helper: returns true if the given docToValueCount contains at most one value per document */
public static boolean isSingleValued(Iterable<Number> docToValueCount) {
for (Number count : docToValueCount) {
@@ -933,9 +957,12 @@ public abstract class DocValuesConsumer implements Closeable {
}
return true;
}
-
+
/** Helper: returns single-valued view, using {@code missingValue} when count is zero */
- public static Iterable<Number> singletonView(final Iterable<Number> docToValueCount, final Iterable<Number> values, final Number missingValue) {
+ public static Iterable<Number> singletonView(
+ final Iterable<Number> docToValueCount,
+ final Iterable<Number> values,
+ final Number missingValue) {
assert isSingleValued(docToValueCount);
return new Iterable<Number>() {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
index 511a2b9..7cac0c2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
@@ -16,60 +16,60 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
import java.util.ServiceLoader;
import java.util.Set;
-
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.NamedSPILoader;
-/**
+/**
* Encodes/decodes per-document values.
- * <p>
- * Note, when extending this class, the name ({@link #getName}) may
- * written into the index in certain configurations. In order for the segment
- * to be read, the name must resolve to your implementation via {@link #forName(String)}.
- * This method uses Java's
- * {@link ServiceLoader Service Provider Interface} (SPI) to resolve format names.
- * <p>
- * If you implement your own format, make sure that it has a no-arg constructor
- * so SPI can load it.
+ *
+ * <p>Note, when extending this class, the name ({@link #getName}) may be written into the index
+ * in certain configurations. In order for the segment to be read, the name must resolve to your
+ * implementation via {@link #forName(String)}. This method uses Java's {@link ServiceLoader Service
+ * Provider Interface} (SPI) to resolve format names.
+ *
+ * <p>If you implement your own format, make sure that it has a no-arg constructor so SPI can load
+ * it.
+ *
* @see ServiceLoader
- * @lucene.experimental */
+ * @lucene.experimental
+ */
public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
-
+
/**
- * This static holder class prevents classloading deadlock by delaying
- * init of doc values formats until needed.
+ * This static holder class prevents classloading deadlock by delaying init of doc values formats
+ * until needed.
*/
private static final class Holder {
- private static final NamedSPILoader<DocValuesFormat> LOADER = new NamedSPILoader<>(DocValuesFormat.class);
-
+ private static final NamedSPILoader<DocValuesFormat> LOADER =
+ new NamedSPILoader<>(DocValuesFormat.class);
+
private Holder() {}
-
+
static NamedSPILoader<DocValuesFormat> getLoader() {
if (LOADER == null) {
- throw new IllegalStateException("You tried to lookup a DocValuesFormat by name before all formats could be initialized. "+
- "This likely happens if you call DocValuesFormat#forName from a DocValuesFormat's ctor.");
+ throw new IllegalStateException(
+ "You tried to lookup a DocValuesFormat by name before all formats could be initialized. "
+ + "This likely happens if you call DocValuesFormat#forName from a DocValuesFormat's ctor.");
}
return LOADER;
}
}
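
A hedged sketch of the SPI contract this loader relies on: a custom format
needs a public no-arg constructor and an entry in
META-INF/services/org.apache.lucene.codecs.DocValuesFormat (the class and
format names below are invented for illustration):

    public final class MyDocValuesFormat extends DocValuesFormat {
      public MyDocValuesFormat() {
        super("MyDocValues"); // ascii alphanumeric, under 128 chars
      }

      @Override
      public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
        throw new UnsupportedOperationException("sketch only");
      }

      @Override
      public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
        throw new UnsupportedOperationException("sketch only");
      }
    }

DocValuesFormat.forName("MyDocValues") would then resolve this class through
the loader above.
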
-
- /** Unique name that's used to retrieve this format when
- * reading the index.
- */
+
+ /** Unique name that's used to retrieve this format when reading the index. */
private final String name;
/**
* Creates a new docvalues format.
- * <p>
- * The provided name will be written into the index segment in some configurations
- * (such as when using {@code PerFieldDocValuesFormat}): in such configurations,
- * for the segment to be read this class should be registered with Java's
- * SPI mechanism (registered in META-INF/ of your jar file, etc).
+ *
+ * <p>The provided name will be written into the index segment in some configurations (such as
+ * when using {@code PerFieldDocValuesFormat}): in such configurations, for the segment to be read
+ * this class should be registered with Java's SPI mechanism (registered in META-INF/ of your jar
+ * file, etc).
+ *
* @param name must be all ascii alphanumeric, and less than 128 characters in length.
*/
protected DocValuesFormat(String name) {
@@ -77,18 +77,16 @@ public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
this.name = name;
}
- /** Returns a {@link DocValuesConsumer} to write docvalues to the
- * index. */
+ /** Returns a {@link DocValuesConsumer} to write docvalues to the index. */
public abstract DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException;
- /**
- * Returns a {@link DocValuesProducer} to read docvalues from the index.
- * <p>
- * NOTE: by the time this call returns, it must hold open any files it will
- * need to use; else, those files may be deleted. Additionally, required files
- * may be deleted during the execution of this call before there is a chance
- * to open them. Under these circumstances an IOException should be thrown by
- * the implementation. IOExceptions are expected and will automatically cause
+ /**
+ * Returns a {@link DocValuesProducer} to read docvalues from the index.
+ *
+ * <p>NOTE: by the time this call returns, it must hold open any files it will need to use; else,
+ * those files may be deleted. Additionally, required files may be deleted during the execution of
+ * this call before there is a chance to open them. Under these circumstances an IOException
+ * should be thrown by the implementation. IOExceptions are expected and will automatically cause
* a retry of the segment opening logic with the newly revised segments.
*/
public abstract DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException;
@@ -97,32 +95,32 @@ public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
public final String getName() {
return name;
}
-
+
@Override
public String toString() {
return "DocValuesFormat(name=" + name + ")";
}
-
+
/** looks up a format by name */
public static DocValuesFormat forName(String name) {
return Holder.getLoader().lookup(name);
}
-
+
/** returns a list of all available format names */
public static Set<String> availableDocValuesFormats() {
return Holder.getLoader().availableServices();
}
-
- /**
- * Reloads the DocValues format list from the given {@link ClassLoader}.
- * Changes to the docvalues formats are visible after the method ends, all
- * iterators ({@link #availableDocValuesFormats()},...) stay consistent.
- *
- * <p><b>NOTE:</b> Only new docvalues formats are added, existing ones are
- * never removed or replaced.
- *
- * <p><em>This method is expensive and should only be called for discovery
- * of new docvalues formats on the given classpath/classloader!</em>
+
+ /**
+ * Reloads the DocValues format list from the given {@link ClassLoader}. Changes to the docvalues
+ * formats are visible after the method ends; all iterators ({@link
+ * #availableDocValuesFormats()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new docvalues formats are added, existing ones are never removed or
+ * replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery of new docvalues
+ * formats on the given classpath/classloader!</em>
*/
public static void reloadDocValuesFormats(ClassLoader classloader) {
Holder.getLoader().reload(classloader);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
index 5fe0b33..abd975f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@@ -16,10 +16,8 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
@@ -28,56 +26,62 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
-/** Abstract API that produces numeric, binary, sorted, sortedset,
- * and sortednumeric docvalues.
+/**
+ * Abstract API that produces numeric, binary, sorted, sortedset, and sortednumeric docvalues.
*
* @lucene.experimental
*/
public abstract class DocValuesProducer implements Closeable, Accountable {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
+
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected DocValuesProducer() {}
- /** Returns {@link NumericDocValues} for this field.
- * The returned instance need not be thread-safe: it will only be
- * used by a single thread. */
+ /**
+ * Returns {@link NumericDocValues} for this field. The returned instance need not be thread-safe:
+ * it will only be used by a single thread.
+ */
public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
- /** Returns {@link BinaryDocValues} for this field.
- * The returned instance need not be thread-safe: it will only be
- * used by a single thread. */
+ /**
+ * Returns {@link BinaryDocValues} for this field. The returned instance need not be thread-safe:
+ * it will only be used by a single thread.
+ */
public abstract BinaryDocValues getBinary(FieldInfo field) throws IOException;
- /** Returns {@link SortedDocValues} for this field.
- * The returned instance need not be thread-safe: it will only be
- * used by a single thread. */
+ /**
+ * Returns {@link SortedDocValues} for this field. The returned instance need not be thread-safe:
+ * it will only be used by a single thread.
+ */
public abstract SortedDocValues getSorted(FieldInfo field) throws IOException;
-
- /** Returns {@link SortedNumericDocValues} for this field.
- * The returned instance need not be thread-safe: it will only be
- * used by a single thread. */
+
+ /**
+ * Returns {@link SortedNumericDocValues} for this field. The returned instance need not be
+ * thread-safe: it will only be used by a single thread.
+ */
public abstract SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException;
-
- /** Returns {@link SortedSetDocValues} for this field.
- * The returned instance need not be thread-safe: it will only be
- * used by a single thread. */
+
+ /**
+ * Returns {@link SortedSetDocValues} for this field. The returned instance need not be
+ * thread-safe: it will only be used by a single thread.
+ */
public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;
-
- /**
+
+ /**
 * Checks consistency of this producer.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
- /**
- * Returns an instance optimized for merging. This instance may only be
- * consumed in the thread that called {@link #getMergeInstance()}.
- * <p>
- * The default implementation returns {@code this} */
+
+ /**
+ * Returns an instance optimized for merging. This instance may only be consumed in the thread
+ * that called {@link #getMergeInstance()}.
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public DocValuesProducer getMergeInstance() {
return this;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldInfosFormat.java
index d4b21d7..00e744e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FieldInfosFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldInfosFormat.java
@@ -16,9 +16,7 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.FieldInfos; // javadocs
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
@@ -26,18 +24,24 @@ import org.apache.lucene.store.IOContext;
/**
* Encodes/decodes {@link FieldInfos}
+ *
* @lucene.experimental
*/
public abstract class FieldInfosFormat {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected FieldInfosFormat() {
- }
-
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected FieldInfosFormat() {}
+
/** Read the {@link FieldInfos} previously written with {@link #write}. */
- public abstract FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException;
+ public abstract FieldInfos read(
+ Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
+ throws IOException;
- /** Writes the provided {@link FieldInfos} to the
- * directory. */
- public abstract void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException;
+ /** Writes the provided {@link FieldInfos} to the directory. */
+ public abstract void write(
+ Directory directory,
+ SegmentInfo segmentInfo,
+ String segmentSuffix,
+ FieldInfos infos,
+ IOContext context)
+ throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
index f4fc9ac..0659827 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
@@ -16,33 +16,27 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MappedMultiFields;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.ReaderSlice;
-/**
- * Abstract API that consumes terms, doc, freq, prox, offset and
- * payloads postings. Concrete implementations of this
- * actually do "something" with the postings (write it into
- * the index in a specific format).
+/**
+ * Abstract API that consumes terms, doc, freq, prox, offset and payloads postings. Concrete
+ * implementations of this actually do "something" with the postings (write it into the index in a
+ * specific format).
*
* @lucene.experimental
*/
-
public abstract class FieldsConsumer implements Closeable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected FieldsConsumer() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected FieldsConsumer() {}
// TODO: can we somehow compute stats for you...?
@@ -50,46 +44,38 @@ public abstract class FieldsConsumer implements Closeable {
// iterables, no counts/stats) base classes from
// Fields/Terms/Docs/AndPositions?
- /** Write all fields, terms and postings. This the "pull"
- * API, allowing you to iterate more than once over the
- * postings, somewhat analogous to using a DOM API to
- * traverse an XML tree.
+ /**
+ * Write all fields, terms and postings. This is the "pull" API, allowing you to iterate more
+ * than once over the postings, somewhat analogous to using a DOM API to traverse an XML tree.
*
- * <p><b>Notes</b>:
+ * <p><b>Notes</b>:
*
- * <ul>
- * <li> You must compute index statistics,
- * including each Term's docFreq and totalTermFreq,
- * as well as the summary sumTotalTermFreq,
- * sumTotalDocFreq and docCount.
- *
- * <li> You must skip terms that have no docs and
- * fields that have no terms, even though the provided
- * Fields API will expose them; this typically
- * requires lazily writing the field or term until
- * you've actually seen the first term or
- * document.
- *
- * <li> The provided Fields instance is limited: you
- * cannot call any methods that return
- * statistics/counts; you cannot pass a non-null
- * live docs when pulling docs/positions enums.
- * </ul>
+ * <ul>
+ * <li>You must compute index statistics, including each Term's docFreq and totalTermFreq, as
+ * well as the summary sumTotalTermFreq, sumTotalDocFreq and docCount.
+ * <li>You must skip terms that have no docs and fields that have no terms, even though the
+ * provided Fields API will expose them; this typically requires lazily writing the field or
+ * term until you've actually seen the first term or document.
+ * <li>The provided Fields instance is limited: you cannot call any methods that return
+ * statistics/counts; you cannot pass a non-null live docs when pulling docs/positions
+ * enums.
+ * </ul>
*/
public abstract void write(Fields fields, NormsProducer norms) throws IOException;
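
A sketch of the pull-style loop an implementation typically runs inside
write; the statistics tracking and lazy field/term starts required by the
notes above are elided:

    @Override
    public void write(Fields fields, NormsProducer norms) throws IOException {
      for (String field : fields) { // Fields iterates field names
        Terms terms = fields.terms(field);
        if (terms == null) {
          continue;
        }
        TermsEnum termsEnum = terms.iterator();
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
          // lazily start the field/term once the first document is seen,
          // then pull the postings for this term ...
        }
      }
    }
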
-
- /** Merges in the fields from the readers in
- * <code>mergeState</code>. The default implementation skips
- * and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
- * Implementations can override this method for more sophisticated
- * merging (bulk-byte copying, etc). */
+
+ /**
+ * Merges in the fields from the readers in <code>mergeState</code>. The default implementation
+ * skips and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
+ * Implementations can override this method for more sophisticated merging (bulk-byte copying,
+ * etc).
+ */
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
final List<Fields> fields = new ArrayList<>();
final List<ReaderSlice> slices = new ArrayList<>();
int docBase = 0;
- for(int readerIndex=0;readerIndex<mergeState.fieldsProducers.length;readerIndex++) {
+ for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
final FieldsProducer f = mergeState.fieldsProducers[readerIndex];
final int maxDoc = mergeState.maxDocs[readerIndex];
@@ -99,9 +85,11 @@ public abstract class FieldsConsumer implements Closeable {
docBase += maxDoc;
}
- Fields mergedFields = new MappedMultiFields(mergeState,
- new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
- slices.toArray(ReaderSlice.EMPTY_ARRAY)));
+ Fields mergedFields =
+ new MappedMultiFields(
+ mergeState,
+ new MultiFields(
+ fields.toArray(Fields.EMPTY_ARRAY), slices.toArray(ReaderSlice.EMPTY_ARRAY)));
write(mergedFields, norms);
}
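
As a hedged illustration of the pull API documented above, the sketch below walks fields, terms and postings while computing docFreq/totalTermFreq and skipping empty terms and fields; the class and method names are hypothetical and only the Fields/Terms/TermsEnum/PostingsEnum surface is assumed.

    import java.io.IOException;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BytesRef;

    // Hypothetical sketch: consumes the pull API, computes per-term stats,
    // and skips terms with no docs and fields with no terms.
    final class PullApiSketch {
      static void writeAll(Fields fields) throws IOException {
        for (String field : fields) {
          Terms terms = fields.terms(field);
          if (terms == null) {
            continue; // field exposed by the API but empty: must be skipped
          }
          TermsEnum termsEnum = terms.iterator();
          PostingsEnum postings = null;
          for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            postings = termsEnum.postings(postings, PostingsEnum.FREQS);
            int docFreq = 0;
            long totalTermFreq = 0;
            for (int doc = postings.nextDoc();
                doc != DocIdSetIterator.NO_MORE_DOCS;
                doc = postings.nextDoc()) {
              docFreq++;
              totalTermFreq += postings.freq();
            }
            if (docFreq == 0) {
              continue; // term with no surviving docs: skip, do not write
            }
            // a real FieldsConsumer would lazily start the field here, then
            // write the term, its postings, and the accumulated stats
          }
        }
      }
    }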
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java
index 481b160..7aa88a4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java
@@ -16,42 +16,39 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.Fields;
import org.apache.lucene.util.Accountable;
-/** Abstract API that produces terms, doc, freq, prox, offset and
- * payloads postings.
+/**
+ * Abstract API that produces terms, doc, freq, prox, offset and payload postings.
*
* @lucene.experimental
*/
-
public abstract class FieldsProducer extends Fields implements Closeable, Accountable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected FieldsProducer() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected FieldsProducer() {}
@Override
public abstract void close() throws IOException;
-
- /**
+
+ /**
* Checks consistency of this reader.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
- /**
- * Returns an instance optimized for merging. This instance may only be
- * consumed in the thread that called {@link #getMergeInstance()}.
- * <p>
- * The default implementation returns {@code this} */
+
+ /**
+ * Returns an instance optimized for merging. This instance may only be consumed in the thread
+ * that called {@link #getMergeInstance()}.
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public FieldsProducer getMergeInstance() {
return this;
}
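
A short hedged usage sketch of the contract above; postingsFormat and readState are assumed to be in scope, and "body" is an assumed field name.

    // Hypothetical usage sketch of the FieldsProducer contract.
    FieldsProducer producer = postingsFormat.fieldsProducer(readState);
    try {
      producer.checkIntegrity(); // may be costly: can checksum large files
      FieldsProducer merging = producer.getMergeInstance(); // use in this thread only
      Terms terms = merging.terms("body"); // "body" is an assumed field name
    } finally {
      producer.close();
    }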
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
index 4a5e934..0a71c7d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
@@ -16,13 +16,12 @@
*/
package org.apache.lucene.codecs;
-
/**
* A codec that forwards all its method calls to another codec.
- * <p>
- * Extend this class when you need to reuse the functionality of an existing
- * codec. For example, if you want to build a codec that redefines LuceneMN's
- * {@link LiveDocsFormat}:
+ *
+ * <p>Extend this class when you need to reuse the functionality of an existing codec. For example,
+ * if you want to build a codec that redefines LuceneMN's {@link LiveDocsFormat}:
+ *
* <pre class="prettyprint">
* public final class CustomCodec extends FilterCodec {
*
@@ -36,22 +35,21 @@ package org.apache.lucene.codecs;
*
* }
* </pre>
- *
- * <p><em>Please note:</em> Don't call {@link Codec#forName} from
- * the no-arg constructor of your own codec. When the SPI framework
- * loads your own Codec as SPI component, SPI has not yet fully initialized!
- * If you want to extend another Codec, instantiate it directly by calling
- * its constructor.
- *
+ *
+ * <p><em>Please note:</em> Don't call {@link Codec#forName} from the no-arg constructor of your own
+ * codec. When the SPI framework loads your own Codec as an SPI component, SPI has not yet fully
+ * initialized! If you want to extend another Codec, instantiate it directly by calling its
+ * constructor.
+ *
* @lucene.experimental
*/
public abstract class FilterCodec extends Codec {
/** The codec to filter. */
protected final Codec delegate;
-
- /** Sole constructor. When subclassing this codec,
- * create a no-arg ctor and pass the delegate codec
+
+ /**
+ * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec
* and a unique name to this ctor.
*/
protected FilterCodec(String name, Codec delegate) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/LiveDocsFormat.java
index 286b75d..87745d7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/LiveDocsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/LiveDocsFormat.java
@@ -16,31 +16,34 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
import java.util.Collection;
-
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
-/** Format for live/deleted documents
- * @lucene.experimental */
+/**
+ * Format for live/deleted documents.
+ *
+ * @lucene.experimental
+ */
public abstract class LiveDocsFormat {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected LiveDocsFormat() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected LiveDocsFormat() {}
/** Read live docs bits. */
- public abstract Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException;
-
- /** Persist live docs bits. Use {@link
- * SegmentCommitInfo#getNextDelGen} to determine the
- * generation of the deletes file you should write to. */
- public abstract void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException;
+ public abstract Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
+ throws IOException;
+
+ /**
+ * Persist live docs bits. Use {@link SegmentCommitInfo#getNextDelGen} to determine the generation
+ * of the deletes file you should write to.
+ */
+ public abstract void writeLiveDocs(
+ Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
+ throws IOException;
/** Records all files in use by this {@link SegmentCommitInfo} into the files argument. */
public abstract void files(SegmentCommitInfo info, Collection<String> files) throws IOException;
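
A small hedged sketch of the getNextDelGen() pattern described above; the "liv" extension matches the default codec's live-docs files, and the helper class is hypothetical.

    import org.apache.lucene.index.IndexFileNames;
    import org.apache.lucene.index.SegmentCommitInfo;

    // Hypothetical helper: derives the deletes file name that the next
    // writeLiveDocs call should target for this commit point.
    final class LiveDocsNaming {
      static String nextLiveDocsFile(SegmentCommitInfo info) {
        long gen = info.getNextDelGen(); // generation of the pending deletes file
        return IndexFileNames.fileNameFromGeneration(info.info.name, "liv", gen);
      }
    }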
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
index 78cceb1..7f62ec2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
@@ -16,32 +16,30 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
-
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.MathUtil;
/**
* This abstract class reads skip lists with multiple levels.
- *
- * See {@link MultiLevelSkipListWriter} for the information about the encoding
- * of the multi level skip lists.
- *
- * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
- * which defines the actual format of the skip data.
+ *
+ * <p>See {@link MultiLevelSkipListWriter} for information about the encoding of the multi-level
+ * skip lists.
+ *
+ * <p>Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)} which
+ * defines the actual format of the skip data.
+ *
* @lucene.experimental
*/
-
public abstract class MultiLevelSkipListReader implements Closeable {
/** the maximum number of skip levels possible for this index */
- protected int maxNumberOfSkipLevels;
-
+ protected int maxNumberOfSkipLevels;
+
/** number of levels in this skip list */
protected int numberOfSkipLevels;
-
+
// Expert: defines the number of top skip levels to buffer in memory.
// Reducing this number results in less memory usage, but possibly
// slower performance due to more random I/Os.
@@ -50,7 +48,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
// skipLevel entries, the second top level can not contain more
// than skipLevel^2 entries and so forth.
private int numberOfLevelsToBuffer = 1;
-
+
private int docCount;
/** skipStream for each level. */
@@ -59,16 +57,17 @@ public abstract class MultiLevelSkipListReader implements Closeable {
/** The start pointer of each skip level. */
private long skipPointer[];
- /** skipInterval of each level. */
+ /** skipInterval of each level. */
private int skipInterval[];
- /** Number of docs skipped per level.
- * It's possible for some values to overflow a signed int, but this has been accounted for.
+ /**
+ * Number of docs skipped per level. It's possible for some values to overflow a signed int, but
+ * this has been accounted for.
*/
private int[] numSkipped;
/** Doc id of current skip entry per level. */
- protected int[] skipDoc;
+ protected int[] skipDoc;
/** Doc id of last read skip entry with docId <= target. */
private int lastDoc;
@@ -76,14 +75,14 @@ public abstract class MultiLevelSkipListReader implements Closeable {
/** Child pointer of current skip entry per level. */
private long[] childPointer;
- /** childPointer of last read skip entry with docId <=
- * target. */
+ /** childPointer of last read skip entry with docId <= target. */
private long lastChildPointer;
private final int skipMultiplier;
/** Creates a {@code MultiLevelSkipListReader}. */
- protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
+ protected MultiLevelSkipListReader(
+ IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
this.skipStream = new IndexInput[maxSkipLevels];
this.skipPointer = new long[maxSkipLevels];
this.childPointer = new long[maxSkipLevels];
@@ -91,7 +90,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
this.maxNumberOfSkipLevels = maxSkipLevels;
this.skipInterval = new int[maxSkipLevels];
this.skipMultiplier = skipMultiplier;
- this.skipStream [0]= skipStream;
+ this.skipStream[0] = skipStream;
this.skipInterval[0] = skipInterval;
for (int i = 1; i < maxSkipLevels; i++) {
// cache skip intervals
@@ -100,22 +99,22 @@ public abstract class MultiLevelSkipListReader implements Closeable {
skipDoc = new int[maxSkipLevels];
}
- /** Creates a {@code MultiLevelSkipListReader}, where
- * {@code skipInterval} and {@code skipMultiplier} are
- * the same. */
+ /**
+ * Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code
+ * skipMultiplier} are the same.
+ */
protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
this(skipStream, maxSkipLevels, skipInterval, skipInterval);
}
-
- /** Returns the id of the doc to which the last call of {@link #skipTo(int)}
- * has skipped. */
+
+ /** Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. */
public int getDoc() {
return lastDoc;
}
-
-
- /** Skips entries to the first beyond the current whose document number is
- * greater than or equal to <i>target</i>. Returns the current doc count.
+
+ /**
+ * Skips entries to the first entry beyond the current one whose document number is greater than
+ * or equal to <i>target</i>. Returns the current doc count.
*/
public int skipTo(int target) throws IOException {
@@ -124,7 +123,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
int level = 0;
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
level++;
- }
+ }
while (level >= 0) {
if (target > skipDoc[level]) {
@@ -135,41 +134,40 @@ public abstract class MultiLevelSkipListReader implements Closeable {
// no more skips on this level, go down one level
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
seekChild(level - 1);
- }
+ }
level--;
}
}
-
+
return numSkipped[0] - skipInterval[0] - 1;
}
-
+
private boolean loadNextSkip(int level) throws IOException {
// we have to skip, the target document is greater than the current
- // skip list entry
+ // skip list entry
setLastSkipData(level);
-
+
numSkipped[level] += skipInterval[level];
// numSkipped may overflow a signed int, so compare as unsigned.
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
// this skip list is exhausted
skipDoc[level] = Integer.MAX_VALUE;
- if (numberOfSkipLevels > level) numberOfSkipLevels = level;
+ if (numberOfSkipLevels > level) numberOfSkipLevels = level;
return false;
}
// read next skip entry
skipDoc[level] += readSkipData(level, skipStream[level]);
-
+
if (level != 0) {
// read the child pointer if we are not on the leaf level
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
}
-
- return true;
+ return true;
}
-
+
/** Seeks the skip entry on the given level */
protected void seekChild(int level) throws IOException {
skipStream[level].seek(lastChildPointer);
@@ -193,24 +191,24 @@ public abstract class MultiLevelSkipListReader implements Closeable {
public void init(long skipPointer, int df) throws IOException {
this.skipPointer[0] = skipPointer;
this.docCount = df;
- assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
- : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
+ assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
+ : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
Arrays.fill(skipDoc, 0);
Arrays.fill(numSkipped, 0);
Arrays.fill(childPointer, 0);
-
+
for (int i = 1; i < numberOfSkipLevels; i++) {
skipStream[i] = null;
}
loadSkipLevels();
}
-
- /** Loads the skip levels */
+
+ /** Loads the skip levels */
private void loadSkipLevels() throws IOException {
if (docCount <= skipInterval[0]) {
numberOfSkipLevels = 1;
} else {
- numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
+ numberOfSkipLevels = 1 + MathUtil.log(docCount / skipInterval[0], skipMultiplier);
}
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
@@ -218,13 +216,13 @@ public abstract class MultiLevelSkipListReader implements Closeable {
}
skipStream[0].seek(skipPointer[0]);
-
+
int toBuffer = numberOfLevelsToBuffer;
-
+
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
// the length of the current level
long length = skipStream[0].readVLong();
-
+
// the start pointer of the current level
skipPointer[i] = skipStream[0].getFilePointer();
if (toBuffer > 0) {
@@ -234,44 +232,43 @@ public abstract class MultiLevelSkipListReader implements Closeable {
} else {
// clone this stream, it is already at the start of the current level
skipStream[i] = skipStream[0].clone();
-
+
// move base stream beyond the current level
skipStream[0].seek(skipStream[0].getFilePointer() + length);
}
}
-
+
// use base stream for the lowest level
skipPointer[0] = skipStream[0].getFilePointer();
}
-
+
/**
* Subclasses must implement the actual skip data encoding in this method.
- *
+ *
* @param level the level skip data shall be read from
* @param skipStream the skip stream to read from
- */
+ */
protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
-
+
/** Copies the values of the last read skip entry on this level */
protected void setLastSkipData(int level) {
lastDoc = skipDoc[level];
lastChildPointer = childPointer[level];
}
-
/** used to buffer the top skip levels */
- private final static class SkipBuffer extends IndexInput {
+ private static final class SkipBuffer extends IndexInput {
private byte[] data;
private long pointer;
private int pos;
-
+
SkipBuffer(IndexInput input, int length) throws IOException {
super("SkipBuffer on " + input);
data = new byte[length];
pointer = input.getFilePointer();
input.readBytes(data, 0, length);
}
-
+
@Override
public void close() {
data = null;
@@ -300,9 +297,9 @@ public abstract class MultiLevelSkipListReader implements Closeable {
@Override
public void seek(long pos) {
- this.pos = (int) (pos - pointer);
+ this.pos = (int) (pos - pointer);
}
-
+
@Override
public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
throw new UnsupportedOperationException();
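
A minimal hedged subclass of the reader above, assuming each skip entry is a single VInt doc delta; readSkipData only returns the delta, which the base class adds to skipDoc[level].

    import java.io.IOException;
    import org.apache.lucene.codecs.MultiLevelSkipListReader;
    import org.apache.lucene.store.IndexInput;

    // Hypothetical minimal reader: one VInt doc delta per skip entry; the
    // base class handles levels, child pointers, buffering and seeking.
    final class DocDeltaSkipReader extends MultiLevelSkipListReader {
      DocDeltaSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
        super(skipStream, maxSkipLevels, skipInterval);
      }

      @Override
      protected int readSkipData(int level, IndexInput skipStream) throws IOException {
        return skipStream.readVInt(); // delta that the caller adds to skipDoc[level]
      }
    }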
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
index 7cf04a0..7a13a63 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
@@ -16,9 +16,7 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@@ -26,75 +24,77 @@ import org.apache.lucene.util.MathUtil;
/**
* This abstract class writes skip lists with multiple levels.
- *
+ *
* <pre>
*
* Example for skipInterval = 3:
* c (skip level 2)
- * c c c (skip level 1)
+ * c c c (skip level 1)
* x x x x x x x x x x (skip level 0)
* d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
* 3 6 9 12 15 18 21 24 27 30 (df)
- *
+ *
* d - document
* x - skip data
* c - skip data with child pointer
- *
+ *
* Skip level i contains every skipInterval-th entry from skip level i-1.
 * Therefore the number of entries on level i is: floor(df / (skipInterval ^ (i + 1))).
- *
+ *
* Each skip entry on a level {@code i>0} contains a pointer to the corresponding skip entry in list i-1.
* This guarantees a logarithmic amount of skips to find the target document.
- *
+ *
* While this class takes care of writing the different skip levels,
* subclasses must define the actual format of the skip data.
* </pre>
+ *
* @lucene.experimental
*/
-
public abstract class MultiLevelSkipListWriter {
/** number of levels in this skip list */
protected final int numberOfSkipLevels;
-
+
/** the skip interval in the list with level = 0 */
private final int skipInterval;
/** skipInterval used for level > 0 */
private final int skipMultiplier;
-
- /** for every skip level a different buffer is used */
+
+ /** for every skip level a different buffer is used */
private ByteBuffersDataOutput[] skipBuffer;
/** Creates a {@code MultiLevelSkipListWriter}. */
- protected MultiLevelSkipListWriter(int skipInterval, int skipMultiplier, int maxSkipLevels, int df) {
+ protected MultiLevelSkipListWriter(
+ int skipInterval, int skipMultiplier, int maxSkipLevels, int df) {
this.skipInterval = skipInterval;
this.skipMultiplier = skipMultiplier;
-
+
int numberOfSkipLevels;
// calculate the maximum number of skip levels for this document frequency
if (df <= skipInterval) {
numberOfSkipLevels = 1;
} else {
- numberOfSkipLevels = 1+MathUtil.log(df/skipInterval, skipMultiplier);
+ numberOfSkipLevels = 1 + MathUtil.log(df / skipInterval, skipMultiplier);
}
-
+
// make sure it does not exceed maxSkipLevels
if (numberOfSkipLevels > maxSkipLevels) {
numberOfSkipLevels = maxSkipLevels;
}
this.numberOfSkipLevels = numberOfSkipLevels;
}
-
- /** Creates a {@code MultiLevelSkipListWriter}, where
- * {@code skipInterval} and {@code skipMultiplier} are
- * the same. */
+
+ /**
+ * Creates a {@code MultiLevelSkipListWriter}, where {@code skipInterval} and {@code
+ * skipMultiplier} are the same.
+ */
protected MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) {
this(skipInterval, skipInterval, maxSkipLevels, df);
}
/** Allocates internal skip buffers. */
protected void init() {
- skipBuffer = new ByteBuffersDataOutput [numberOfSkipLevels];
+ skipBuffer = new ByteBuffersDataOutput[numberOfSkipLevels];
for (int i = 0; i < numberOfSkipLevels; i++) {
skipBuffer[i] = ByteBuffersDataOutput.newResettableInstance();
}
@@ -108,22 +108,22 @@ public abstract class MultiLevelSkipListWriter {
for (int i = 0; i < skipBuffer.length; i++) {
skipBuffer[i].reset();
}
- }
+ }
}
/**
* Subclasses must implement the actual skip data encoding in this method.
- *
+ *
 * @param level the level skip data shall be written for
* @param skipBuffer the skip buffer to write to
*/
protected abstract void writeSkipData(int level, DataOutput skipBuffer) throws IOException;
/**
- * Writes the current skip data to the buffers. The current document frequency determines
- * the max level is skip data is to be written to.
- *
- * @param df the current document frequency
+ * Writes the current skip data to the buffers. The current document frequency determines the max
+ * level the skip data is to be written to.
+ *
+ * @param df the current document frequency
* @throws IOException If an I/O error occurs
*/
public void bufferSkip(int df) throws IOException {
@@ -131,41 +131,41 @@ public abstract class MultiLevelSkipListWriter {
assert df % skipInterval == 0;
int numLevels = 1;
df /= skipInterval;
-
+
// determine max level
while ((df % skipMultiplier) == 0 && numLevels < numberOfSkipLevels) {
numLevels++;
df /= skipMultiplier;
}
-
+
long childPointer = 0;
-
+
for (int level = 0; level < numLevels; level++) {
writeSkipData(level, skipBuffer[level]);
-
+
long newChildPointer = skipBuffer[level].size();
-
+
if (level != 0) {
// store child pointers for all levels except the lowest
skipBuffer[level].writeVLong(childPointer);
}
-
- //remember the childPointer for the next level
+
+ // remember the childPointer for the next level
childPointer = newChildPointer;
}
}
/**
* Writes the buffered skip lists to the given output.
- *
- * @param output the IndexOutput the skip lists shall be written to
+ *
+ * @param output the IndexOutput the skip lists shall be written to
 * @return the pointer where the skip list starts
*/
public long writeSkip(IndexOutput output) throws IOException {
long skipPointer = output.getFilePointer();
- //System.out.println("skipper.writeSkip fp=" + skipPointer);
+ // System.out.println("skipper.writeSkip fp=" + skipPointer);
if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;
-
+
for (int level = numberOfSkipLevels - 1; level > 0; level--) {
long length = skipBuffer[level].size();
if (length > 0) {
@@ -174,7 +174,7 @@ public abstract class MultiLevelSkipListWriter {
}
}
skipBuffer[0].copyTo(output);
-
+
return skipPointer;
}
}
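
The writer side can be sketched the same way (hypothetical class, mirroring the reader sketch above: one VInt doc delta per skip point). For the constructor math above, df = 4096 with skipInterval = skipMultiplier = 16 gives 1 + log_16(4096 / 16) = 3 levels, assuming maxSkipLevels >= 3.

    import java.io.IOException;
    import org.apache.lucene.codecs.MultiLevelSkipListWriter;
    import org.apache.lucene.store.DataOutput;

    // Hypothetical minimal writer: buffers one VInt doc delta per level;
    // bufferSkip(df) decides which levels receive an entry.
    final class DocDeltaSkipWriter extends MultiLevelSkipListWriter {
      private final int[] lastSkipDoc;
      private int curDoc;

      DocDeltaSkipWriter(int skipInterval, int maxSkipLevels, int df) {
        super(skipInterval, maxSkipLevels, df);
        lastSkipDoc = new int[numberOfSkipLevels];
      }

      void skipDoc(int doc) {
        curDoc = doc; // the doc the next bufferSkip(df) call refers to
      }

      @Override
      protected void writeSkipData(int level, DataOutput skipBuffer) throws IOException {
        skipBuffer.writeVInt(curDoc - lastSkipDoc[level]); // delta on this level
        lastSkipDoc[level] = curDoc;
      }
    }

A caller would initialize the buffers, invoke skipDoc(doc) plus bufferSkip(df) every skipInterval documents, and finish with writeSkip(output).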
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java b/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java
index 8f4d69c..ed248b5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java
@@ -19,9 +19,12 @@ package org.apache.lucene.codecs;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.util.BytesRef;
-/** {@link PointValues} whose order of points can be changed.
- * This class is useful for codecs to optimize flush.
- * @lucene.internal */
+/**
+ * {@link PointValues} whose order of points can be changed. This class is useful for codecs to
+ * optimize flush.
+ *
+ * @lucene.internal
+ */
public abstract class MutablePointValues extends PointValues {
/** Sole constructor. */
@@ -38,5 +41,4 @@ public abstract class MutablePointValues extends PointValues {
/** Swap the i-th and j-th values. */
public abstract void swap(int i, int j);
-
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
index c21fc01..69a1b5a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
@@ -20,54 +20,51 @@ import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
-/**
- * Abstract API that consumes normalization values.
- * Concrete implementations of this
- * actually do "something" with the norms (write it into
- * the index in a specific format).
- * <p>
- * The lifecycle is:
+/**
+ * Abstract API that consumes normalization values. Concrete implementations of this actually do
+ * "something" with the norms (write it into the index in a specific format).
+ *
+ * <p>The lifecycle is:
+ *
* <ol>
- * <li>NormsConsumer is created by
- * {@link NormsFormat#normsConsumer(SegmentWriteState)}.
- * <li>{@link #addNormsField} is called for each field with
- * normalization values. The API is a "pull" rather
- * than "push", and the implementation is free to iterate over the
- * values multiple times ({@link Iterable#iterator()}).
+ * <li>NormsConsumer is created by {@link NormsFormat#normsConsumer(SegmentWriteState)}.
+ * <li>{@link #addNormsField} is called for each field with normalization values. The API is a
+ * "pull" rather than "push", and the implementation is free to iterate over the values
+ * multiple times ({@link Iterable#iterator()}).
* <li>After all fields are added, the consumer is {@link #close}d.
* </ol>
*
* @lucene.experimental
*/
public abstract class NormsConsumer implements Closeable {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
+
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected NormsConsumer() {}
-
+
/**
* Writes normalization values for a field.
+ *
* @param field field information
* @param normsProducer NormsProducer of the numeric norm values
* @throws IOException if an I/O error occurred.
*/
- public abstract void addNormsField(FieldInfo field, NormsProducer normsProducer) throws IOException;
-
- /** Merges in the fields from the readers in
- * <code>mergeState</code>. The default implementation
- * calls {@link #mergeNormsField} for each field,
- * filling segments with missing norms for the field with zeros.
- * Implementations can override this method
- * for more sophisticated merging (bulk-byte copying, etc). */
+ public abstract void addNormsField(FieldInfo field, NormsProducer normsProducer)
+ throws IOException;
+
+ /**
+ * Merges in the fields from the readers in <code>mergeState</code>. The default implementation
+ * calls {@link #mergeNormsField} for each field, filling in zeros for segments that are missing
+ * norms for the field. Implementations can override this method for more sophisticated merging
+ * (bulk-byte copying, etc).
+ */
public void merge(MergeState mergeState) throws IOException {
- for(NormsProducer normsProducer : mergeState.normsProducers) {
+ for (NormsProducer normsProducer : mergeState.normsProducers) {
if (normsProducer != null) {
normsProducer.checkIntegrity();
}
@@ -78,12 +75,12 @@ public abstract class NormsConsumer implements Closeable {
}
}
}
-
+
/** Tracks state of one numeric sub-reader that we are merging */
private static class NumericDocValuesSub extends DocIDMerger.Sub {
private final NumericDocValues values;
-
+
public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values) {
super(docMap);
this.values = values;
@@ -98,94 +95,95 @@ public abstract class NormsConsumer implements Closeable {
/**
* Merges the norms from <code>toMerge</code>.
- * <p>
- * The default implementation calls {@link #addNormsField}, passing
- * an Iterable that merges and filters deleted documents on the fly.
+ *
+ * <p>The default implementation calls {@link #addNormsField}, passing an Iterable that merges and
+ * filters deleted documents on the fly.
*/
- public void mergeNormsField(final FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
+ public void mergeNormsField(final FieldInfo mergeFieldInfo, final MergeState mergeState)
+ throws IOException {
// TODO: try to share code with default merge of DVConsumer by passing MatchAllBits ?
- addNormsField(mergeFieldInfo,
- new NormsProducer() {
- @Override
- public NumericDocValues getNorms(FieldInfo fieldInfo) throws IOException {
- if (fieldInfo != mergeFieldInfo) {
- throw new IllegalArgumentException("wrong fieldInfo");
- }
-
- List<NumericDocValuesSub> subs = new ArrayList<>();
- assert mergeState.docMaps.length == mergeState.docValuesProducers.length;
- for (int i=0;i<mergeState.docValuesProducers.length;i++) {
- NumericDocValues norms = null;
- NormsProducer normsProducer = mergeState.normsProducers[i];
- if (normsProducer != null) {
- FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
- if (readerFieldInfo != null && readerFieldInfo.hasNorms()) {
- norms = normsProducer.getNorms(readerFieldInfo);
- }
- }
-
- if (norms != null) {
- subs.add(new NumericDocValuesSub(mergeState.docMaps[i], norms));
- }
- }
-
- final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
-
- return new NumericDocValues() {
- private int docID = -1;
- private NumericDocValuesSub current;
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- current = docIDMerger.next();
- if (current == null) {
- docID = NO_MORE_DOCS;
- } else {
- docID = current.mappedDocID;
- }
- return docID;
- }
-
- @Override
- public int advance(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long cost() {
- return 0;
- }
-
- @Override
- public long longValue() throws IOException {
- return current.values.longValue();
- }
- };
- }
-
- @Override
- public void checkIntegrity() {
- }
-
- @Override
- public void close() {
- }
-
- @Override
- public long ramBytesUsed() {
- return 0;
- }
- });
+ addNormsField(
+ mergeFieldInfo,
+ new NormsProducer() {
+ @Override
+ public NumericDocValues getNorms(FieldInfo fieldInfo) throws IOException {
+ if (fieldInfo != mergeFieldInfo) {
+ throw new IllegalArgumentException("wrong fieldInfo");
+ }
+
+ List<NumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == mergeState.docValuesProducers.length;
+ for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
+ NumericDocValues norms = null;
+ NormsProducer normsProducer = mergeState.normsProducers[i];
+ if (normsProducer != null) {
+ FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
+ if (readerFieldInfo != null && readerFieldInfo.hasNorms()) {
+ norms = normsProducer.getNorms(readerFieldInfo);
+ }
+ }
+
+ if (norms != null) {
+ subs.add(new NumericDocValuesSub(mergeState.docMaps[i], norms));
+ }
+ }
+
+ final DocIDMerger<NumericDocValuesSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
+
+ return new NumericDocValues() {
+ private int docID = -1;
+ private NumericDocValuesSub current;
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ current = docIDMerger.next();
+ if (current == null) {
+ docID = NO_MORE_DOCS;
+ } else {
+ docID = current.mappedDocID;
+ }
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return 0;
+ }
+
+ @Override
+ public long longValue() throws IOException {
+ return current.values.longValue();
+ }
+ };
+ }
+
+ @Override
+ public void checkIntegrity() {}
+
+ @Override
+ public void close() {}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
+ });
}
}
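
The documented lifecycle as a hedged driver sketch; normsFormat, writeState, fieldInfo and normsProducer are assumed to be in scope.

    // Hypothetical sketch of the lifecycle above: create the consumer, add
    // norms once per field that has them, then close exactly once.
    NormsConsumer consumer = normsFormat.normsConsumer(writeState);
    boolean success = false;
    try {
      consumer.addNormsField(fieldInfo, normsProducer);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(consumer);
      } else {
        IOUtils.closeWhileHandlingException(consumer);
      }
    }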
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
index 6b5afbd..646fa17 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
@@ -16,33 +16,25 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
-/**
- * Encodes/decodes per-document score normalization values.
- */
+/** Encodes/decodes per-document score normalization values. */
public abstract class NormsFormat {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected NormsFormat() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected NormsFormat() {}
- /** Returns a {@link NormsConsumer} to write norms to the
- * index. */
+ /** Returns a {@link NormsConsumer} to write norms to the index. */
public abstract NormsConsumer normsConsumer(SegmentWriteState state) throws IOException;
- /**
- * Returns a {@link NormsProducer} to read norms from the index.
- * <p>
- * NOTE: by the time this call returns, it must hold open any files it will
- * need to use; else, those files may be deleted. Additionally, required files
- * may be deleted during the execution of this call before there is a chance
- * to open them. Under these circumstances an IOException should be thrown by
- * the implementation. IOExceptions are expected and will automatically cause
+ /**
+ * Returns a {@link NormsProducer} to read norms from the index.
+ *
+ * <p>NOTE: by the time this call returns, it must hold open any files it will need to use; else,
+ * those files may be deleted. Additionally, required files may be deleted during the execution of
+ * this call before there is a chance to open them. Under these circumstances an IOException
+ * should be thrown by the implementation. IOExceptions are expected and will automatically cause
* a retry of the segment opening logic with the newly revised segments.
*/
public abstract NormsProducer normsProducer(SegmentReadState state) throws IOException;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
index 647d9e9..8af9d72 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
@@ -18,41 +18,42 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Accountable;
-
-/** Abstract API that produces field normalization values
+/**
+ * Abstract API that produces field normalization values.
*
* @lucene.experimental
*/
public abstract class NormsProducer implements Closeable, Accountable {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
+
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected NormsProducer() {}
- /** Returns {@link NumericDocValues} for this field.
- * The returned instance need not be thread-safe: it will only be
- * used by a single thread. */
+ /**
+ * Returns {@link NumericDocValues} for this field. The returned instance need not be thread-safe:
+ * it will only be used by a single thread.
+ */
public abstract NumericDocValues getNorms(FieldInfo field) throws IOException;
-
- /**
+
+ /**
* Checks consistency of this producer
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
- /**
- * Returns an instance optimized for merging. This instance may only be used
- * from the thread that acquires it.
- * <p>
- * The default implementation returns {@code this} */
+
+ /**
+ * Returns an instance optimized for merging. This instance may only be used from the thread that
+ * acquires it.
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public NormsProducer getMergeInstance() {
return this;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsFormat.java
index 1723e42..8979768 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsFormat.java
@@ -16,67 +16,61 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
-/**
+/**
* Encodes/decodes indexed points.
*
- * @lucene.experimental */
+ * @lucene.experimental
+ */
public abstract class PointsFormat {
- /**
- * Creates a new point format.
- */
- protected PointsFormat() {
- }
+ /** Creates a new point format. */
+ protected PointsFormat() {}
/** Writes a new segment */
public abstract PointsWriter fieldsWriter(SegmentWriteState state) throws IOException;
- /** Reads a segment. NOTE: by the time this call
- * returns, it must hold open any files it will need to
- * use; else, those files may be deleted.
- * Additionally, required files may be deleted during the execution of
- * this call before there is a chance to open them. Under these
- * circumstances an IOException should be thrown by the implementation.
- * IOExceptions are expected and will automatically cause a retry of the
- * segment opening logic with the newly revised segments.
- * */
+ /**
+ * Reads a segment. NOTE: by the time this call returns, it must hold open any files it will need
+ * to use; else, those files may be deleted. Additionally, required files may be deleted during
+ * the execution of this call before there is a chance to open them. Under these circumstances an
+ * IOException should be thrown by the implementation. IOExceptions are expected and will
+ * automatically cause a retry of the segment opening logic with the newly revised segments.
+ */
public abstract PointsReader fieldsReader(SegmentReadState state) throws IOException;
/** A {@code PointsFormat} that has nothing indexed */
- public static final PointsFormat EMPTY = new PointsFormat() {
- @Override
- public PointsWriter fieldsWriter(SegmentWriteState state) {
- throw new UnsupportedOperationException();
- }
+ public static final PointsFormat EMPTY =
+ new PointsFormat() {
+ @Override
+ public PointsWriter fieldsWriter(SegmentWriteState state) {
+ throw new UnsupportedOperationException();
+ }
- @Override
- public PointsReader fieldsReader(SegmentReadState state) {
- return new PointsReader() {
- @Override
- public void close() {
- }
+ @Override
+ public PointsReader fieldsReader(SegmentReadState state) {
+ return new PointsReader() {
+ @Override
+ public void close() {}
- @Override
- public long ramBytesUsed() {
- return 0L;
- }
+ @Override
+ public long ramBytesUsed() {
+ return 0L;
+ }
- @Override
- public void checkIntegrity() {
- }
+ @Override
+ public void checkIntegrity() {}
- @Override
- public PointValues getValues(String field) {
- throw new IllegalArgumentException("field=\"" + field + "\" was not indexed with points");
- }
- };
- }
- };
+ @Override
+ public PointValues getValues(String field) {
+ throw new IllegalArgumentException(
+ "field=\"" + field + "\" was not indexed with points");
+ }
+ };
+ }
+ };
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
index 213b72e..72c640a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
@@ -16,14 +16,13 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.PointValues;
import org.apache.lucene.util.Accountable;
-/** Abstract API to visit point values.
+/**
+ * Abstract API to visit point values.
*
* @lucene.experimental
*/
@@ -32,11 +31,12 @@ public abstract class PointsReader implements Closeable, Accountable {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected PointsReader() {}
- /**
+ /**
* Checks consistency of this reader.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
@@ -44,11 +44,12 @@ public abstract class PointsReader implements Closeable, Accountable {
/** Return {@link PointValues} for the given {@code field}. */
public abstract PointValues getValues(String field) throws IOException;
- /**
- * Returns an instance optimized for merging. This instance may only be used
- * in the thread that acquires it.
- * <p>
- * The default implementation returns {@code this} */
+ /**
+ * Returns an instance optimized for merging. This instance may only be used in the thread that
+ * acquires it.
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public PointsReader getMergeInstance() {
return this;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
index 1f62c9f..b7c9769 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
@@ -16,35 +16,33 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues;
-/** Abstract API to write points
+/**
+ * Abstract API to write points.
*
* @lucene.experimental
*/
-
public abstract class PointsWriter implements Closeable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected PointsWriter() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected PointsWriter() {}
/** Write all values contained in the provided reader */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException;
- /** Default naive merge implementation for one field: it just re-indexes all the values
- * from the incoming segment. The default codec overrides this for 1D fields and uses
- * a faster but more complex implementation. */
+ /**
+ * Default naive merge implementation for one field: it just re-indexes all the values from the
+ * incoming segment. The default codec overrides this for 1D fields and uses a faster but more
+ * complex implementation.
+ */
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
long maxPointCount = 0;
int docCount = 0;
- for (int i=0;i<mergeState.pointsReaders.length;i++) {
+ for (int i = 0; i < mergeState.pointsReaders.length; i++) {
PointsReader pointsReader = mergeState.pointsReaders[i];
if (pointsReader != null) {
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
@@ -59,17 +57,18 @@ public abstract class PointsWriter implements Closeable {
}
final long finalMaxPointCount = maxPointCount;
final int finalDocCount = docCount;
- writeField(fieldInfo,
+ writeField(
+ fieldInfo,
new PointsReader() {
-
+
@Override
public long ramBytesUsed() {
return 0;
}
-
+
@Override
public void close() throws IOException {}
-
+
@Override
public PointValues getValues(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
@@ -77,10 +76,10 @@ public abstract class PointsWriter implements Closeable {
}
return new PointValues() {
-
+
@Override
public void intersect(IntersectVisitor mergedVisitor) throws IOException {
- for (int i=0;i<mergeState.pointsReaders.length;i++) {
+ for (int i = 0; i < mergeState.pointsReaders.length; i++) {
PointsReader pointsReader = mergeState.pointsReaders[i];
if (pointsReader == null) {
// This segment has no points
@@ -102,28 +101,30 @@ public abstract class PointsWriter implements Closeable {
continue;
}
MergeState.DocMap docMap = mergeState.docMaps[i];
- values.intersect(new IntersectVisitor() {
- @Override
- public void visit(int docID) {
- // Should never be called because our compare method never returns Relation.CELL_INSIDE_QUERY
- throw new IllegalStateException();
- }
-
- @Override
- public void visit(int docID, byte[] packedValue) throws IOException {
- int newDocID = docMap.get(docID);
- if (newDocID != -1) {
- // Not deleted:
- mergedVisitor.visit(newDocID, packedValue);
- }
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
- // Forces this segment's PointsReader to always visit all docs + values:
- return Relation.CELL_CROSSES_QUERY;
- }
- });
+ values.intersect(
+ new IntersectVisitor() {
+ @Override
+ public void visit(int docID) {
+ // Should never be called because our compare method never returns
+ // Relation.CELL_INSIDE_QUERY
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) throws IOException {
+ int newDocID = docMap.get(docID);
+ if (newDocID != -1) {
+ // Not deleted:
+ mergedVisitor.visit(newDocID, packedValue);
+ }
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+ // Forces this segment's PointsReader to always visit all docs + values:
+ return Relation.CELL_CROSSES_QUERY;
+ }
+ });
}
}
@@ -168,7 +169,7 @@ public abstract class PointsWriter implements Closeable {
}
};
}
-
+
@Override
public void checkIntegrity() throws IOException {
throw new UnsupportedOperationException();
@@ -176,8 +177,10 @@ public abstract class PointsWriter implements Closeable {
});
}
- /** Default merge implementation to merge incoming points readers by visiting all their points and
- * adding to this writer */
+ /**
+ * Default merge implementation to merge incoming points readers by visiting all their points and
+ * adding to this writer
+ */
public void merge(MergeState mergeState) throws IOException {
// check each incoming reader
for (PointsReader reader : mergeState.pointsReaders) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java
index 7994e05..4edb32c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java
@@ -16,44 +16,45 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
import java.util.ServiceLoader;
import java.util.Set;
-
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; // javadocs
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.NamedSPILoader;
-/**
+/**
* Encodes/decodes terms, postings, and proximity data.
- * <p>
- * Note, when extending this class, the name ({@link #getName}) may
- * written into the index in certain configurations. In order for the segment
- * to be read, the name must resolve to your implementation via {@link #forName(String)}.
- * This method uses Java's
- * {@link ServiceLoader Service Provider Interface} (SPI) to resolve format names.
- * <p>
- * If you implement your own format, make sure that it has a no-arg constructor
- * so SPI can load it.
+ *
+ * <p>Note, when extending this class, the name ({@link #getName}) may be written into the index in
+ * certain configurations. In order for the segment to be read, the name must resolve to your
+ * implementation via {@link #forName(String)}. This method uses Java's {@link ServiceLoader Service
+ * Provider Interface} (SPI) to resolve format names.
+ *
+ * <p>If you implement your own format, make sure that it has a no-arg constructor so SPI can load
+ * it.
+ *
* @see ServiceLoader
- * @lucene.experimental */
+ * @lucene.experimental
+ */
public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
/**
- * This static holder class prevents classloading deadlock by delaying
- * init of postings formats until needed.
+ * This static holder class prevents classloading deadlock by delaying init of postings formats
+ * until needed.
*/
private static final class Holder {
- private static final NamedSPILoader<PostingsFormat> LOADER = new NamedSPILoader<>(PostingsFormat.class);
-
+ private static final NamedSPILoader<PostingsFormat> LOADER =
+ new NamedSPILoader<>(PostingsFormat.class);
+
private Holder() {}
-
+
static NamedSPILoader<PostingsFormat> getLoader() {
if (LOADER == null) {
- throw new IllegalStateException("You tried to lookup a PostingsFormat by name before all formats could be initialized. "+
- "This likely happens if you call PostingsFormat#forName from a PostingsFormat's ctor.");
+ throw new IllegalStateException(
+ "You tried to lookup a PostingsFormat by name before all formats could be initialized. "
+ + "This likely happens if you call PostingsFormat#forName from a PostingsFormat's ctor.");
}
return LOADER;
}
@@ -62,22 +63,22 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
/** Zero-length {@code PostingsFormat} array. */
public static final PostingsFormat[] EMPTY = new PostingsFormat[0];
- /** Unique name that's used to retrieve this format when
- * reading the index.
- */
+ /** Unique name that's used to retrieve this format when reading the index. */
private final String name;
-
+
/**
* Creates a new postings format.
- * <p>
- * The provided name will be written into the index segment in some configurations
- * (such as when using {@link PerFieldPostingsFormat}): in such configurations,
- * for the segment to be read this class should be registered with Java's
- * SPI mechanism (registered in META-INF/ of your jar file, etc).
+ *
+ * <p>The provided name will be written into the index segment in some configurations (such as
+ * when using {@link PerFieldPostingsFormat}): in such configurations, for the segment to be read
+ * this class should be registered with Java's SPI mechanism (registered in META-INF/ of your jar
+ * file, etc).
+ *
* @param name must be all ascii alphanumeric, and less than 128 characters in length.
*/
protected PostingsFormat(String name) {
- // TODO: can we somehow detect name conflicts here? Two different classes trying to claim the same name? Otherwise you see confusing errors...
+ // TODO: can we somehow detect name conflicts here? Two different classes trying to claim the
+ // same name? Otherwise you see confusing errors...
NamedSPILoader.checkServiceName(name);
this.name = name;
}
@@ -87,46 +88,44 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
public final String getName() {
return name;
}
-
+
/** Writes a new segment */
public abstract FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException;
- /** Reads a segment. NOTE: by the time this call
- * returns, it must hold open any files it will need to
- * use; else, those files may be deleted.
- * Additionally, required files may be deleted during the execution of
- * this call before there is a chance to open them. Under these
- * circumstances an IOException should be thrown by the implementation.
- * IOExceptions are expected and will automatically cause a retry of the
- * segment opening logic with the newly revised segments.
- * */
+ /**
+ * Reads a segment. NOTE: by the time this call returns, it must hold open any files it will need
+ * to use; else, those files may be deleted. Additionally, required files may be deleted during
+ * the execution of this call before there is a chance to open them. Under these circumstances an
+ * IOException should be thrown by the implementation. IOExceptions are expected and will
+ * automatically cause a retry of the segment opening logic with the newly revised segments.
+ */
public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;
@Override
public String toString() {
return "PostingsFormat(name=" + name + ")";
}
-
+
/** looks up a format by name */
public static PostingsFormat forName(String name) {
return Holder.getLoader().lookup(name);
}
-
+
/** returns a list of all available format names */
public static Set<String> availablePostingsFormats() {
return Holder.getLoader().availableServices();
}
-
- /**
- * Reloads the postings format list from the given {@link ClassLoader}.
- * Changes to the postings formats are visible after the method ends, all
- * iterators ({@link #availablePostingsFormats()},...) stay consistent.
- *
- * <p><b>NOTE:</b> Only new postings formats are added, existing ones are
- * never removed or replaced.
- *
- * <p><em>This method is expensive and should only be called for discovery
- * of new postings formats on the given classpath/classloader!</em>
+
+ /**
+ * Reloads the postings format list from the given {@link ClassLoader}. Changes to the postings
+ * formats are visible after the method ends; all iterators ({@link
+ * #availablePostingsFormats()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new postings formats are added, existing ones are never removed or
+ * replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery of new postings formats
+ * on the given classpath/classloader!</em>
*/
public static void reloadPostingsFormats(ClassLoader classloader) {
Holder.getLoader().reload(classloader);
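
A small hedged sketch of the SPI lookup described above; the format name "Lucene84" is an assumption, so availability is checked first.

    // Hypothetical lookup sketch: resolve a postings format by SPI name.
    java.util.Set<String> names = PostingsFormat.availablePostingsFormats();
    if (names.contains("Lucene84")) { // assumed name, version-dependent
      PostingsFormat pf = PostingsFormat.forName("Lucene84");
      System.out.println(pf); // prints: PostingsFormat(name=Lucene84)
    }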
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
index a1244ca..545df6b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
@@ -16,27 +16,25 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
-
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
-/** The core terms dictionaries (BlockTermsReader,
- * BlockTreeTermsReader) interact with a single instance
- * of this class to manage creation of {@link org.apache.lucene.index.PostingsEnum} and
- * {@link org.apache.lucene.index.PostingsEnum} instances. It provides an
- * IndexInput (termsIn) where this class may read any
- * previously stored data that it had written in its
- * corresponding {@link PostingsWriterBase} at indexing
- * time.
- * @lucene.experimental */
+/**
+ * The core terms dictionaries (BlockTermsReader, BlockTreeTermsReader) interact with a single
+ * instance of this class to manage creation of {@link org.apache.lucene.index.PostingsEnum} and
+ * {@link org.apache.lucene.index.ImpactsEnum} instances. It provides an IndexInput (termsIn) where
+ * this class may read any previously stored data that it had written in its corresponding {@link
+ * PostingsWriterBase} at indexing time.
+ *
+ * @lucene.experimental
+ */
// TODO: maybe move under blocktree? but it's used by other terms dicts (e.g. Block)
@@ -45,39 +43,44 @@ import org.apache.lucene.util.Accountable;
// TermsDict + PostingsReader/WriterBase == PostingsConsumer/Producer
public abstract class PostingsReaderBase implements Closeable, Accountable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected PostingsReaderBase() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected PostingsReaderBase() {}
- /** Performs any initialization, such as reading and
- * verifying the header from the provided terms
- * dictionary {@link IndexInput}. */
+ /**
+ * Performs any initialization, such as reading and verifying the header from the provided terms
+ * dictionary {@link IndexInput}.
+ */
public abstract void init(IndexInput termsIn, SegmentReadState state) throws IOException;
/** Return a newly created empty TermState */
public abstract BlockTermState newTermState() throws IOException;
- /** Actually decode metadata for next term
- * @see PostingsWriterBase#encodeTerm
+ /**
+ * Actually decode metadata for next term
+ *
+ * @see PostingsWriterBase#encodeTerm
*/
- public abstract void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
+ public abstract void decodeTerm(
+ DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
- /** Must fully consume state, since after this call that
- * TermState may be reused. */
- public abstract PostingsEnum postings(FieldInfo fieldInfo, BlockTermState state, PostingsEnum reuse, int flags) throws IOException;
+ /** Must fully consume state, since after this call that TermState may be reused. */
+ public abstract PostingsEnum postings(
+ FieldInfo fieldInfo, BlockTermState state, PostingsEnum reuse, int flags) throws IOException;
/**
* Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
+ *
* @see #postings(FieldInfo, BlockTermState, PostingsEnum, int)
*/
- public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException;
+ public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
+ throws IOException;
- /**
+ /**
* Checks consistency of this reader.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
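
A minimal sketch (not from this patch) of how a terms dictionary drives this API; all
arguments are supplied by the caller and the usual org.apache.lucene imports are assumed:

    void readOneTerm(PostingsReaderBase reader, IndexInput termsIn, SegmentReadState state,
        FieldInfo fieldInfo, DataInput termMetadata) throws IOException {
      reader.init(termsIn, state);                      // verify header, once per segment
      BlockTermState termState = reader.newTermState(); // reusable scratch state
      reader.decodeTerm(termMetadata, fieldInfo, termState, /* absolute= */ true);
      PostingsEnum postings = reader.postings(fieldInfo, termState, null, PostingsEnum.FREQS);
      while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // consume postings.docID() / postings.freq() here
      }
    }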
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
index a8f8ed4..a98dc3b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
@@ -16,7 +16,8 @@
*/
package org.apache.lucene.codecs;
-
+import java.io.Closeable;
+import java.io.IOException;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
@@ -26,13 +27,10 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
-import java.io.Closeable;
-import java.io.IOException;
-
/**
- * Class that plugs into term dictionaries, such as {@link
- * BlockTreeTermsWriter}, and handles writing postings.
- *
+ * Class that plugs into term dictionaries, such as {@link BlockTreeTermsWriter}, and handles
+ * writing postings.
+ *
* @see PostingsReaderBase
* @lucene.experimental
*/
@@ -41,38 +39,37 @@ import java.io.IOException;
// TermsDict + PostingsReader/WriterBase == FieldsProducer/Consumer
public abstract class PostingsWriterBase implements Closeable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected PostingsWriterBase() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected PostingsWriterBase() {}
- /** Called once after startup, before any terms have been
- * added. Implementations typically write a header to
- * the provided {@code termsOut}. */
+ /**
+ * Called once after startup, before any terms have been added. Implementations typically write a
+ * header to the provided {@code termsOut}.
+ */
public abstract void init(IndexOutput termsOut, SegmentWriteState state) throws IOException;
- /** Write all postings for one term; use the provided
- * {@link TermsEnum} to pull a {@link org.apache.lucene.index.PostingsEnum}.
- * This method should not
- * re-position the {@code TermsEnum}! It is already
- * positioned on the term that should be written. This
- * method must set the bit in the provided {@link
- * FixedBitSet} for every docID written. If no docs
- * were written, this method should return null, and the
- * terms dict will skip the term. */
- public abstract BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException;
+ /**
+ * Write all postings for one term; use the provided {@link TermsEnum} to pull a {@link
+ * org.apache.lucene.index.PostingsEnum}. This method should not re-position the {@code
+ * TermsEnum}! It is already positioned on the term that should be written. This method must set
+ * the bit in the provided {@link FixedBitSet} for every docID written. If no docs were written,
+ * this method should return null, and the terms dict will skip the term.
+ */
+ public abstract BlockTermState writeTerm(
+ BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms)
+ throws IOException;
/**
- * Encode metadata as long[] and byte[]. {@code absolute} controls whether
- * current term is delta encoded according to latest term.
- * Usually elements in {@code longs} are file pointers, so each one always
- * increases when a new term is consumed. {@code out} is used to write generic
- * bytes, which are not monotonic.
+ * Encode metadata as long[] and byte[]. {@code absolute} controls whether the current term is
+ * delta encoded relative to the latest term. Usually elements in {@code longs} are file
+ * pointers, so each one always increases when a new term is consumed. {@code out} is used to
+ * write generic bytes, which are not monotonic.
*/
- public abstract void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
+ public abstract void encodeTerm(
+ DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute)
+ throws IOException;
- /**
- * Sets the current field for writing. */
+ /** Sets the current field for writing. */
public abstract void setField(FieldInfo fieldInfo);
@Override
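
A hedged sketch of the writeTerm contract described above; the TermsEnum is assumed to be
already positioned on the term, and 'norms'/'maxDoc' come from the caller:

    BlockTermState writeOneTerm(PostingsWriterBase writer, TermsEnum termsEnum,
        NormsProducer norms, int maxDoc) throws IOException {
      FixedBitSet docsSeen = new FixedBitSet(maxDoc); // writer sets a bit per docID it writes
      BlockTermState state = writer.writeTerm(termsEnum.term(), termsEnum, docsSeen, norms);
      if (state == null) {
        // no documents were written for this term; the terms dict will skip it
      }
      return state;
    }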
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
index f51f0c6..1f1b050 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
@@ -16,23 +16,20 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
/**
- * Extension of {@link PostingsWriterBase}, adding a push
- * API for writing each element of the postings. This API
- * is somewhat analogous to an XML SAX API, while {@link
- * PostingsWriterBase} is more like an XML DOM API.
- *
+ * Extension of {@link PostingsWriterBase}, adding a push API for writing each element of the
+ * postings. This API is somewhat analogous to an XML SAX API, while {@link PostingsWriterBase} is
+ * more like an XML DOM API.
+ *
* @see PostingsReaderBase
* @lucene.experimental
*/
@@ -48,8 +45,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
/** {@link FieldInfo} of current field being written. */
protected FieldInfo fieldInfo;
- /** {@link IndexOptions} of current field being
- written */
+ /** {@link IndexOptions} of current field being written */
protected IndexOptions indexOptions;
/** True if the current field writes freqs. */
@@ -64,28 +60,28 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
/** True if the current field writes offsets. */
protected boolean writeOffsets;
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected PushPostingsWriterBase() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected PushPostingsWriterBase() {}
/** Return a newly created empty TermState */
public abstract BlockTermState newTermState() throws IOException;
- /** Start a new term. Note that a matching call to {@link
- * #finishTerm(BlockTermState)} is done, only if the term has at least one
- * document. */
+ /**
+ * Start a new term. Note that a matching call to {@link #finishTerm(BlockTermState)} is done,
+ * only if the term has at least one document.
+ */
public abstract void startTerm(NumericDocValues norms) throws IOException;
- /** Finishes the current term. The provided {@link
- * BlockTermState} contains the term's summary statistics,
- * and will holds metadata from PBF when returned */
+ /**
+ * Finishes the current term. The provided {@link BlockTermState} contains the term's summary
+ * statistics, and will hold metadata from the postings writer (PBF) when returned.
+ */
public abstract void finishTerm(BlockTermState state) throws IOException;
- /**
- * Sets the current field for writing, and returns the
- * fixed length of long[] metadata (which is fixed per
- * field), called when the writing switches to another field. */
+ /**
+ * Sets the current field for writing. Called when the writing switches to another field.
+ */
@Override
public void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
@@ -93,7 +89,8 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
writeFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ writeOffsets =
+ indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
writePayloads = fieldInfo.hasPayloads();
if (writeFreqs == false) {
@@ -116,7 +113,9 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
}
@Override
- public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException {
+ public final BlockTermState writeTerm(
+ BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms)
+ throws IOException {
NumericDocValues normValues;
if (fieldInfo.hasNorms() == false) {
normValues = null;
@@ -146,7 +145,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
startDoc(docID, freq);
if (writePositions) {
- for(int i=0;i<freq;i++) {
+ for (int i = 0; i < freq; i++) {
int pos = postingsEnum.nextPosition();
BytesRef payload = writePayloads ? postingsEnum.getPayload() : null;
int startOffset;
@@ -176,20 +175,21 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
}
}
- /** Adds a new doc in this term.
- * <code>freq</code> will be -1 when term frequencies are omitted
- * for the field. */
+ /**
+ * Adds a new doc in this term. <code>freq</code> will be -1 when term frequencies are omitted for
+ * the field.
+ */
public abstract void startDoc(int docID, int freq) throws IOException;
- /** Add a new position and payload, and start/end offset. A
- * null payload means no payload; a non-null payload with
- * zero length also means no payload. Caller may reuse
- * the {@link BytesRef} for the payload between calls
- * (method must fully consume the payload). <code>startOffset</code>
- * and <code>endOffset</code> will be -1 when offsets are not indexed. */
- public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
+ /**
+ * Add a new position and payload, and start/end offset. A null payload means no payload; a
+ * non-null payload with zero length also means no payload. Caller may reuse the {@link BytesRef}
+ * for the payload between calls (method must fully consume the payload). <code>startOffset</code>
+ * and <code>endOffset</code> will be -1 when offsets are not indexed.
+ */
+ public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset)
+ throws IOException;
- /** Called when we are done adding positions and payloads
- * for each doc. */
+ /** Called when we are done adding positions and payloads for each doc. */
public abstract void finishDoc() throws IOException;
}
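
A do-nothing tracing subclass (an illustration assumed here, not code from the patch)
that makes the SAX-like callback order visible: per term, startTerm, then for each
document startDoc / addPosition* / finishDoc, then finishTerm, all driven by the final
writeTerm above. Standard org.apache.lucene imports are assumed:

    class TracingPushWriter extends PushPostingsWriterBase {
      @Override public void init(IndexOutput termsOut, SegmentWriteState state) {}
      @Override public BlockTermState newTermState() {
        return new BlockTermState() {}; // real impls return their own state subclass
      }
      @Override public void startTerm(NumericDocValues norms) { System.out.println("startTerm"); }
      @Override public void finishTerm(BlockTermState state) { System.out.println("finishTerm"); }
      @Override public void startDoc(int docID, int freq) { System.out.println("  doc=" + docID); }
      @Override public void addPosition(int position, BytesRef payload, int startOffset,
          int endOffset) { System.out.println("    pos=" + position); }
      @Override public void finishDoc() {}
      @Override public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState state,
          boolean absolute) {}
      @Override public void close() {}
    }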
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoFormat.java
index 40daf57..941b8ba 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoFormat.java
@@ -16,39 +16,40 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
- * Expert: Controls the format of the
- * {@link SegmentInfo} (segment metadata file).
+ * Expert: Controls the format of the {@link SegmentInfo} (segment metadata file).
+ *
* @see SegmentInfo
* @lucene.experimental
*/
public abstract class SegmentInfoFormat {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected SegmentInfoFormat() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected SegmentInfoFormat() {}
/**
* Read {@link SegmentInfo} data from a directory.
+ *
* @param directory directory to read from
* @param segmentName name of the segment to read
* @param segmentID expected identifier for the segment
* @return infos instance to be populated with data
* @throws IOException If an I/O error occurs
*/
- public abstract SegmentInfo read(Directory directory, String segmentName, byte segmentID[], IOContext context) throws IOException;
+ public abstract SegmentInfo read(
+ Directory directory, String segmentName, byte segmentID[], IOContext context)
+ throws IOException;
/**
- * Write {@link SegmentInfo} data.
- * The codec must add its SegmentInfo filename(s) to {@code info} before doing i/o.
+ * Write {@link SegmentInfo} data. The codec must add its SegmentInfo filename(s) to {@code info}
+ * before doing i/o.
+ *
* @throws IOException If an I/O error occurs
*/
- public abstract void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException;
+ public abstract void write(Directory dir, SegmentInfo info, IOContext ioContext)
+ throws IOException;
}
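
A hedged round-trip sketch; 'codec', 'dir' and 'info' are assumed to exist, and write()
is expected to have added the codec's filenames to 'info' as documented:

    SegmentInfo roundTrip(Codec codec, Directory dir, SegmentInfo info) throws IOException {
      SegmentInfoFormat fmt = codec.segmentInfoFormat();
      fmt.write(dir, info, IOContext.DEFAULT);
      return fmt.read(dir, info.name, info.getId(), IOContext.DEFAULT);
    }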
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java
index 549fe2f..f5f83b2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java
@@ -16,28 +16,22 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-/**
- * Controls the format of stored fields
- */
+/** Controls the format of stored fields */
public abstract class StoredFieldsFormat {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected StoredFieldsFormat() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected StoredFieldsFormat() {}
- /** Returns a {@link StoredFieldsReader} to load stored
- * fields. */
- public abstract StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException;
+ /** Returns a {@link StoredFieldsReader} to load stored fields. */
+ public abstract StoredFieldsReader fieldsReader(
+ Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException;
- /** Returns a {@link StoredFieldsWriter} to write stored
- * fields. */
- public abstract StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException;
+ /** Returns a {@link StoredFieldsWriter} to write stored fields. */
+ public abstract StoredFieldsWriter fieldsWriter(
+ Directory directory, SegmentInfo si, IOContext context) throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java
index 1f32576..ea76e67 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java
@@ -18,43 +18,43 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.Accountable;
/**
* Codec API for reading stored fields.
- * <p>
- * You need to implement {@link #visitDocument(int, StoredFieldVisitor)} to
- * read the stored fields for a document, implement {@link #clone()} (creating
- * clones of any IndexInputs used, etc), and {@link #close()}
+ *
+ * <p>You need to implement {@link #visitDocument(int, StoredFieldVisitor)} to read the stored
+ * fields for a document, implement {@link #clone()} (creating clones of any IndexInputs used,
+ * etc.), and {@link #close()}.
+ *
* @lucene.experimental
*/
public abstract class StoredFieldsReader implements Cloneable, Closeable, Accountable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected StoredFieldsReader() {
- }
-
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected StoredFieldsReader() {}
+
/** Visit the stored fields for document <code>docID</code> */
public abstract void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException;
@Override
public abstract StoredFieldsReader clone();
-
- /**
+
+ /**
* Checks consistency of this reader.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
- /**
+
+ /**
* Returns an instance optimized for merging. This instance may not be cloned.
- * <p>
- * The default implementation returns {@code this} */
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public StoredFieldsReader getMergeInstance() {
return this;
}
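
A minimal usage sketch (not part of this commit), loading one document through the
visitor API with the stock DocumentStoredFieldVisitor that collects every stored field:

    Document loadDocument(StoredFieldsReader reader, int docID) throws IOException {
      DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      reader.visitDocument(docID, visitor);
      return visitor.getDocument();
    }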
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
index b778d206..02f2887 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
@@ -16,13 +16,14 @@
*/
package org.apache.lucene.codecs;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.StoredField;
@@ -36,33 +37,30 @@ import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
/**
* Codec API for writing stored fields:
+ *
* <ol>
- * <li>For every document, {@link #startDocument()} is called,
- * informing the Codec that a new document has started.
- * <li>{@link #writeField(FieldInfo, IndexableField)} is called for
- * each field in the document.
- * <li>After all documents have been written, {@link #finish(FieldInfos, int)}
- * is called for verification/sanity-checks.
+ * <li>For every document, {@link #startDocument()} is called, informing the Codec that a new
+ * document has started.
+ * <li>{@link #writeField(FieldInfo, IndexableField)} is called for each field in the document.
+ * <li>After all documents have been written, {@link #finish(FieldInfos, int)} is called for
+ * verification/sanity-checks.
* <li>Finally the writer is closed ({@link #close()})
* </ol>
- *
+ *
* @lucene.experimental
*/
public abstract class StoredFieldsWriter implements Closeable, Accountable {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected StoredFieldsWriter() {
- }
- /** Called before writing the stored fields of the document.
- * {@link #writeField(FieldInfo, IndexableField)} will be called
- * for each stored field. Note that this is
- * called even if the document has no stored fields. */
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected StoredFieldsWriter() {}
+
+ /**
+ * Called before writing the stored fields of the document. {@link #writeField(FieldInfo,
+ * IndexableField)} will be called for each stored field. Note that this is called even if the
+ * document has no stored fields.
+ */
public abstract void startDocument() throws IOException;
/** Called when a document and all its fields have been added. */
@@ -70,13 +68,12 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
/** Writes a single stored field. */
public abstract void writeField(FieldInfo info, IndexableField field) throws IOException;
-
- /** Called before {@link #close()}, passing in the number
- * of documents that were written. Note that this is
- * intentionally redundant (equivalent to the number of
- * calls to {@link #startDocument()}, but a Codec should
- * check that this is the case to detect the JRE bug described
- * in LUCENE-1282. */
+
+ /**
+ * Called before {@link #close()}, passing in the number of documents that were written. Note that
+ * this is intentionally redundant (equivalent to the number of calls to {@link #startDocument()}),
+ * but a Codec should check that this is the case to detect the JRE bug described in LUCENE-1282.
+ */
public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
private static class StoredFieldsMergeSub extends DocIDMerger.Sub {
@@ -85,7 +82,8 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
private final MergeVisitor visitor;
int docID = -1;
- public StoredFieldsMergeSub(MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) {
+ public StoredFieldsMergeSub(
+ MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) {
super(docMap);
this.maxDoc = maxDoc;
this.reader = reader;
@@ -102,23 +100,29 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
}
}
}
-
- /** Merges in the stored fields from the readers in
- * <code>mergeState</code>. The default implementation skips
- * over deleted documents, and uses {@link #startDocument()},
- * {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)},
- * returning the number of documents that were written.
- * Implementations can override this method for more sophisticated
- * merging (bulk-byte copying, etc). */
+
+ /**
+ * Merges in the stored fields from the readers in <code>mergeState</code>. The default
+ * implementation skips over deleted documents, and uses {@link #startDocument()}, {@link
+ * #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)}, returning the
+ * number of documents that were written. Implementations can override this method for more
+ * sophisticated merging (bulk-byte copying, etc).
+ */
public int merge(MergeState mergeState) throws IOException {
List<StoredFieldsMergeSub> subs = new ArrayList<>();
- for(int i=0;i<mergeState.storedFieldsReaders.length;i++) {
+ for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i];
storedFieldsReader.checkIntegrity();
- subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
+ subs.add(
+ new StoredFieldsMergeSub(
+ new MergeVisitor(mergeState, i),
+ mergeState.docMaps[i],
+ storedFieldsReader,
+ mergeState.maxDocs[i]));
}
- final DocIDMerger<StoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
+ final DocIDMerger<StoredFieldsMergeSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
int docCount = 0;
while (true) {
@@ -135,11 +139,12 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
}
-
- /**
+
+ /**
* A visitor that adds every field it sees.
- * <p>
- * Use like this:
+ *
+ * <p>Use like this:
+ *
* <pre>
* MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
* for (...) {
@@ -155,10 +160,8 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
Number numericValue;
FieldInfo currentField;
FieldInfos remapper;
-
- /**
- * Create new merge visitor.
- */
+
+ /** Create new merge visitor. */
public MergeVisitor(MergeState mergeState, int readerIndex) {
// if field numbers are aligned, we can save hash lookups
// on every field access. Otherwise, we need to lookup
@@ -171,7 +174,7 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
}
}
}
-
+
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
reset(fieldInfo);
@@ -266,7 +269,7 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
stringValue = null;
numericValue = null;
}
-
+
void write() throws IOException {
writeField(currentField, this);
}
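
A minimal sketch of the documented four-step lifecycle; 'writer', 'fieldInfo' and
'fieldInfos' are assumptions supplied by the caller, and the StoredField name is assumed
to match 'fieldInfo':

    void writeOneDocument(StoredFieldsWriter writer, FieldInfo fieldInfo, FieldInfos fieldInfos)
        throws IOException {
      writer.startDocument();                                       // step 1
      writer.writeField(fieldInfo, new StoredField("title", "hi")); // step 2, once per field
      writer.finishDocument();
      writer.finish(fieldInfos, 1);                                 // step 3: doc-count check
      writer.close();                                               // step 4
    }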
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermStats.java b/lucene/core/src/java/org/apache/lucene/codecs/TermStats.java
index c3c2a48..60c522e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermStats.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermStats.java
@@ -16,22 +16,19 @@
*/
package org.apache.lucene.codecs;
-
import org.apache.lucene.index.TermsEnum; // javadocs
/**
* Holder for per-term statistics.
- *
+ *
* @see TermsEnum#docFreq
* @see TermsEnum#totalTermFreq
*/
public class TermStats {
- /** How many documents have at least one occurrence of
- * this term. */
+ /** How many documents have at least one occurrence of this term. */
public final int docFreq;
-
- /** Total number of times this term occurs across all
- * documents in the field. */
+
+ /** Total number of times this term occurs across all documents in the field. */
public final long totalTermFreq;
/** Sole constructor. */
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java
index a0a65a6..76745b9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java
@@ -16,28 +16,23 @@
*/
package org.apache.lucene.codecs;
-
import java.io.IOException;
-
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-/**
- * Controls the format of term vectors
- */
+/** Controls the format of term vectors */
public abstract class TermVectorsFormat {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected TermVectorsFormat() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected TermVectorsFormat() {}
- /** Returns a {@link TermVectorsReader} to read term
- * vectors. */
- public abstract TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException;
+ /** Returns a {@link TermVectorsReader} to read term vectors. */
+ public abstract TermVectorsReader vectorsReader(
+ Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
+ throws IOException;
- /** Returns a {@link TermVectorsWriter} to write term
- * vectors. */
- public abstract TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException;
+ /** Returns a {@link TermVectorsWriter} to write term vectors. */
+ public abstract TermVectorsWriter vectorsWriter(
+ Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
index dc9115d..532a29e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
@@ -16,51 +16,49 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // javadocs
import org.apache.lucene.index.Fields;
import org.apache.lucene.util.Accountable;
/**
* Codec API for reading term vectors.
- *
+ *
* @lucene.experimental
*/
public abstract class TermVectorsReader implements Cloneable, Closeable, Accountable {
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected TermVectorsReader() {
- }
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected TermVectorsReader() {}
- /** Returns term vectors for this document, or null if
- * term vectors were not indexed. If offsets are
- * available they are in an {@link OffsetAttribute}
- * available from the {@link org.apache.lucene.index.PostingsEnum}. */
+ /**
+ * Returns term vectors for this document, or null if term vectors were not indexed. If offsets
+ * are available they are in an {@link OffsetAttribute} available from the {@link
+ * org.apache.lucene.index.PostingsEnum}.
+ */
public abstract Fields get(int doc) throws IOException;
-
- /**
+
+ /**
* Checks consistency of this reader.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
- /** Create a clone that one caller at a time may use to
- * read term vectors. */
+
+ /** Create a clone that one caller at a time may use to read term vectors. */
@Override
public abstract TermVectorsReader clone();
-
- /**
- * Returns an instance optimized for merging. This instance may only be
- * consumed in the thread that called {@link #getMergeInstance()}.
- * <p>
- * The default implementation returns {@code this} */
+
+ /**
+ * Returns an instance optimized for merging. This instance may only be consumed in the thread
+ * that called {@link #getMergeInstance()}.
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public TermVectorsReader getMergeInstance() {
return this;
}
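
A hedged sketch of the reader side: get(doc) returns null when the document indexed no
term vectors, and "body" is an assumed field name:

    long countVectorTerms(TermVectorsReader reader, int docID) throws IOException {
      Fields vectors = reader.get(docID);
      if (vectors == null) {
        return 0;
      }
      Terms terms = vectors.terms("body");
      return terms == null ? 0 : terms.size(); // size() may be -1 if not supported
    }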
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
index 7865c08..ee88de3 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
@@ -16,12 +16,13 @@
*/
package org.apache.lucene.codecs;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -36,58 +37,58 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
/**
* Codec API for writing term vectors:
+ *
* <ol>
- * <li>For every document, {@link #startDocument(int)} is called,
- * informing the Codec how many fields will be written.
- * <li>{@link #startField(FieldInfo, int, boolean, boolean, boolean)} is called for
- * each field in the document, informing the codec how many terms
- * will be written for that field, and whether or not positions,
- * offsets, or payloads are enabled.
- * <li>Within each field, {@link #startTerm(BytesRef, int)} is called
- * for each term.
- * <li>If offsets and/or positions are enabled, then
- * {@link #addPosition(int, int, int, BytesRef)} will be called for each term
- * occurrence.
- * <li>After all documents have been written, {@link #finish(FieldInfos, int)}
- * is called for verification/sanity-checks.
+ * <li>For every document, {@link #startDocument(int)} is called, informing the Codec how many
+ * fields will be written.
+ * <li>{@link #startField(FieldInfo, int, boolean, boolean, boolean)} is called for each field in
+ * the document, informing the codec how many terms will be written for that field, and
+ * whether or not positions, offsets, or payloads are enabled.
+ * <li>Within each field, {@link #startTerm(BytesRef, int)} is called for each term.
+ * <li>If offsets and/or positions are enabled, then {@link #addPosition(int, int, int, BytesRef)}
+ * will be called for each term occurrence.
+ * <li>After all documents have been written, {@link #finish(FieldInfos, int)} is called for
+ * verification/sanity-checks.
* <li>Finally the writer is closed ({@link #close()})
* </ol>
- *
+ *
* @lucene.experimental
*/
public abstract class TermVectorsWriter implements Closeable, Accountable {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected TermVectorsWriter() {
- }
- /** Called before writing the term vectors of the document.
- * {@link #startField(FieldInfo, int, boolean, boolean, boolean)} will
- * be called <code>numVectorFields</code> times. Note that if term
- * vectors are enabled, this is called even if the document
- * has no vector fields, in this case <code>numVectorFields</code>
- * will be zero. */
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected TermVectorsWriter() {}
+
+ /**
+ * Called before writing the term vectors of the document. {@link #startField(FieldInfo, int,
+ * boolean, boolean, boolean)} will be called <code>numVectorFields</code> times. Note that if
+ * term vectors are enabled, this is called even if the document has no vector fields; in this
+ * case <code>numVectorFields</code> will be zero.
+ */
public abstract void startDocument(int numVectorFields) throws IOException;
/** Called after a doc and all its fields have been added. */
- public void finishDocument() throws IOException {};
+ public void finishDocument() throws IOException {}
- /** Called before writing the terms of the field.
- * {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
- public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
+ /**
+ * Called before writing the terms of the field. {@link #startTerm(BytesRef, int)} will be called
+ * <code>numTerms</code> times.
+ */
+ public abstract void startField(
+ FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads)
+ throws IOException;
/** Called after a field and all its terms have been added. */
- public void finishField() throws IOException {};
+ public void finishField() throws IOException {}
- /** Adds a term and its term frequency <code>freq</code>.
- * If this field has positions and/or offsets enabled, then
- * {@link #addPosition(int, int, int, BytesRef)} will be called
- * <code>freq</code> times respectively.
+ /**
+ * Adds a term and its term frequency <code>freq</code>. If this field has positions and/or
+ * offsets enabled, then {@link #addPosition(int, int, int, BytesRef)} will be called <code>freq
+ * </code> times.
*/
public abstract void startTerm(BytesRef term, int freq) throws IOException;
@@ -95,27 +96,29 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
public void finishTerm() throws IOException {}
/** Adds a term position and offsets */
- public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
-
- /** Called before {@link #close()}, passing in the number
- * of documents that were written. Note that this is
- * intentionally redundant (equivalent to the number of
- * calls to {@link #startDocument(int)}, but a Codec should
- * check that this is the case to detect the JRE bug described
- * in LUCENE-1282. */
+ public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload)
+ throws IOException;
+
+ /**
+ * Called before {@link #close()}, passing in the number of documents that were written. Note that
+ * this is intentionally redundant (equivalent to the number of calls to {@link
+ * #startDocument(int)}), but a Codec should check that this is the case to detect the JRE bug
+ * described in LUCENE-1282.
+ */
public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
- /**
+ /**
* Called by IndexWriter when writing new segments.
- * <p>
- * This is an expert API that allows the codec to consume
- * positions and offsets directly from the indexer.
- * <p>
- * The default implementation calls {@link #addPosition(int, int, int, BytesRef)},
- * but subclasses can override this if they want to efficiently write
- * all the positions, then all the offsets, for example.
- * <p>
- * NOTE: This API is extremely expert and subject to change or removal!!!
+ *
+ * <p>This is an expert API that allows the codec to consume positions and offsets directly from
+ * the indexer.
+ *
+ * <p>The default implementation calls {@link #addPosition(int, int, int, BytesRef)}, but
+ * subclasses can override this if they want to efficiently write all the positions, then all the
+ * offsets, for example.
+ *
+ * <p>NOTE: This API is extremely expert and subject to change or removal!!!
+ *
* @lucene.internal
*/
// TODO: we should probably nuke this and make a more efficient 4.x format
@@ -129,7 +132,7 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
final int startOffset;
final int endOffset;
final BytesRef thisPayload;
-
+
if (positions == null) {
position = -1;
thisPayload = null;
@@ -152,7 +155,7 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
thisPayload = null;
}
}
-
+
if (offsets == null) {
startOffset = endOffset = -1;
} else {
@@ -163,7 +166,7 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
addPosition(position, startOffset, endOffset, thisPayload);
}
}
-
+
private static class TermVectorsMergeSub extends DocIDMerger.Sub {
private final TermVectorsReader reader;
private final int maxDoc;
@@ -186,19 +189,18 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
}
}
- /** Merges in the term vectors from the readers in
- * <code>mergeState</code>. The default implementation skips
- * over deleted documents, and uses {@link #startDocument(int)},
- * {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
- * {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
- * and {@link #finish(FieldInfos, int)},
- * returning the number of documents that were written.
- * Implementations can override this method for more sophisticated
- * merging (bulk-byte copying, etc). */
+ /**
+ * Merges in the term vectors from the readers in <code>mergeState</code>. The default
+ * implementation skips over deleted documents, and uses {@link #startDocument(int)}, {@link
+ * #startField(FieldInfo, int, boolean, boolean, boolean)}, {@link #startTerm(BytesRef, int)},
+ * {@link #addPosition(int, int, int, BytesRef)}, and {@link #finish(FieldInfos, int)}, returning
+ * the number of documents that were written. Implementations can override this method for more
+ * sophisticated merging (bulk-byte copying, etc).
+ */
public int merge(MergeState mergeState) throws IOException {
List<TermVectorsMergeSub> subs = new ArrayList<>();
- for(int i=0;i<mergeState.termVectorsReaders.length;i++) {
+ for (int i = 0; i < mergeState.termVectorsReaders.length; i++) {
TermVectorsReader reader = mergeState.termVectorsReaders[i];
if (reader != null) {
reader.checkIntegrity();
@@ -206,7 +208,8 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
}
- final DocIDMerger<TermVectorsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
+ final DocIDMerger<TermVectorsMergeSub> docIDMerger =
+ DocIDMerger.of(subs, mergeState.needsIndexSort);
int docCount = 0;
while (true) {
@@ -229,9 +232,8 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
}
-
- /** Safe (but, slowish) default method to write every
- * vector field in the document. */
+
+ /** Safe (but slowish) default method to write every vector field in the document. */
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
if (vectors == null) {
startDocument(0);
@@ -249,18 +251,19 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
}
}
startDocument(numFields);
-
+
String lastFieldName = null;
-
+
TermsEnum termsEnum = null;
PostingsEnum docsAndPositionsEnum = null;
-
+
int fieldCount = 0;
- for(String fieldName : vectors) {
+ for (String fieldName : vectors) {
fieldCount++;
final FieldInfo fieldInfo = mergeState.mergeFieldInfos.fieldInfo(fieldName);
- assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
+ assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0
+ : "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
lastFieldName = fieldName;
final Terms terms = vectors.terms(fieldName);
@@ -268,49 +271,52 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {
// FieldsEnum shouldn't lie...
continue;
}
-
+
final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
final boolean hasPayloads = terms.hasPayloads();
assert !hasPayloads || hasPositions;
-
+
int numTerms = (int) terms.size();
if (numTerms == -1) {
- // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
+ // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics
+ // function
numTerms = 0;
termsEnum = terms.iterator();
- while(termsEnum.next() != null) {
+ while (termsEnum.next() != null) {
numTerms++;
}
}
-
+
startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator();
int termCount = 0;
- while(termsEnum.next() != null) {
+ while (termsEnum.next() != null) {
termCount++;
final int freq = (int) termsEnum.totalTermFreq();
-
+
startTerm(termsEnum.term(), freq);
if (hasPositions || hasOffsets) {
- docsAndPositionsEnum = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS);
+ docsAndPositionsEnum =
+ termsEnum.postings(
+ docsAndPositionsEnum, PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS);
assert docsAndPositionsEnum != null;
-
+
final int docID = docsAndPositionsEnum.nextDoc();
assert docID != DocIdSetIterator.NO_MORE_DOCS;
assert docsAndPositionsEnum.freq() == freq;
- for(int posUpto=0; posUpto<freq; posUpto++) {
+ for (int posUpto = 0; posUpto < freq; posUpto++) {
final int pos = docsAndPositionsEnum.nextPosition();
final int startOffset = docsAndPositionsEnum.startOffset();
final int endOffset = docsAndPositionsEnum.endOffset();
-
+
final BytesRef payload = docsAndPositionsEnum.getPayload();
- assert !hasPositions || pos >= 0 ;
+ assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset, payload);
}
}
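
A minimal sketch of the ordered lifecycle above for a single document with one
positions-only vector field; 'writer', 'field' and 'fieldInfos' are assumed:

    void writeOneDoc(TermVectorsWriter writer, FieldInfo field, FieldInfos fieldInfos)
        throws IOException {
      writer.startDocument(1);                         // one vector field in this doc
      writer.startField(field, 2, true, false, false); // two terms; positions only
      writer.startTerm(new BytesRef("apple"), 1);
      writer.addPosition(0, -1, -1, null);             // offsets are -1: not indexed
      writer.finishTerm();
      writer.startTerm(new BytesRef("banana"), 1);
      writer.addPosition(1, -1, -1, null);
      writer.finishTerm();
      writer.finishField();
      writer.finishDocument();
      writer.finish(fieldInfos, 1);
      writer.close();
    }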
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java
index a7a64e1..7cce5b2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java
@@ -18,59 +18,56 @@
package org.apache.lucene.codecs;
import java.io.IOException;
-
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorValues;
/**
- * Encodes/decodes per-document vector and any associated indexing structures required to support nearest-neighbor search
+ * Encodes/decodes per-document vectors and any associated indexing structures required to
+ * support nearest-neighbor search.
*/
public abstract class VectorFormat {
/** Sole constructor */
protected VectorFormat() {}
- /**
- * Returns a {@link VectorWriter} to write the vectors to the index.
- */
+ /** Returns a {@link VectorWriter} to write the vectors to the index. */
public abstract VectorWriter fieldsWriter(SegmentWriteState state) throws IOException;
- /**
- * Returns a {@link VectorReader} to read the vectors from the index.
- */
+ /** Returns a {@link VectorReader} to read the vectors from the index. */
public abstract VectorReader fieldsReader(SegmentReadState state) throws IOException;
/**
- * EMPTY throws an exception when written. It acts as a sentinel indicating a Codec that does not support vectors.
+ * EMPTY throws an exception when written. It acts as a sentinel indicating a Codec that does not
+ * support vectors.
*/
- public static final VectorFormat EMPTY = new VectorFormat() {
- @Override
- public VectorWriter fieldsWriter(SegmentWriteState state) {
- throw new UnsupportedOperationException("Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene90");
- }
-
- @Override
- public VectorReader fieldsReader(SegmentReadState state) {
- return new VectorReader() {
+ public static final VectorFormat EMPTY =
+ new VectorFormat() {
@Override
- public void checkIntegrity() {
+ public VectorWriter fieldsWriter(SegmentWriteState state) {
+ throw new UnsupportedOperationException(
+ "Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene90");
}
@Override
- public VectorValues getVectorValues(String field) {
- return VectorValues.EMPTY;
- }
+ public VectorReader fieldsReader(SegmentReadState state) {
+ return new VectorReader() {
+ @Override
+ public void checkIntegrity() {}
- @Override
- public void close() throws IOException {
- }
+ @Override
+ public VectorValues getVectorValues(String field) {
+ return VectorValues.EMPTY;
+ }
- @Override
- public long ramBytesUsed() {
- return 0;
+ @Override
+ public void close() throws IOException {}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
+ };
}
};
- }
- };
}
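
A hedged sketch of the EMPTY sentinel in use: a codec that does not support vectors can
return it from its vectorFormat() hook ('NoVectorsCodec' is a hypothetical subclass):

    class NoVectorsCodec extends FilterCodec {
      NoVectorsCodec(Codec delegate) {
        super("NoVectorsCodec", delegate);
      }

      @Override
      public VectorFormat vectorFormat() {
        return VectorFormat.EMPTY;
      }
    }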
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java
index 15a3d46..6b878ca 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java
@@ -19,13 +19,10 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
-
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.util.Accountable;
-/**
- * Reads vectors from an index.
- */
+/** Reads vectors from an index. */
public abstract class VectorReader implements Closeable, Accountable {
/** Sole constructor */
@@ -33,9 +30,10 @@ public abstract class VectorReader implements Closeable, Accountable {
/**
* Checks consistency of this reader.
- * <p>
- * Note that this may be costly in terms of I/O, e.g.
- * may involve computing a checksum value against large data files.
+ *
+ * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
+ * against large data files.
+ *
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
@@ -44,12 +42,12 @@ public abstract class VectorReader implements Closeable, Accountable {
public abstract VectorValues getVectorValues(String field) throws IOException;
/**
- * Returns an instance optimized for merging. This instance may only be
- * consumed in the thread that called {@link #getMergeInstance()}.
- * <p>
- * The default implementation returns {@code this} */
+ * Returns an instance optimized for merging. This instance may only be consumed in the thread
+ * that called {@link #getMergeInstance()}.
+ *
+ * <p>The default implementation returns {@code this}.
+ */
public VectorReader getMergeInstance() {
return this;
}
-
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
index 7b13310..d14fded 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
@@ -17,12 +17,13 @@
package org.apache.lucene.codecs;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
@@ -32,11 +33,7 @@ import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
-/**
- * Writes vectors to an index.
- */
+/** Writes vectors to an index. */
public abstract class VectorWriter implements Closeable {
/** Sole constructor */
@@ -65,7 +62,8 @@ public abstract class VectorWriter implements Closeable {
finish();
}
- private void mergeVectors(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
+ private void mergeVectors(FieldInfo mergeFieldInfo, final MergeState mergeState)
+ throws IOException {
if (mergeState.infoStream.isEnabled("VV")) {
mergeState.infoStream.message("VV", "merging " + mergeState.segmentInfo);
}
@@ -78,16 +76,27 @@ public abstract class VectorWriter implements Closeable {
if (vectorReader != null) {
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
int segmentDimension = mergeFieldInfo.getVectorDimension();
- VectorValues.SearchStrategy segmentSearchStrategy = mergeFieldInfo.getVectorSearchStrategy();
+ VectorValues.SearchStrategy segmentSearchStrategy =
+ mergeFieldInfo.getVectorSearchStrategy();
if (dimension == -1) {
dimension = segmentDimension;
searchStrategy = mergeFieldInfo.getVectorSearchStrategy();
} else if (dimension != segmentDimension) {
- throw new IllegalStateException("Varying dimensions for vector-valued field " + mergeFieldInfo.name
- + ": " + dimension + "!=" + segmentDimension);
+ throw new IllegalStateException(
+ "Varying dimensions for vector-valued field "
+ + mergeFieldInfo.name
+ + ": "
+ + dimension
+ + "!="
+ + segmentDimension);
} else if (searchStrategy != segmentSearchStrategy) {
- throw new IllegalStateException("Varying search strategys for vector-valued field " + mergeFieldInfo.name
- + ": " + searchStrategy + "!=" + segmentSearchStrategy);
+ throw new IllegalStateException(
+ "Varying search strategys for vector-valued field "
+ + mergeFieldInfo.name
+ + ": "
+ + searchStrategy
+ + "!="
+ + segmentSearchStrategy);
}
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
if (values != null) {
@@ -134,10 +143,12 @@ public abstract class VectorWriter implements Closeable {
}
/**
- * View over multiple VectorValues supporting iterator-style access via DocIdMerger. Maintains a reverse ordinal
- * mapping for documents having values in order to support random access by dense ordinal.
+ * View over multiple VectorValues supporting iterator-style access via DocIdMerger. Maintains a
+ * reverse ordinal mapping for documents having values in order to support random access by dense
+ * ordinal.
*/
- private static class VectorValuesMerger extends VectorValues implements RandomAccessVectorValuesProducer {
+ private static class VectorValuesMerger extends VectorValues
+ implements RandomAccessVectorValuesProducer {
private final List<VectorValuesSub> subs;
private final DocIDMerger<VectorValuesSub> docIdMerger;
private final int[] ordBase;
@@ -146,7 +157,8 @@ public abstract class VectorWriter implements Closeable {
private int docId;
private VectorValuesSub current;
- // For each doc with a vector, record its ord in the segments being merged. This enables random access into the
+ // For each doc with a vector, record its ord in the segments being merged. This enables random
+ // access into the
// unmerged segments using the ords from the merged segment.
private int[] ordMap;
private int ord;
@@ -244,7 +256,8 @@ public abstract class VectorWriter implements Closeable {
if (sub.values instanceof RandomAccessVectorValuesProducer) {
raSubs.add(((RandomAccessVectorValuesProducer) sub.values).randomAccess());
} else {
- throw new IllegalStateException("Cannot merge VectorValues without support for random access");
+ throw new IllegalStateException(
+ "Cannot merge VectorValues without support for random access");
}
}
}
@@ -272,7 +285,7 @@ public abstract class VectorWriter implements Closeable {
// get the index of the greatest lower bound
segmentOrd = -2 - segmentOrd;
}
- while(segmentOrd < ordBase.length - 1 && ordBase[segmentOrd + 1] == ordBase[segmentOrd]) {
+ while (segmentOrd < ordBase.length - 1 && ordBase[segmentOrd + 1] == ordBase[segmentOrd]) {
// forward over empty segments which will share the same ordBase
segmentOrd++;
}
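
A toy sketch (all names hypothetical) of the reverse ordinal mapping described above:
walk merged docs with values in order and record, per dense merged ordinal, where the
value lives in its source segment, so random access can be delegated to sub-readers:

    int[] buildOrdMap(int[] mergedDocsWithVectors) {
      int[] ordMap = new int[mergedDocsWithVectors.length];
      for (int ord = 0; ord < mergedDocsWithVectors.length; ord++) {
        ordMap[ord] = sourceOrdOf(mergedDocsWithVectors[ord]); // hypothetical helper
      }
      return ordMap;
    }

    int sourceOrdOf(int mergedDocID) {
      // stand-in: VectorValuesMerger derives this from the sub-segment being merged
      return mergedDocID;
    }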
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
index bee914b..79916f8 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
@@ -24,7 +24,6 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
@@ -43,40 +42,32 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.Outputs;
-/** A block-based terms index and dictionary that assigns
- * terms to variable length blocks according to how they
- * share prefixes. The terms index is a prefix trie
- * whose leaves are term blocks. The advantage of this
- * approach is that seekExact is often able to
- * determine a term cannot exist without doing any IO, and
- * intersection with Automata is very fast. Note that this
- * terms dictionary has its own fixed terms index (ie, it
- * does not support a pluggable terms index
- * implementation).
+/**
+ * A block-based terms index and dictionary that assigns terms to variable length blocks according
+ * to how they share prefixes. The terms index is a prefix trie whose leaves are term blocks. The
+ * advantage of this approach is that seekExact is often able to determine a term cannot exist
+ * without doing any IO, and intersection with Automata is very fast. Note that this terms
+ * dictionary has its own fixed terms index (ie, it does not support a pluggable terms index
+ * implementation).
*
- * <p><b>NOTE</b>: this terms dictionary supports
- * min/maxItemsPerBlock during indexing to control how
- * much memory the terms index uses.</p>
+ * <p><b>NOTE</b>: this terms dictionary supports min/maxItemsPerBlock during indexing to control
+ * how much memory the terms index uses.
*
- * <p>The data structure used by this implementation is very
- * similar to a burst trie
- * (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
- * but with added logic to break up too-large blocks of all
- * terms sharing a given prefix into smaller ones.</p>
+ * <p>The data structure used by this implementation is very similar to a burst trie
+ * (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499), but with added logic to break
+ * up too-large blocks of all terms sharing a given prefix into smaller ones.
*
- * <p>Use {@link org.apache.lucene.index.CheckIndex} with the <code>-verbose</code>
- * option to see summary statistics on the blocks in the
- * dictionary.
+ * <p>Use {@link org.apache.lucene.index.CheckIndex} with the <code>-verbose</code> option to see
+ * summary statistics on the blocks in the dictionary.
*
- * See {@link BlockTreeTermsWriter}.
+ * <p>See {@link BlockTreeTermsWriter}.
*
* @lucene.experimental
*/
-
public final class BlockTreeTermsReader extends FieldsProducer {
static final Outputs<BytesRef> FST_OUTPUTS = ByteSequenceOutputs.getSingleton();
-
+
static final BytesRef NO_OUTPUT = FST_OUTPUTS.getNoOutput();
static final int OUTPUT_FLAGS_NUM_BITS = 2;
@@ -86,7 +77,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
/** Extension of terms file */
static final String TERMS_EXTENSION = "tim";
- final static String TERMS_CODEC_NAME = "BlockTreeTermsDict";
+
+ static final String TERMS_CODEC_NAME = "BlockTreeTermsDict";
/** Initial terms format. */
public static final int VERSION_START = 3;
@@ -105,45 +97,63 @@ public final class BlockTreeTermsReader extends FieldsProducer {
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
- final static String TERMS_INDEX_CODEC_NAME = "BlockTreeTermsIndex";
+
+ static final String TERMS_INDEX_CODEC_NAME = "BlockTreeTermsIndex";
/** Extension of terms meta file */
static final String TERMS_META_EXTENSION = "tmd";
- final static String TERMS_META_CODEC_NAME = "BlockTreeTermsMeta";
+
+ static final String TERMS_META_CODEC_NAME = "BlockTreeTermsMeta";
// Open input to the main terms dict file (_X.tib)
final IndexInput termsIn;
// Open input to the terms index file (_X.tip)
final IndexInput indexIn;
- //private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+ // private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
// Reads the terms dict entries, to gather state to
// produce DocsEnum on demand
final PostingsReaderBase postingsReader;
- private final Map<String,FieldReader> fieldMap;
+ private final Map<String, FieldReader> fieldMap;
private final List<String> fieldList;
final String segment;
-
+
final int version;
/** Sole constructor. */
- public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
+ public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
+ throws IOException {
boolean success = false;
-
+
this.postingsReader = postingsReader;
this.segment = state.segmentInfo.name;
try {
- String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
+ String termsName =
+ IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
termsIn = state.directory.openInput(termsName, state.context);
- version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
-
- String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
+ version =
+ CodecUtil.checkIndexHeader(
+ termsIn,
+ TERMS_CODEC_NAME,
+ VERSION_START,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+
+ String indexName =
+ IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
indexIn = state.directory.openInput(indexName, state.context);
- CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.checkIndexHeader(
+ indexIn,
+ TERMS_INDEX_CODEC_NAME,
+ version,
+ version,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
if (version < VERSION_META_FILE) {
// Have PostingsReader init itself
@@ -157,15 +167,25 @@ public final class BlockTreeTermsReader extends FieldsProducer {
}
// Read per-field details
- String metaName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_META_EXTENSION);
+ String metaName =
+ IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_META_EXTENSION);
Map<String, FieldReader> fieldMap = null;
Throwable priorE = null;
long indexLength = -1, termsLength = -1;
- try (ChecksumIndexInput metaIn = version >= VERSION_META_FILE ? state.directory.openChecksumInput(metaName, state.context) : null) {
+ try (ChecksumIndexInput metaIn =
+ version >= VERSION_META_FILE
+ ? state.directory.openChecksumInput(metaName, state.context)
+ : null) {
try {
final IndexInput indexMetaIn, termsMetaIn;
if (version >= VERSION_META_FILE) {
- CodecUtil.checkIndexHeader(metaIn, TERMS_META_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.checkIndexHeader(
+ metaIn,
+ TERMS_META_CODEC_NAME,
+ version,
+ version,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
indexMetaIn = termsMetaIn = metaIn;
postingsReader.init(metaIn, state);
} else {
@@ -184,7 +204,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
final int field = termsMetaIn.readVInt();
final long numTerms = termsMetaIn.readVLong();
if (numTerms <= 0) {
- throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsMetaIn);
+ throw new CorruptIndexException(
+ "Illegal numTerms for field number: " + field, termsMetaIn);
}
final BytesRef rootCode = readBytesRef(termsMetaIn);
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
@@ -192,30 +213,55 @@ public final class BlockTreeTermsReader extends FieldsProducer {
throw new CorruptIndexException("invalid field number: " + field, termsMetaIn);
}
final long sumTotalTermFreq = termsMetaIn.readVLong();
- // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
- final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsMetaIn.readVLong();
+ // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is
+ // written.
+ final long sumDocFreq =
+ fieldInfo.getIndexOptions() == IndexOptions.DOCS
+ ? sumTotalTermFreq
+ : termsMetaIn.readVLong();
final int docCount = termsMetaIn.readVInt();
if (version < VERSION_META_LONGS_REMOVED) {
final int longsSize = termsMetaIn.readVInt();
if (longsSize < 0) {
- throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsMetaIn);
+ throw new CorruptIndexException(
+ "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
+ termsMetaIn);
}
}
BytesRef minTerm = readBytesRef(termsMetaIn);
BytesRef maxTerm = readBytesRef(termsMetaIn);
- if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
- throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), termsMetaIn);
+ if (docCount < 0
+ || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
+ throw new CorruptIndexException(
+ "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(),
+ termsMetaIn);
}
- if (sumDocFreq < docCount) { // #postings must be >= #docs with field
- throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsMetaIn);
+ if (sumDocFreq < docCount) { // #postings must be >= #docs with field
+ throw new CorruptIndexException(
+ "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsMetaIn);
}
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
- throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsMetaIn);
+ throw new CorruptIndexException(
+ "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
+ termsMetaIn);
}
final long indexStartFP = indexMetaIn.readVLong();
- FieldReader previous = fieldMap.put(fieldInfo.name,
- new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
- indexStartFP, indexMetaIn, indexIn, minTerm, maxTerm));
+ FieldReader previous =
+ fieldMap.put(
+ fieldInfo.name,
+ new FieldReader(
+ this,
+ fieldInfo,
+ numTerms,
+ rootCode,
+ sumTotalTermFreq,
+ sumDocFreq,
+ docCount,
+ indexStartFP,
+ indexMetaIn,
+ indexIn,
+ minTerm,
+ maxTerm));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsMetaIn);
}
@@ -235,7 +281,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
}
}
if (version >= VERSION_META_FILE) {
- // At this point the checksum of the meta file has been verified so the lengths are likely correct
+ // At this point the checksum of the meta file has been verified so the lengths are likely
+ // correct
CodecUtil.retrieveChecksum(indexIn, indexLength);
CodecUtil.retrieveChecksum(termsIn, termsLength);
} else {
@@ -260,7 +307,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
if (numBytes < 0) {
throw new CorruptIndexException("invalid bytes length: " + numBytes, in);
}
-
+
BytesRef bytes = new BytesRef();
bytes.length = numBytes;
bytes.bytes = new byte[numBytes];
@@ -285,7 +332,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
public void close() throws IOException {
try {
IOUtils.close(indexIn, termsIn, postingsReader);
- } finally {
+ } finally {
// Clear so refs to terms index are GCable even if
// app hangs onto us:
fieldMap.clear();
@@ -327,7 +374,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
@Override
public long ramBytesUsed() {
long sizeInBytes = postingsReader.ramBytesUsed();
- for(FieldReader reader : fieldMap.values()) {
+ for (FieldReader reader : fieldMap.values()) {
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes;
@@ -335,25 +382,31 @@ public final class BlockTreeTermsReader extends FieldsProducer {
@Override
public Collection<Accountable> getChildResources() {
- List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fieldMap));
+ List<Accountable> resources =
+ new ArrayList<>(Accountables.namedAccountables("field", fieldMap));
resources.add(Accountables.namedAccountable("delegate", postingsReader));
return Collections.unmodifiableList(resources);
}
@Override
- public void checkIntegrity() throws IOException {
+ public void checkIntegrity() throws IOException {
// terms index
CodecUtil.checksumEntireFile(indexIn);
// term dictionary
CodecUtil.checksumEntireFile(termsIn);
-
+
// postings
postingsReader.checkIntegrity();
}
@Override
public String toString() {
- return getClass().getSimpleName() + "(fields=" + fieldMap.size() + ",delegate=" + postingsReader + ")";
+ return getClass().getSimpleName()
+ + "(fields="
+ + fieldMap.size()
+ + ",delegate="
+ + postingsReader
+ + ")";
}
}
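
Taken together, the reformatted checks in this constructor read as a chain of invariants on the per-field statistics: numTerms must be positive, docCount must lie in [0, maxDoc], every document with the field contributes at least one posting (sumDocFreq >= docCount), and every posting contributes at least one position (sumTotalTermFreq >= sumDocFreq). A minimal restatement outside Lucene, with a hypothetical helper class and plain IllegalStateException standing in for CorruptIndexException:

    public class FieldStatsCheck {
      // Mirrors the per-field sanity checks of the constructor above.
      static void check(long numTerms, int docCount, long sumDocFreq, long sumTotalTermFreq, int maxDoc) {
        if (numTerms <= 0) {
          throw new IllegalStateException("Illegal numTerms: " + numTerms);
        }
        if (docCount < 0 || docCount > maxDoc) { // #docs with field must be <= #docs
          throw new IllegalStateException("invalid docCount: " + docCount + " maxDoc: " + maxDoc);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new IllegalStateException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount);
        }
        if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new IllegalStateException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq);
        }
      }

      public static void main(String[] args) {
        check(10, 5, 12, 30, 100); // passes: 5 docs, 12 postings, 30 positions
      }
    }
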
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
index c05daeb..985125a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
@@ -16,13 +16,11 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
-
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
@@ -57,7 +55,7 @@ import org.apache.lucene.util.fst.Util;
/*
TODO:
-
+
- Currently there is a one-to-one mapping of indexed
term to term block, but we could decouple the two, ie,
put more terms into the index than there are blocks.
@@ -87,108 +85,111 @@ import org.apache.lucene.util.fst.Util;
/**
* Block-based terms index and dictionary writer.
- * <p>
- * Writes terms dict and index, block-encoding (column
- * stride) each term's metadata for each set of terms
- * between two index terms.
- * <p>
*
- * Files:
+ * <p>Writes terms dict and index, block-encoding (column stride) each term's metadata for each set
+ * of terms between two index terms.
+ *
+ * <p>Files:
+ *
* <ul>
- * <li><code>.tim</code>: <a href="#Termdictionary">Term Dictionary</a></li>
- * <li><code>.tip</code>: <a href="#Termindex">Term Index</a></li>
+ * <li><code>.tim</code>: <a href="#Termdictionary">Term Dictionary</a>
+ * <li><code>.tip</code>: <a href="#Termindex">Term Index</a>
* </ul>
- * <p>
- * <a id="Termdictionary"></a>
+ *
+ * <p><a id="Termdictionary"></a>
+ *
* <h2>Term Dictionary</h2>
*
- * <p>The .tim file contains the list of terms in each
- * field along with per-term statistics (such as docfreq)
- * and per-term metadata (typically pointers to the postings list
- * for that term in the inverted index).
- * </p>
+ * <p>The .tim file contains the list of terms in each field along with per-term statistics (such as
+ * docfreq) and per-term metadata (typically pointers to the postings list for that term in the
+ * inverted index).
*
- * <p>The .tim is arranged in blocks: with blocks containing
- * a variable number of entries (by default 25-48), where
- * each entry is either a term or a reference to a
- * sub-block.</p>
+ * <p>The .tim file is arranged in blocks, each containing a variable number of entries (by
+ * default 25-48); each entry is either a term or a reference to a sub-block.
*
- * <p>NOTE: The term dictionary can plug into different postings implementations:
- * the postings writer/reader are actually responsible for encoding
- * and decoding the Postings Metadata and Term Metadata sections.</p>
+ * <p>NOTE: The term dictionary can plug into different postings implementations: the postings
+ * writer/reader are actually responsible for encoding and decoding the Postings Metadata and Term
+ * Metadata sections.
*
* <ul>
- * <li>TermsDict (.tim) --> Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
- * FieldSummary, DirOffset, Footer</li>
- * <li>NodeBlock --> (OuterNode | InnerNode)</li>
- * <li>OuterNode --> EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata</i>><sup>EntryCount</sup></li>
- * <li>InnerNode --> EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ? ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata ? </i>><sup>EntryCount</sup></li>
- * <li>TermStats --> DocFreq, TotalTermFreq </li>
- * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
- * SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm><sup>NumFields</sup></li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- * <li>MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]</li>
- * <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
- * FieldNumber,RootCodeLength,DocCount,LongsSize --> {@link DataOutput#writeVInt VInt}</li>
- * <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq -->
- * {@link DataOutput#writeVLong VLong}</li>
- * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * <li>TermsDict (.tim) --> Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
+ * FieldSummary, DirOffset, Footer
+ * <li>NodeBlock --> (OuterNode | InnerNode)
+ * <li>OuterNode --> EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, <
+ * TermStats ><sup>EntryCount</sup>, MetaLength,
+ * <<i>TermMetadata</i>><sup>EntryCount</sup>
+ * <li>InnerNode --> EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength,
+ * < TermStats ? ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata ?
+ * </i>><sup>EntryCount</sup>
+ * <li>TermStats --> DocFreq, TotalTermFreq
+ * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength,
+ * Byte<sup>RootCodeLength</sup>, SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm,
+ * MaxTerm><sup>NumFields</sup>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}
+ * <li>DirOffset --> {@link DataOutput#writeLong Uint64}
+ * <li>MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]
+ * <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
+ * FieldNumber,RootCodeLength,DocCount,LongsSize --> {@link DataOutput#writeVInt VInt}
+ * <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --> {@link DataOutput#writeVLong
+ * VLong}
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}
* </ul>
- * <p>Notes:</p>
+ *
+ * <p>Notes:
+ *
* <ul>
- * <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
- * for the BlockTree implementation.</li>
- * <li>DirOffset is a pointer to the FieldSummary section.</li>
- * <li>DocFreq is the count of documents which contain the term.</li>
- * <li>TotalTermFreq is the total number of occurrences of the term. This is encoded
- * as the difference between the total number of occurrences and the DocFreq.</li>
- * <li>FieldNumber is the fields number from {@link FieldInfos}. (.fnm)</li>
- * <li>NumTerms is the number of unique terms for the field.</li>
- * <li>RootCode points to the root block for the field.</li>
- * <li>SumDocFreq is the total number of postings, the number of term-document pairs across
- * the entire field.</li>
- * <li>DocCount is the number of documents that have at least one posting for this field.</li>
- * <li>LongsSize records how many long values the postings writer/reader record per term
- * (e.g., to hold freq/prox/doc file offsets).
- * <li>MinTerm, MaxTerm are the lowest and highest term in this field.</li>
- * <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
- * these contain arbitrary per-file data (such as parameters or versioning information)
- * and per-term data (such as pointers to inverted files).</li>
- * <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points
- * to child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted </li>
+ * <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information for
+ * the BlockTree implementation.
+ * <li>DirOffset is a pointer to the FieldSummary section.
+ * <li>DocFreq is the count of documents which contain the term.
+ * <li>TotalTermFreq is the total number of occurrences of the term. This is encoded as the
+ * difference between the total number of occurrences and the DocFreq.
+ * <li>FieldNumber is the fields number from {@link FieldInfos}. (.fnm)
+ * <li>NumTerms is the number of unique terms for the field.
+ * <li>RootCode points to the root block for the field.
+ * <li>SumDocFreq is the total number of postings, the number of term-document pairs across the
+ * entire field.
+ * <li>DocCount is the number of documents that have at least one posting for this field.
+ * <li>LongsSize records how many long values the postings writer/reader record per term (e.g., to
+ * hold freq/prox/doc file offsets).
+ * <li>MinTerm, MaxTerm are the lowest and highest term in this field.
+ * <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
+ * these contain arbitrary per-file data (such as parameters or versioning information) and
+ * per-term data (such as pointers to inverted files).
+ *   <li>For inner nodes of the tree, every entry steals one bit to mark whether it points to
+ *       child nodes (sub-blocks). If so, the corresponding TermStats and TermMetadata are omitted
* </ul>
+ *
* <a id="Termindex"></a>
+ *
* <h2>Term Index</h2>
- * <p>The .tip file contains an index into the term dictionary, so that it can be
- * accessed randomly. The index is also used to determine
- * when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
+ *
+ * <p>The .tip file contains an index into the term dictionary, so that it can be accessed randomly.
+ * The index is also used to determine when a given term cannot exist on disk (in the .tim file),
+ * saving a disk seek.
+ *
* <ul>
* <li>TermsIndex (.tip) --> Header, FSTIndex<sup>NumFields</sup>
- * <IndexStartFP><sup>NumFields</sup>, DirOffset, Footer</li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- * <li>IndexStartFP --> {@link DataOutput#writeVLong VLong}</li>
- * <!-- TODO: better describe FST output here -->
- * <li>FSTIndex --> {@link FST FST<byte[]>}</li>
- * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * <IndexStartFP><sup>NumFields</sup>, DirOffset, Footer
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}
+ * <li>DirOffset --> {@link DataOutput#writeLong Uint64}
+ * <li>IndexStartFP --> {@link DataOutput#writeVLong VLong}
+ * <!-- TODO: better describe FST output here -->
+ * <li>FSTIndex --> {@link FST FST<byte[]>}
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}
* </ul>
- * <p>Notes:</p>
+ *
+ * <p>Notes:
+ *
* <ul>
- * <li>The .tip file contains a separate FST for each
- * field. The FST maps a term prefix to the on-disk
- * block that holds all terms starting with that
- * prefix. Each field's IndexStartFP points to its
- * FST.</li>
- * <li>DirOffset is a pointer to the start of the IndexStartFPs
- * for all fields</li>
- * <li>It's possible that an on-disk block would contain
- * too many terms (more than the allowed maximum
- * (default: 48)). When this happens, the block is
- * sub-divided into new blocks (called "floor
- * blocks"), and then the output in the FST for the
- * block's prefix encodes the leading byte of each
- * sub-block, and its file pointer.
+ * <li>The .tip file contains a separate FST for each field. The FST maps a term prefix to the
+ * on-disk block that holds all terms starting with that prefix. Each field's IndexStartFP
+ * points to its FST.
+ * <li>DirOffset is a pointer to the start of the IndexStartFPs for all fields
+ * <li>It's possible that an on-disk block would contain too many terms (more than the allowed
+ * maximum (default: 48)). When this happens, the block is sub-divided into new blocks (called
+ * "floor blocks"), and then the output in the FST for the block's prefix encodes the leading
+ * byte of each sub-block, and its file pointer.
* </ul>
*
* @see BlockTreeTermsReader
@@ -196,20 +197,22 @@ import org.apache.lucene.util.fst.Util;
*/
public final class BlockTreeTermsWriter extends FieldsConsumer {
- /** Suggested default value for the {@code
- * minItemsInBlock} parameter to {@link
- * #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
- public final static int DEFAULT_MIN_BLOCK_SIZE = 25;
+ /**
+ * Suggested default value for the {@code minItemsInBlock} parameter to {@link
+ * #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}.
+ */
+ public static final int DEFAULT_MIN_BLOCK_SIZE = 25;
- /** Suggested default value for the {@code
- * maxItemsInBlock} parameter to {@link
- * #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
- public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
+ /**
+ * Suggested default value for the {@code maxItemsInBlock} parameter to {@link
+ * #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}.
+ */
+ public static final int DEFAULT_MAX_BLOCK_SIZE = 48;
- //public static boolean DEBUG = false;
- //public static boolean DEBUG2 = false;
+ // public static boolean DEBUG = false;
+ // public static boolean DEBUG2 = false;
- //private final static boolean SAVE_DOT_FILES = false;
+ // private final static boolean SAVE_DOT_FILES = false;
private final IndexOutput metaOut;
private final IndexOutput termsOut;
@@ -223,18 +226,18 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
private final List<ByteBuffersDataOutput> fields = new ArrayList<>();
- /** Create a new writer. The number of items (terms or
- * sub-blocks) per block will aim to be between
- * minItemsPerBlock and maxItemsPerBlock, though in some
- * cases the blocks may be smaller than the min. */
- public BlockTreeTermsWriter(SegmentWriteState state,
- PostingsWriterBase postingsWriter,
- int minItemsInBlock,
- int maxItemsInBlock)
- throws IOException
- {
- validateSettings(minItemsInBlock,
- maxItemsInBlock);
+ /**
+ * Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between
+ * minItemsPerBlock and maxItemsPerBlock, though in some cases the blocks may be smaller than the
+ * min.
+ */
+ public BlockTreeTermsWriter(
+ SegmentWriteState state,
+ PostingsWriterBase postingsWriter,
+ int minItemsInBlock,
+ int maxItemsInBlock)
+ throws IOException {
+ validateSettings(minItemsInBlock, maxItemsInBlock);
this.minItemsInBlock = minItemsInBlock;
this.maxItemsInBlock = maxItemsInBlock;
@@ -243,26 +246,48 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
this.fieldInfos = state.fieldInfos;
this.postingsWriter = postingsWriter;
- final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
+ final String termsName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
termsOut = state.directory.createOutput(termsName, state.context);
boolean success = false;
IndexOutput metaOut = null, indexOut = null;
try {
- CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
- state.segmentInfo.getId(), state.segmentSuffix);
-
- final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
+ CodecUtil.writeIndexHeader(
+ termsOut,
+ BlockTreeTermsReader.TERMS_CODEC_NAME,
+ BlockTreeTermsReader.VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+
+ final String indexName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name,
+ state.segmentSuffix,
+ BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
indexOut = state.directory.createOutput(indexName, state.context);
- CodecUtil.writeIndexHeader(indexOut, BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
- state.segmentInfo.getId(), state.segmentSuffix);
- //segment = state.segmentInfo.name;
-
- final String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_META_EXTENSION);
+ CodecUtil.writeIndexHeader(
+ indexOut,
+ BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME,
+ BlockTreeTermsReader.VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ // segment = state.segmentInfo.name;
+
+ final String metaName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name,
+ state.segmentSuffix,
+ BlockTreeTermsReader.TERMS_META_EXTENSION);
metaOut = state.directory.createOutput(metaName, state.context);
- CodecUtil.writeIndexHeader(metaOut, BlockTreeTermsReader.TERMS_META_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
- state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.writeIndexHeader(
+ metaOut,
+ BlockTreeTermsReader.TERMS_META_CODEC_NAME,
+ BlockTreeTermsReader.VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
- postingsWriter.init(metaOut, state); // have consumer write its format/header
+ postingsWriter.init(metaOut, state); // have consumer write its format/header
this.metaOut = metaOut;
this.indexOut = indexOut;
@@ -274,30 +299,37 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
}
- /** Throws {@code IllegalArgumentException} if any of these settings
- * is invalid. */
+ /** Throws {@code IllegalArgumentException} if any of these settings is invalid. */
public static void validateSettings(int minItemsInBlock, int maxItemsInBlock) {
if (minItemsInBlock <= 1) {
throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
}
if (minItemsInBlock > maxItemsInBlock) {
- throw new IllegalArgumentException("maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock);
+ throw new IllegalArgumentException(
+ "maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock="
+ + maxItemsInBlock
+ + " minItemsInBlock="
+ + minItemsInBlock);
}
- if (2*(minItemsInBlock-1) > maxItemsInBlock) {
- throw new IllegalArgumentException("maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock);
+ if (2 * (minItemsInBlock - 1) > maxItemsInBlock) {
+ throw new IllegalArgumentException(
+ "maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock="
+ + maxItemsInBlock
+ + " minItemsInBlock="
+ + minItemsInBlock);
}
}
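
A note on the last check in validateSettings above: requiring maxItemsInBlock to be at least 2*(minItemsInBlock-1) guarantees that a run of entries exceeding the maximum can always be cut into pieces that each reach the minimum. The suggested defaults sit exactly on that boundary, as this small sketch (invented class name, same predicates) illustrates:

    public class BlockSizeCheck {
      // Mirrors the three predicates of validateSettings above.
      static boolean valid(int minItemsInBlock, int maxItemsInBlock) {
        return minItemsInBlock >= 2
            && minItemsInBlock <= maxItemsInBlock
            && 2 * (minItemsInBlock - 1) <= maxItemsInBlock;
      }

      public static void main(String[] args) {
        System.out.println(valid(25, 48)); // true: the defaults, 2 * (25 - 1) == 48
        System.out.println(valid(25, 47)); // false: a 48-entry run cannot split into two blocks of >= 25
      }
    }
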
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
- //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
+ // if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
String lastField = null;
- for(String field : fields) {
+ for (String field : fields) {
assert lastField == null || lastField.compareTo(field) < 0;
lastField = field;
- //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field);
+ // if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field);
Terms terms = fields.terms(field);
if (terms == null) {
continue;
@@ -307,25 +339,28 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
while (true) {
BytesRef term = termsEnum.next();
- //if (DEBUG) System.out.println("BTTW: next term " + term);
+ // if (DEBUG) System.out.println("BTTW: next term " + term);
if (term == null) {
break;
}
- //if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
+ // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
+ // brToString(term));
termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
- //if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
+ // if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
}
}
-
+
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
assert fp < (1L << 62);
- return (fp << 2) | (hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) | (isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
+ return (fp << 2)
+ | (hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
+ | (isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
}
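
The reformatted encodeOutput above packs a terms-dict file pointer plus two flags into a single FST output long: the pointer shifted left by the reader's OUTPUT_FLAGS_NUM_BITS (2), with hasTerms and isFloor in the low bits. A round-trip sketch; the concrete flag values 0x2 and 0x1 are this sketch's own choice, not quoted from the patch:

    public class OutputCodec {
      // Two low bits carry flags; the file pointer occupies the rest.
      static final long HAS_TERMS = 0x2;
      static final long IS_FLOOR = 0x1;

      static long encode(long fp, boolean hasTerms, boolean isFloor) {
        assert fp < (1L << 62); // leaves room for the two flag bits
        return (fp << 2) | (hasTerms ? HAS_TERMS : 0) | (isFloor ? IS_FLOOR : 0);
      }

      static long fp(long output) {
        return output >>> 2; // matches the reader's readVLong() >>> OUTPUT_FLAGS_NUM_BITS
      }

      static boolean hasTerms(long output) {
        return (output & HAS_TERMS) != 0;
      }

      static boolean isFloor(long output) {
        return (output & IS_FLOOR) != 0;
      }

      public static void main(String[] args) {
        long out = encode(12345L, true, false);
        System.out.println(fp(out) + " " + hasTerms(out) + " " + isFloor(out)); // 12345 true false
      }
    }
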
private static class PendingEntry {
@@ -386,7 +421,13 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
public final boolean isFloor;
public final int floorLeadByte;
- public PendingBlock(BytesRef prefix, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List<FST<BytesRef>> subIndices) {
+ public PendingBlock(
+ BytesRef prefix,
+ long fp,
+ boolean hasTerms,
+ boolean isFloor,
+ int floorLeadByte,
+ List<FST<BytesRef>> subIndices) {
super(false);
this.prefix = prefix;
this.fp = fp;
@@ -401,9 +442,14 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
return "BLOCK: prefix=" + brToString(prefix);
}
- public void compileIndex(List<PendingBlock> blocks, ByteBuffersDataOutput scratchBytes, IntsRefBuilder scratchIntsRef) throws IOException {
+ public void compileIndex(
+ List<PendingBlock> blocks,
+ ByteBuffersDataOutput scratchBytes,
+ IntsRefBuilder scratchIntsRef)
+ throws IOException {
- assert (isFloor && blocks.size() > 1) || (isFloor == false && blocks.size() == 1): "isFloor=" + isFloor + " blocks=" + blocks;
+ assert (isFloor && blocks.size() > 1) || (isFloor == false && blocks.size() == 1)
+ : "isFloor=" + isFloor + " blocks=" + blocks;
assert this == blocks.get(0);
assert scratchBytes.size() == 0;
@@ -413,13 +459,14 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// outputs sharing in the FST
scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor));
if (isFloor) {
- scratchBytes.writeVInt(blocks.size()-1);
- for (int i=1;i<blocks.size();i++) {
+ scratchBytes.writeVInt(blocks.size() - 1);
+ for (int i = 1; i < blocks.size(); i++) {
PendingBlock sub = blocks.get(i);
assert sub.floorLeadByte != -1;
- //if (DEBUG) {
- // System.out.println(" write floorLeadByte=" + Integer.toHexString(sub.floorLeadByte&0xff));
- //}
+ // if (DEBUG) {
+ // System.out.println(" write floorLeadByte=" +
+ // Integer.toHexString(sub.floorLeadByte&0xff));
+ // }
scratchBytes.writeByte((byte) sub.floorLeadByte);
assert sub.fp > fp;
scratchBytes.writeVLong((sub.fp - fp) << 1 | (sub.hasTerms ? 1 : 0));
@@ -427,20 +474,23 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
- final FSTCompiler<BytesRef> fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).shouldShareNonSingletonNodes(false).build();
- //if (DEBUG) {
+ final FSTCompiler<BytesRef> fstCompiler =
+ new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
+ .shouldShareNonSingletonNodes(false)
+ .build();
+ // if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
- //}
- //indexBuilder.DEBUG = false;
+ // }
+ // indexBuilder.DEBUG = false;
final byte[] bytes = scratchBytes.toArrayCopy();
assert bytes.length > 0;
fstCompiler.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
scratchBytes.reset();
// Copy over index for all sub-blocks
- for(PendingBlock block : blocks) {
+ for (PendingBlock block : blocks) {
if (block.subIndices != null) {
- for(FST<BytesRef> subIndex : block.subIndices) {
+ for (FST<BytesRef> subIndex : block.subIndices) {
append(fstCompiler, subIndex, scratchIntsRef);
}
block.subIndices = null;
@@ -462,13 +512,16 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// TODO: maybe we could add bulk-add method to
// Builder? Takes FST and unions it w/ current
// FST.
- private void append(FSTCompiler<BytesRef> fstCompiler, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
+ private void append(
+ FSTCompiler<BytesRef> fstCompiler, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef)
+ throws IOException {
final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<BytesRef> indexEnt;
- while((indexEnt = subIndexEnum.next()) != null) {
- //if (DEBUG) {
- // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
- //}
+ while ((indexEnt = subIndexEnum.next()) != null) {
+ // if (DEBUG) {
+ // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output="
+ // + indexEnt.output);
+ // }
fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
}
}
@@ -509,7 +562,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
singletonCount = 0;
}
}
-
}
class TermsWriter {
@@ -544,11 +596,12 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert count > 0;
- //if (DEBUG2) {
+ // if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
- // System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" + count);
- //}
+ // System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
+ // + count);
+ // }
// Root block better write all remaining pending entries:
assert prefixLength > 0 || count == pending.size();
@@ -561,12 +614,12 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
boolean hasTerms = false;
boolean hasSubBlocks = false;
- int start = pending.size()-count;
+ int start = pending.size() - count;
int end = pending.size();
int nextBlockStart = start;
int nextFloorLeadLabel = -1;
- for (int i=start; i<end; i++) {
+ for (int i = start; i < end; i++) {
PendingEntry ent = pending.get(i);
@@ -578,7 +631,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// Suffix is 0, i.e. prefix 'foo' and term is
// 'foo' so the term has empty string suffix
// in this block
- assert lastSuffixLeadLabel == -1: "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel;
+ assert lastSuffixLeadLabel == -1
+ : "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel;
suffixLeadLabel = -1;
} else {
suffixLeadLabel = term.termBytes[prefixLength] & 0xff;
@@ -588,18 +642,31 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert block.prefix.length > prefixLength;
suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff;
}
- // if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" + suffixLeadLabel);
+ // if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" +
+ // suffixLeadLabel);
if (suffixLeadLabel != lastSuffixLeadLabel) {
int itemsInBlock = i - nextBlockStart;
- if (itemsInBlock >= minItemsInBlock && end-nextBlockStart > maxItemsInBlock) {
- // The count is too large for one block, so we must break it into "floor" blocks, where we record
- // the leading label of the suffix of the first term in each floor block, so at search time we can
- // jump to the right floor block. We just use a naive greedy segmenter here: make a new floor
- // block as soon as we have at least minItemsInBlock. This is not always best: it often produces
+ if (itemsInBlock >= minItemsInBlock && end - nextBlockStart > maxItemsInBlock) {
+ // The count is too large for one block, so we must break it into "floor" blocks, where
+ // we record
+ // the leading label of the suffix of the first term in each floor block, so at search
+ // time we can
+ // jump to the right floor block. We just use a naive greedy segmenter here: make a new
+ // floor
+ // block as soon as we have at least minItemsInBlock. This is not always best: it often
+ // produces
// a too-small block as the final block:
boolean isFloor = itemsInBlock < count;
- newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks));
+ newBlocks.add(
+ writeBlock(
+ prefixLength,
+ isFloor,
+ nextFloorLeadLabel,
+ nextBlockStart,
+ i,
+ hasTerms,
+ hasSubBlocks));
hasTerms = false;
hasSubBlocks = false;
@@ -621,7 +688,15 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (nextBlockStart < end) {
int itemsInBlock = end - nextBlockStart;
boolean isFloor = itemsInBlock < count;
- newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks));
+ newBlocks.add(
+ writeBlock(
+ prefixLength,
+ isFloor,
+ nextFloorLeadLabel,
+ nextBlockStart,
+ end,
+ hasTerms,
+ hasSubBlocks));
}
assert newBlocks.isEmpty() == false;
@@ -633,7 +708,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef);
// Remove slice from the top of the pending stack, that we just wrote:
- pending.subList(pending.size()-count, pending.size()).clear();
+ pending.subList(pending.size() - count, pending.size()).clear();
// Append new block
pending.add(firstBlock);
@@ -651,13 +726,21 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
return true;
}
- /** Writes the specified slice (start is inclusive, end is exclusive)
- * from pending stack as a new block. If isFloor is true, there
- * were too many (more than maxItemsInBlock) entries sharing the
- * same prefix, and so we broke it into multiple floor blocks where
- * we record the starting label of the suffix of each floor block. */
- private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end,
- boolean hasTerms, boolean hasSubBlocks) throws IOException {
+ /**
+ * Writes the specified slice (start is inclusive, end is exclusive) from pending stack as a new
+ * block. If isFloor is true, there were too many (more than maxItemsInBlock) entries sharing
+ * the same prefix, and so we broke it into multiple floor blocks where we record the starting
+ * label of the suffix of each floor block.
+ */
+ private PendingBlock writeBlock(
+ int prefixLength,
+ boolean isFloor,
+ int floorLeadLabel,
+ int start,
+ int end,
+ boolean hasTerms,
+ boolean hasSubBlocks)
+ throws IOException {
assert end > start;
@@ -669,7 +752,10 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength);
prefix.length = prefixLength;
- //if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
+ // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
+ // brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
+ // pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
+ // " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// Write block header:
int numEntries = end - start;
@@ -693,7 +779,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// compact format in this case:
boolean isLeafBlock = hasSubBlocks == false;
- //System.out.println(" isLeaf=" + isLeafBlock);
+ // System.out.println(" isLeaf=" + isLeafBlock);
final List<FST<BytesRef>> subIndices;
@@ -702,22 +788,23 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (isLeafBlock) {
// Block contains only ordinary terms:
subIndices = null;
- StatsWriter statsWriter = new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS);
- for (int i=start;i<end;i++) {
+ StatsWriter statsWriter =
+ new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS);
+ for (int i = start; i < end; i++) {
PendingEntry ent = pending.get(i);
- assert ent.isTerm: "i=" + i;
+ assert ent.isTerm : "i=" + i;
PendingTerm term = (PendingTerm) ent;
- assert StringHelper.startsWith(term.termBytes, prefix): term + " prefix=" + prefix;
+ assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix;
BlockTermState state = term.state;
final int suffix = term.termBytes.length - prefixLength;
- //if (DEBUG2) {
+ // if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
- //}
+ // }
// For leaf block we write suffix straight
suffixLengthsWriter.writeVInt(suffix);
@@ -735,21 +822,22 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
} else {
// Block has at least one prefix term or a sub block:
subIndices = new ArrayList<>();
- StatsWriter statsWriter = new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS);
- for (int i=start;i<end;i++) {
+ StatsWriter statsWriter =
+ new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS);
+ for (int i = start; i < end; i++) {
PendingEntry ent = pending.get(i);
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
- assert StringHelper.startsWith(term.termBytes, prefix): term + " prefix=" + prefix;
+ assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix;
BlockTermState state = term.state;
final int suffix = term.termBytes.length - prefixLength;
- //if (DEBUG2) {
+ // if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
- //}
+ // }
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block, and 1 bit to record if
@@ -783,17 +871,23 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block:
- suffixLengthsWriter.writeVInt((suffix<<1)|1);
+ suffixLengthsWriter.writeVInt((suffix << 1) | 1);
suffixWriter.append(block.prefix.bytes, prefixLength, suffix);
- //if (DEBUG2) {
+ // if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
- // System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
- //}
-
- assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel: "floorLeadLabel=" + floorLeadLabel + " suffixLead=" + (block.prefix.bytes[prefixLength] & 0xff);
+ // System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
+ // subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
+ // }
+
+ assert floorLeadLabel == -1
+ || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel
+ : "floorLeadLabel="
+ + floorLeadLabel
+ + " suffixLead="
+ + (block.prefix.bytes[prefixLength] & 0xff);
assert block.fp < startFP;
suffixLengthsWriter.writeVLong(startFP - block.fp);
@@ -805,18 +899,24 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert subIndices.size() != 0;
}
- // Write suffixes byte[] blob to terms dict output, either uncompressed, compressed with LZ4 or with LowercaseAsciiCompression.
+ // Write suffixes byte[] blob to terms dict output, either uncompressed, compressed with LZ4
+ // or with LowercaseAsciiCompression.
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
- // If there are 2 suffix bytes or less per term, then we don't bother compressing as suffix are unlikely what
- // makes the terms dictionary large, and it also tends to be frequently the case for dense IDs like
+      // If there are 2 suffix bytes or less per term, then we don't bother compressing, as
+      // suffixes are unlikely to be what makes the terms dictionary large, and short suffixes
+      // also tend to be the common case for dense IDs like
      // auto-increment IDs, so not compressing in that case avoids hurting ID lookups too much.
- // We also only start compressing when the prefix length is greater than 2 since blocks whose prefix length is
+ // We also only start compressing when the prefix length is greater than 2 since blocks whose
+ // prefix length is
// 1 or 2 always all get visited when running a fuzzy query whose max number of edits is 2.
if (suffixWriter.length() > 2L * numEntries && prefixLength > 2) {
- // LZ4 inserts references whenever it sees duplicate strings of 4 chars or more, so only try it out if the
+ // LZ4 inserts references whenever it sees duplicate strings of 4 chars or more, so only try
+ // it out if the
// average suffix length is greater than 6.
if (suffixWriter.length() > 6L * numEntries) {
- LZ4.compress(suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable);
+ LZ4.compress(
+ suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable);
if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) {
// LZ4 saved more than 25%, go for it
compressionAlg = CompressionAlgorithm.LZ4;
@@ -827,7 +927,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (spareBytes.length < suffixWriter.length()) {
spareBytes = new byte[ArrayUtil.oversize(suffixWriter.length(), 1)];
}
- if (LowercaseAsciiCompression.compress(suffixWriter.bytes(), suffixWriter.length(), spareBytes, spareWriter)) {
+ if (LowercaseAsciiCompression.compress(
+ suffixWriter.bytes(), suffixWriter.length(), spareBytes, spareWriter)) {
compressionAlg = CompressionAlgorithm.LOWERCASE_ASCII;
}
}
@@ -889,7 +990,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
docsSeen = new FixedBitSet(maxDoc);
postingsWriter.setField(fieldInfo);
}
-
+
/** Writes one term's worth of postings. */
public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException {
/*
@@ -904,12 +1005,15 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (state != null) {
assert state.docFreq != 0;
- assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
+ assert fieldInfo.getIndexOptions() == IndexOptions.DOCS
+ || state.totalTermFreq >= state.docFreq
+ : "postingsWriter=" + postingsWriter;
pushTerm(text);
-
+
PendingTerm term = new PendingTerm(text, state);
pending.add(term);
- //if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" + pending.size());
+ // if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" +
+ // pending.size());
sumDocFreq += state.docFreq;
sumTotalTermFreq += state.totalTermFreq;
@@ -924,7 +1028,14 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
/** Pushes the new term to the top of the stack, and writes new blocks. */
private void pushTerm(BytesRef text) throws IOException {
// Find common prefix between last term and current term:
- int prefixLength = Arrays.mismatch(lastTerm.bytes(), 0, lastTerm.length(), text.bytes, text.offset, text.offset + text.length);
+ int prefixLength =
+ Arrays.mismatch(
+ lastTerm.bytes(),
+ 0,
+ lastTerm.length(),
+ text.bytes,
+ text.offset,
+ text.offset + text.length);
if (prefixLength == -1) { // Only happens for the first term, if it is empty
assert lastTerm.length() == 0;
prefixLength = 0;
@@ -933,15 +1044,16 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length);
// Close the "abandoned" suffix now:
- for(int i=lastTerm.length()-1;i>=prefixLength;i--) {
+ for (int i = lastTerm.length() - 1; i >= prefixLength; i--) {
// How many items on top of the stack share the current suffix
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
- // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " minItemsInBlock=" + minItemsInBlock);
- writeBlocks(i+1, prefixTopSize);
- prefixStarts[i] -= prefixTopSize-1;
+ // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
+ // minItemsInBlock=" + minItemsInBlock);
+ writeBlocks(i + 1, prefixTopSize);
+ prefixStarts[i] -= prefixTopSize - 1;
}
}
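
pushTerm above now spells out its Arrays.mismatch call one argument per line; the call computes the length of the prefix shared by the previous and current term. It returns -1 only when the two compared ranges are identical, which, since terms arrive in strictly increasing order, can only happen for the very first (empty) term. A small sketch with invented names:

    import java.nio.charset.StandardCharsets;
    import java.util.Arrays;

    public class CommonPrefixDemo {
      static int commonPrefixLength(byte[] last, int lastLen, byte[] next, int nextLen) {
        int prefixLength = Arrays.mismatch(last, 0, lastLen, next, 0, nextLen);
        if (prefixLength == -1) { // identical ranges; here that means both are empty
          prefixLength = 0;
        }
        return prefixLength;
      }

      public static void main(String[] args) {
        byte[] a = "abc".getBytes(StandardCharsets.UTF_8);
        byte[] b = "abd".getBytes(StandardCharsets.UTF_8);
        System.out.println(commonPrefixLength(a, a.length, b, b.length)); // 2 ("ab" is shared)
      }
    }
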
@@ -950,7 +1062,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
// Init new tail:
- for(int i=prefixLength;i<text.length;i++) {
+ for (int i = prefixLength; i < text.length; i++) {
prefixStarts[i] = pending.size();
}
@@ -960,7 +1072,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// Finishes all terms in this field
public void finish() throws IOException {
if (numTerms > 0) {
- // if (DEBUG) System.out.println("BTTW: finish prefixStarts=" + Arrays.toString(prefixStarts));
+ // if (DEBUG) System.out.println("BTTW: finish prefixStarts=" +
+ // Arrays.toString(prefixStarts));
// Add empty term to force closing of all final blocks:
pushTerm(new BytesRef());
@@ -972,7 +1085,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
writeBlocks(0, pending.size());
// We better have one final "root" block:
- assert pending.size() == 1 && !pending.get(0).isTerm: "pending.size()=" + pending.size() + " pending=" + pending;
+ assert pending.size() == 1 && !pending.get(0).isTerm
+ : "pending.size()=" + pending.size() + " pending=" + pending;
final PendingBlock root = (PendingBlock) pending.get(0);
assert root.prefix.length == 0;
final BytesRef rootCode = root.index.getEmptyOutput();
@@ -996,7 +1110,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
metaOut.writeVLong(indexOut.getFilePointer());
// Write FST to index
root.index.save(metaOut, indexOut);
- //System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);
+ // System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);
/*
if (DEBUG) {
@@ -1009,23 +1123,26 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
*/
} else {
- assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1;
+ assert sumTotalTermFreq == 0
+ || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1;
assert sumDocFreq == 0;
assert docsSeen.cardinality() == 0;
}
}
- private final ByteBuffersDataOutput suffixLengthsWriter = ByteBuffersDataOutput.newResettableInstance();
+ private final ByteBuffersDataOutput suffixLengthsWriter =
+ ByteBuffersDataOutput.newResettableInstance();
private final BytesRefBuilder suffixWriter = new BytesRefBuilder();
private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance();
private byte[] spareBytes = BytesRef.EMPTY_BYTES;
- private final LZ4.HighCompressionHashTable compressionHashTable = new LZ4.HighCompressionHashTable();
+ private final LZ4.HighCompressionHashTable compressionHashTable =
+ new LZ4.HighCompressionHashTable();
}
private boolean closed;
-
+
@Override
public void close() throws IOException {
if (closed) {
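
Before the next file, one piece of logic from writeBlock above is worth restating: the suffix blob is only considered for compression when it averages more than 2 bytes per entry and the block prefix is longer than 2; LZ4 is only attempted when the average suffix exceeds 6 bytes and is kept only when it saves more than 25%, otherwise LowercaseAsciiCompression gets a try. A hedged sketch of that decision as a pure function — the thresholds are the ones visible in the hunk, while the compressed sizes are passed in here rather than computed:

    public class SuffixCompressionPolicy {
      enum Algorithm { NO_COMPRESSION, LOWERCASE_ASCII, LZ4 }

      static Algorithm choose(int suffixBytes, int numEntries, int prefixLength,
                              int lz4CompressedSize, boolean lowercaseAsciiWorked) {
        // Short suffixes (<= 2 bytes/term) or short prefixes (<= 2) are left alone:
        // dense IDs fall in this bucket and compression would slow their lookups.
        if (suffixBytes <= 2L * numEntries || prefixLength <= 2) {
          return Algorithm.NO_COMPRESSION;
        }
        // LZ4 needs duplicate runs of 4+ bytes, so it is only tried when the average
        // suffix is longer than 6 bytes, and kept only if it saves more than 25%.
        if (suffixBytes > 6L * numEntries && lz4CompressedSize < suffixBytes - (suffixBytes >>> 2)) {
          return Algorithm.LZ4;
        }
        return lowercaseAsciiWorked ? Algorithm.LOWERCASE_ASCII : Algorithm.NO_COMPRESSION;
      }

      public static void main(String[] args) {
        System.out.println(choose(1000, 100, 5, 600, false)); // LZ4: 600 bytes saves 40%
      }
    }
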
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/CompressionAlgorithm.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/CompressionAlgorithm.java
index a98a3ca..1cfcda2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/CompressionAlgorithm.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/CompressionAlgorithm.java
@@ -17,22 +17,17 @@
package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
-
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
-/**
- * Compression algorithm used for suffixes of a block of terms.
- */
+/** Compression algorithm used for suffixes of a block of terms. */
enum CompressionAlgorithm {
-
NO_COMPRESSION(0x00) {
@Override
void read(DataInput in, byte[] out, int len) throws IOException {
in.readBytes(out, 0, len);
}
-
},
LOWERCASE_ASCII(0x01) {
@@ -41,7 +36,6 @@ enum CompressionAlgorithm {
void read(DataInput in, byte[] out, int len) throws IOException {
LowercaseAsciiCompression.decompress(in, out, len);
}
-
},
LZ4(0x02) {
@@ -50,19 +44,17 @@ enum CompressionAlgorithm {
void read(DataInput in, byte[] out, int len) throws IOException {
org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0);
}
-
};
private static final CompressionAlgorithm[] BY_CODE = new CompressionAlgorithm[3];
+
static {
for (CompressionAlgorithm alg : CompressionAlgorithm.values()) {
BY_CODE[alg.code] = alg;
}
}
- /**
- * Look up a {@link CompressionAlgorithm} by its {@link CompressionAlgorithm#code}.
- */
+ /** Look up a {@link CompressionAlgorithm} by its {@link CompressionAlgorithm#code}. */
static final CompressionAlgorithm byCode(int code) {
if (code < 0 || code >= BY_CODE.length) {
throw new IllegalArgumentException("Illegal code for a compression algorithm: " + code);
@@ -77,5 +69,4 @@ enum CompressionAlgorithm {
}
abstract void read(DataInput in, byte[] out, int len) throws IOException;
-
}
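
The cleanup above leaves CompressionAlgorithm's code-to-constant table easier to scan: each constant carries a wire code, a static array is filled once from values(), and byCode range-checks before indexing. The same pattern in isolation, with an invented two-constant enum standing in for the real one (which also gives each constant a read(...) hook):

    public class CodeLookupDemo {
      enum Mode {
        RAW(0x00),
        PACKED(0x01);

        final int code;

        Mode(int code) {
          this.code = code;
        }

        private static final Mode[] BY_CODE = new Mode[2];

        static {
          // Enum constants are initialized before this block runs.
          for (Mode m : values()) {
            BY_CODE[m.code] = m;
          }
        }

        static Mode byCode(int code) {
          if (code < 0 || code >= BY_CODE.length) {
            throw new IllegalArgumentException("Illegal code: " + code);
          }
          return BY_CODE[code];
        }
      }

      public static void main(String[] args) {
        System.out.println(Mode.byCode(1)); // PACKED
      }
    }
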
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
index 748fbbb..55d86df 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
-
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
@@ -37,6 +36,7 @@ import org.apache.lucene.util.fst.OffHeapFSTStore;
/**
* BlockTree's implementation of {@link Terms}.
+ *
* @lucene.internal
*/
public final class FieldReader extends Terms implements Accountable {
@@ -45,7 +45,7 @@ public final class FieldReader extends Terms implements Accountable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FieldReader.class)
- + 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
+ + 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
final long numTerms;
final FieldInfo fieldInfo;
@@ -59,13 +59,25 @@ public final class FieldReader extends Terms implements Accountable {
final BlockTreeTermsReader parent;
final FST<BytesRef> index;
- //private boolean DEBUG;
-
- FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
- long indexStartFP, IndexInput metaIn, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
+ // private boolean DEBUG;
+
+ FieldReader(
+ BlockTreeTermsReader parent,
+ FieldInfo fieldInfo,
+ long numTerms,
+ BytesRef rootCode,
+ long sumTotalTermFreq,
+ long sumDocFreq,
+ int docCount,
+ long indexStartFP,
+ IndexInput metaIn,
+ IndexInput indexIn,
+ BytesRef minTerm,
+ BytesRef maxTerm)
+ throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
- //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
+ // DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
this.parent = parent;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@@ -75,9 +87,12 @@ public final class FieldReader extends Terms implements Accountable {
this.minTerm = minTerm;
this.maxTerm = maxTerm;
// if (DEBUG) {
- // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
+ // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode="
+ // + rootCode + " divisor=" + indexDivisor);
// }
- rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
+ rootBlockFP =
+ (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong()
+ >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
// Initialize FST always off-heap.
final IndexInput clone = indexIn.clone();
clone.seek(indexStartFP);
@@ -87,14 +102,14 @@ public final class FieldReader extends Terms implements Accountable {
index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
}
/*
- if (false) {
- final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
- Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
- Util.toDot(index, w, false, false);
- System.out.println("FST INDEX: SAVED to " + dotFileName);
- w.close();
- }
- */
+ if (false) {
+ final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+ Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+ Util.toDot(index, w, false, false);
+ System.out.println("FST INDEX: SAVED to " + dotFileName);
+ w.close();
+ }
+ */
}
@Override
@@ -117,7 +132,7 @@ public final class FieldReader extends Terms implements Accountable {
}
}
- /** For debugging -- used by CheckIndex too*/
+ /** For debugging -- used by CheckIndex too */
@Override
public Stats getStats() throws IOException {
return new SegmentTermsEnum(this).computeBlockStats();
@@ -130,14 +145,17 @@ public final class FieldReader extends Terms implements Accountable {
@Override
public boolean hasOffsets() {
- return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ return fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
-
+
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
@@ -170,19 +188,21 @@ public final class FieldReader extends Terms implements Accountable {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
- // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + BlockTreeTermsWriter.brToString(startTerm));
- //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
+ // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
+ // BlockTreeTermsWriter.brToString(startTerm));
+ // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
- return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
+ return new IntersectTermsEnum(
+ this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
}
-
+
@Override
public long ramBytesUsed() {
- return BASE_RAM_BYTES_USED + ((index!=null)? index.ramBytesUsed() : 0);
+ return BASE_RAM_BYTES_USED + ((index != null) ? index.ramBytesUsed() : 0);
}
@Override
@@ -196,6 +216,16 @@ public final class FieldReader extends Terms implements Accountable {
@Override
public String toString() {
- return "BlockTreeTerms(seg=" + parent.segment +" terms=" + numTerms + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq + ",docs=" + docCount + ")";
+ return "BlockTreeTerms(seg="
+ + parent.segment
+ + " terms="
+ + numTerms
+ + ",postings="
+ + sumDocFreq
+ + ",positions="
+ + sumTotalTermFreq
+ + ",docs="
+ + docCount
+ + ")";
}
}
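The hasOffsets()/hasPositions() checks above lean on IndexOptions being declared in increasing order of what gets indexed, so compareTo() doubles as a ">= capability" test. A minimal sketch of that idiom, assuming only the public IndexOptions constants:

import org.apache.lucene.index.IndexOptions;

// compareTo() as a capability check: valid because IndexOptions constants
// are ordered DOCS < DOCS_AND_FREQS < ..._AND_POSITIONS < ..._AND_OFFSETS.
class IndexOptionsCheckSketch {
  static boolean hasPositions(IndexOptions opts) {
    return opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  }

  public static void main(String[] args) {
    System.out.println(hasPositions(IndexOptions.DOCS)); // false
    System.out.println(
        hasPositions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)); // true
  }
}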
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
index 4e09548..5404a63 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
@@ -16,9 +16,7 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.IOException;
-
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
@@ -36,24 +34,24 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
-/** This is used to implement efficient {@link Terms#intersect} for
- * block-tree. Note that it cannot seek, except for the initial term on
- * init. It just "nexts" through the intersection of the automaton and
- * the terms. It does not use the terms index at all: on init, it
- * loads the root block, and scans its way to the initial term.
- * Likewise, in next it scans until it finds a term that matches the
- * current automaton transition. */
-
+/**
+ * This is used to implement efficient {@link Terms#intersect} for block-tree. Note that it cannot
+ * seek, except for the initial term on init. It just "nexts" through the intersection of the
+ * automaton and the terms. It does not use the terms index at all: on init, it loads the root
+ * block, and scans its way to the initial term. Likewise, in next it scans until it finds a term
+ * that matches the current automaton transition.
+ */
final class IntersectTermsEnum extends BaseTermsEnum {
- //static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+ // static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
final IndexInput in;
- final static Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
+ static final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
IntersectTermsEnumFrame[] stack;
-
- @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
final RunAutomaton runAutomaton;
final Automaton automaton;
@@ -72,7 +70,13 @@ final class IntersectTermsEnum extends BaseTermsEnum {
// TODO: in some cases we can filter by length? eg
// regexp foo*bar must be at least length 6 bytes
- public IntersectTermsEnum(FieldReader fr, Automaton automaton, RunAutomaton runAutomaton, BytesRef commonSuffix, BytesRef startTerm) throws IOException {
+ public IntersectTermsEnum(
+ FieldReader fr,
+ Automaton automaton,
+ RunAutomaton runAutomaton,
+ BytesRef commonSuffix,
+ BytesRef startTerm)
+ throws IOException {
this.fr = fr;
assert automaton != null;
@@ -84,14 +88,13 @@ final class IntersectTermsEnum extends BaseTermsEnum {
in = fr.parent.termsIn.clone();
stack = new IntersectTermsEnumFrame[5];
- for(int idx=0;idx<stack.length;idx++) {
+ for (int idx = 0; idx < stack.length; idx++) {
stack[idx] = new IntersectTermsEnumFrame(this, idx);
}
- for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
+ for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) {
arcs[arcIdx] = new FST.Arc<>();
}
-
fstReader = fr.index.getBytesReader();
// TODO: if the automaton is "smallish" we really
@@ -138,9 +141,11 @@ final class IntersectTermsEnum extends BaseTermsEnum {
private IntersectTermsEnumFrame getFrame(int ord) throws IOException {
if (ord >= stack.length) {
- final IntersectTermsEnumFrame[] next = new IntersectTermsEnumFrame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ final IntersectTermsEnumFrame[] next =
+ new IntersectTermsEnumFrame
+ [ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(stack, 0, next, 0, stack.length);
- for(int stackOrd=stack.length;stackOrd<next.length;stackOrd++) {
+ for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) {
next[stackOrd] = new IntersectTermsEnumFrame(this, stackOrd);
}
stack = next;
@@ -151,10 +156,11 @@ final class IntersectTermsEnum extends BaseTermsEnum {
private FST.Arc<BytesRef> getArc(int ord) {
if (ord >= arcs.length) {
- @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] next =
- new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ final FST.Arc<BytesRef>[] next =
+ new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(arcs, 0, next, 0, arcs.length);
- for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) {
+ for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) {
next[arcOrd] = new FST.Arc<>();
}
arcs = next;
@@ -165,8 +171,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
private IntersectTermsEnumFrame pushFrame(int state) throws IOException {
assert currentFrame != null;
- final IntersectTermsEnumFrame f = getFrame(currentFrame == null ? 0 : 1+currentFrame.ord);
-
+ final IntersectTermsEnumFrame f = getFrame(currentFrame == null ? 0 : 1 + currentFrame.ord);
+
f.fp = f.fpOrig = currentFrame.lastSubFP;
f.prefix = currentFrame.prefix + currentFrame.suffix;
f.setState(state);
@@ -184,7 +190,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
// TODO: we could be more efficient for the next()
// case by using current arc as starting point,
// passed to findTargetArc
- arc = fr.index.findTargetArc(target, arc, getArc(1+idx), fstReader);
+ arc = fr.index.findTargetArc(target, arc, getArc(1 + idx), fstReader);
assert arc != null;
output = fstOutputs.add(output, arc.output());
idx++;
@@ -228,8 +234,10 @@ final class IntersectTermsEnum extends BaseTermsEnum {
private int getState() {
int state = currentFrame.state;
- for(int idx=0;idx<currentFrame.suffix;idx++) {
- state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
+ for (int idx = 0; idx < currentFrame.suffix; idx++) {
+ state =
+ runAutomaton.step(
+ state, currentFrame.suffixBytes[currentFrame.startBytePos + idx] & 0xff);
assert state != -1;
}
return state;
@@ -247,7 +255,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
FST.Arc<BytesRef> arc = arcs[0];
assert arc == currentFrame.arc;
- for(int idx=0;idx<=target.length;idx++) {
+ for (int idx = 0; idx <= target.length; idx++) {
while (true) {
final int savNextEnt = currentFrame.nextEnt;
@@ -264,7 +272,12 @@ final class IntersectTermsEnum extends BaseTermsEnum {
if (term.bytes.length < term.length) {
term.bytes = ArrayUtil.grow(term.bytes, term.length);
}
- System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+ System.arraycopy(
+ currentFrame.suffixBytes,
+ currentFrame.startBytePos,
+ term.bytes,
+ currentFrame.prefix,
+ currentFrame.suffix);
if (isSubBlock && StringHelper.startsWith(target, term)) {
// Recurse
@@ -297,7 +310,12 @@ final class IntersectTermsEnum extends BaseTermsEnum {
currentFrame.suffixesReader.setPosition(savePos);
currentFrame.suffixLengthsReader.setPosition(saveLengthPos);
currentFrame.termState.termBlockOrd = saveTermBlockOrd;
- System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+ System.arraycopy(
+ currentFrame.suffixBytes,
+ currentFrame.startBytePos,
+ term.bytes,
+ currentFrame.prefix,
+ currentFrame.suffix);
term.length = currentFrame.prefix + currentFrame.suffix;
// If the last entry was a block we don't
// need to bother recursing and pushing to
@@ -324,7 +342,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
throw NoMoreTermsException.INSTANCE;
}
final long lastFP = currentFrame.fpOrig;
- currentFrame = stack[currentFrame.ord-1];
+ currentFrame = stack[currentFrame.ord - 1];
currentTransition = currentFrame.transition;
assert currentFrame.lastSubFP == lastFP;
}
@@ -339,14 +357,13 @@ final class IntersectTermsEnum extends BaseTermsEnum {
// Only used internally when there are no more terms in next():
public static final NoMoreTermsException INSTANCE = new NoMoreTermsException();
- private NoMoreTermsException() {
- }
+ private NoMoreTermsException() {}
@Override
public Throwable fillInStackTrace() {
// Do nothing:
return this;
- }
+ }
}
@Override
@@ -365,7 +382,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
boolean isSubBlock = popPushNext();
nextTerm:
-
while (true) {
assert currentFrame.transition == currentTransition;
@@ -402,7 +418,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
// Advance where we are in the automaton to match this label:
while (label > currentTransition.max) {
- if (currentFrame.transitionIndex >= currentFrame.transitionCount-1) {
+ if (currentFrame.transitionIndex >= currentFrame.transitionCount - 1) {
// Pop this frame: no further matches are possible because
// we've moved beyond what the max transition will allow
if (currentFrame.ord == 0) {
@@ -410,7 +426,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
currentFrame = null;
return null;
}
- currentFrame = stack[currentFrame.ord-1];
+ currentFrame = stack[currentFrame.ord - 1];
currentTransition = currentFrame.transition;
isSubBlock = popPushNext();
continue nextTerm;
@@ -492,7 +508,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
state = currentTransition.dest;
int end = currentFrame.startBytePos + currentFrame.suffix;
- for (int idx=currentFrame.startBytePos+1;idx<end;idx++) {
+ for (int idx = currentFrame.startBytePos + 1; idx < end; idx++) {
lastState = state;
state = runAutomaton.step(state, suffixBytes[idx] & 0xff);
if (state == -1) {
@@ -514,7 +530,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
currentFrame.lastState = lastState;
} else if (runAutomaton.isAccept(state)) {
copyTerm();
- assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
+ assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
+ : "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
return term;
} else {
// This term is a prefix of a term accepted by the automaton, but is not itself accepted
@@ -542,7 +559,12 @@ final class IntersectTermsEnum extends BaseTermsEnum {
if (term.bytes.length < len) {
term.bytes = ArrayUtil.grow(term.bytes, len);
}
- System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+ System.arraycopy(
+ currentFrame.suffixBytes,
+ currentFrame.startBytePos,
+ term.bytes,
+ currentFrame.prefix,
+ currentFrame.suffix);
term.length = len;
}
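As the javadoc above says, this enum only "nexts" through the intersection of the automaton and the terms; callers never construct it directly but reach it through Terms#intersect with a NORMAL CompiledAutomaton. A usage sketch, assuming "terms" was obtained from a LeafReader:

import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;

class IntersectSketch {
  // Streams every term of the field that matches foo.*bar.
  static void dumpMatches(Terms terms) throws IOException {
    Automaton a = new RegExp("foo.*bar").toAutomaton();
    CompiledAutomaton compiled = new CompiledAutomaton(a);
    // A null startTerm begins at the first matching term; non-NORMAL
    // automata must go through CompiledAutomaton.getTermsEnum instead.
    TermsEnum it = terms.intersect(compiled, null);
    for (BytesRef t = it.next(); t != null; t = it.next()) {
      System.out.println(t.utf8ToString());
    }
  }
}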
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
index b4e5821..63265e4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
@@ -16,10 +16,8 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.IOException;
import java.util.Arrays;
-
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexOptions;
@@ -78,7 +76,7 @@ final class IntersectTermsEnumFrame {
int numFollowFloorBlocks;
int nextFloorLabel;
-
+
final Transition transition = new Transition();
int transitionIndex;
int transitionCount;
@@ -117,7 +115,7 @@ final class IntersectTermsEnumFrame {
}
void loadNextFloorBlock() throws IOException {
- assert numFollowFloorBlocks > 0: "nextFloorLabel=" + nextFloorLabel;
+ assert numFollowFloorBlocks > 0 : "nextFloorLabel=" + nextFloorLabel;
do {
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
@@ -144,7 +142,8 @@ final class IntersectTermsEnumFrame {
// Must set min to -1 so the "label < min" check never falsely triggers:
transition.min = -1;
- // Must set max to -1 so we immediately realize we need to step to the next transition and then pop this frame:
+ // Must set max to -1 so we immediately realize we need to step to the next transition and
+ // then pop this frame:
transition.max = -1;
}
}
@@ -164,7 +163,7 @@ final class IntersectTermsEnumFrame {
// first block in case it has empty suffix:
if (ite.runAutomaton.isAccept(state) == false && transitionCount != 0) {
// Maybe skip floor blocks:
- assert transitionIndex == 0: "transitionIndex=" + transitionIndex;
+ assert transitionIndex == 0 : "transitionIndex=" + transitionIndex;
while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
numFollowFloorBlocks--;
@@ -265,7 +264,8 @@ final class IntersectTermsEnumFrame {
}
public void nextLeaf() {
- assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ assert nextEnt != -1 && nextEnt < entCount
+ : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
suffix = suffixLengthsReader.readVInt();
startBytePos = suffixesReader.getPosition();
@@ -273,7 +273,8 @@ final class IntersectTermsEnumFrame {
}
public boolean nextNonLeaf() {
- assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ assert nextEnt != -1 && nextEnt < entCount
+ : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
final int code = suffixLengthsReader.readVInt();
suffix = code >>> 1;
@@ -336,12 +337,12 @@ final class IntersectTermsEnumFrame {
}
} else {
termState.docFreq = statsReader.readVInt();
- //if (DEBUG) System.out.println(" dF=" + state.docFreq);
+ // if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
termState.totalTermFreq = termState.docFreq; // all postings have freq=1
} else {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
- //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
+ // if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
}
// metadata
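The floor-block reads above (fp = fpOrig + (floorDataReader.readVLong() >>> 1)) decode a VLong whose low bit flags whether the target block has terms, with the file-pointer delta in the remaining bits. A minimal sketch of that packing, restated here as an assumption rather than the writer's exact code:

// Pack a file-pointer delta plus a hasTerms bit into one VLong-friendly
// value; the reader recovers the delta with ">>> 1" as in loadNextFloorBlock().
class FloorCodeSketch {
  static long encode(long fpDelta, boolean hasTerms) {
    return (fpDelta << 1) | (hasTerms ? 1 : 0);
  }

  public static void main(String[] args) {
    long code = encode(1234L, true);
    long fpDelta = code >>> 1;          // 1234
    boolean hasTerms = (code & 1) != 0; // true
    System.out.println(fpDelta + " hasTerms=" + hasTerms);
  }
}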
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
index 587aaeb..b583478 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
@@ -16,10 +16,8 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.IOException;
import java.io.PrintStream;
-
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
@@ -35,7 +33,6 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
/** Iterates through terms in this field. */
-
final class SegmentTermsEnum extends BaseTermsEnum {
// Lazy init:
@@ -49,11 +46,12 @@ final class SegmentTermsEnum extends BaseTermsEnum {
private int targetBeforeCurrentLength;
- //static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+ // static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
- // What prefix of the current term was present in the index; when we only next() through the index, this stays at 0. It's only set when
+ // What prefix of the current term was present in the index; when we only next() through the
+ // index, this stays at 0. It's only set when
// we seekCeil/Exact:
private int validIndexPrefix;
@@ -63,7 +61,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final BytesRefBuilder term = new BytesRefBuilder();
private final FST.BytesReader fstReader;
- @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];
public SegmentTermsEnum(FieldReader fr) throws IOException {
this.fr = fr;
@@ -72,7 +71,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// System.out.println("BTTR.init seg=" + fr.parent.segment);
// }
stack = new SegmentTermsEnumFrame[0];
-
+
// Used to hold seek by TermState, or cached seek
staticFrame = new SegmentTermsEnumFrame(this, -1);
@@ -84,7 +83,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// Init w/ root block; don't use index since it may
// not (and need not) have been loaded
- for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
+ for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) {
arcs[arcIdx] = new FST.Arc<>();
}
@@ -97,18 +96,18 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else {
arc = null;
}
- //currentFrame = pushFrame(arc, rootCode, 0);
- //currentFrame.loadBlock();
+ // currentFrame = pushFrame(arc, rootCode, 0);
+ // currentFrame.loadBlock();
validIndexPrefix = 0;
// if (DEBUG) {
// System.out.println("init frame state " + currentFrame.ord);
// printSeekState();
// }
- //System.out.println();
+ // System.out.println();
// computeBlockStats().print(System.out);
}
-
+
// Not private to avoid synthetic access$NNN methods
void initIndexInput() {
if (this.in == null) {
@@ -116,15 +115,14 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
}
- /** Runs next() through the entire terms dict,
- * computing aggregate statistics. */
+ /** Runs next() through the entire terms dict, computing aggregate statistics. */
public Stats computeBlockStats() throws IOException {
Stats stats = new Stats(fr.parent.segment, fr.fieldInfo.name);
if (fr.index != null) {
stats.indexNumBytes = fr.index.ramBytesUsed();
}
-
+
currentFrame = staticFrame;
FST.Arc<BytesRef> arc;
if (fr.index != null) {
@@ -160,7 +158,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
break allTerms;
}
final long lastFP = currentFrame.fpOrig;
- currentFrame = stack[currentFrame.ord-1];
+ currentFrame = stack[currentFrame.ord - 1];
assert lastFP == currentFrame.lastSubFP;
// if (DEBUG) {
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
@@ -168,7 +166,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
}
- while(true) {
+ while (true) {
if (currentFrame.next()) {
// Push to new block:
currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length());
@@ -207,9 +205,11 @@ final class SegmentTermsEnum extends BaseTermsEnum {
private SegmentTermsEnumFrame getFrame(int ord) throws IOException {
if (ord >= stack.length) {
- final SegmentTermsEnumFrame[] next = new SegmentTermsEnumFrame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ final SegmentTermsEnumFrame[] next =
+ new SegmentTermsEnumFrame
+ [ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(stack, 0, next, 0, stack.length);
- for(int stackOrd=stack.length;stackOrd<next.length;stackOrd++) {
+ for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) {
next[stackOrd] = new SegmentTermsEnumFrame(this, stackOrd);
}
stack = next;
@@ -220,10 +220,11 @@ final class SegmentTermsEnum extends BaseTermsEnum {
private FST.Arc<BytesRef> getArc(int ord) {
if (ord >= arcs.length) {
- @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] next =
- new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ final FST.Arc<BytesRef>[] next =
+ new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(arcs, 0, next, 0, arcs.length);
- for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) {
+ for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) {
next[arcOrd] = new FST.Arc<>();
}
arcs = next;
@@ -232,11 +233,12 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// Pushes a frame we seek'd to
- SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length) throws IOException {
+ SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length)
+ throws IOException {
scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
final long code = scratchReader.readVLong();
final long fpSeek = code >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
- final SegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
+ final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.hasTerms = (code & BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS) != 0;
f.hasTermsOrig = f.hasTerms;
f.isFloor = (code & BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0;
@@ -251,11 +253,14 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// Pushes next'd frame or seek'd frame; we later
// lazy-load the frame only when needed
SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, long fp, int length) throws IOException {
- final SegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
+ final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
- //if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
- //if (f.prefix > targetBeforeCurrentLength) {
+ // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
+ // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+ // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
+ // term.length + " vs prefix=" + f.prefix);
+ // if (f.prefix > targetBeforeCurrentLength) {
if (f.ord > targetBeforeCurrentLength) {
f.rewind();
} else {
@@ -273,7 +278,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// final int sav = term.length;
// term.length = length;
- // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+ // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
+ // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// term.length = sav;
// }
}
@@ -322,7 +328,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
if (fr.size() > 0 && (target.compareTo(fr.getMin()) < 0 || target.compareTo(fr.getMax()) > 0)) {
- return false;
+ return false;
}
term.grow(1 + target.length);
@@ -330,7 +336,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert clearEOF();
// if (DEBUG) {
- // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+ // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
+ // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
+ // + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@@ -357,7 +365,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert arc.isFinal();
output = arc.output();
targetUpto = 0;
-
+
SegmentTermsEnumFrame lastFrame = stack[0];
assert validIndexPrefix <= term.length();
@@ -370,20 +378,27 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// First compare up to valid seek frames:
while (targetUpto < targetLimit) {
- cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+ cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
- // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
+ // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit
+ // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+ // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
+ // + " output=" + output);
// }
if (cmp != 0) {
break;
}
- arc = arcs[1+targetUpto];
- assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+ arc = arcs[1 + targetUpto];
+ assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF)
+ : "arc.label="
+ + (char) arc.label()
+ + " targetLabel="
+ + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) {
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
}
if (arc.isFinal()) {
- lastFrame = stack[1+lastFrame.ord];
+ lastFrame = stack[1 + lastFrame.ord];
}
targetUpto++;
}
@@ -397,9 +412,12 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// equal or after the current term
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
- cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+ cmp =
+ (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
- // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+ // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" +
+ // targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset +
+ // targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (cmp != 0) {
break;
@@ -418,7 +436,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// term, ie, app is seeking multiple terms
// in sorted order
// if (DEBUG) {
- // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
+ // System.out.println(" target is after current (shares prefixLen=" + targetUpto + ");
+ // frame.ord=" + lastFrame.ord);
// }
currentFrame = lastFrame;
@@ -429,7 +448,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// (so we scan from the start)
targetBeforeCurrentLength = lastFrame.ord;
// if (DEBUG) {
- // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+ // System.out.println(" target is before current (shares prefixLen=" + targetUpto + ");
+ // rewind frame ord=" + lastFrame.ord);
// }
currentFrame = lastFrame;
currentFrame.rewind();
@@ -446,9 +466,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// System.out.println(" target is same as current but term doesn't exist");
// }
}
- //validIndexPrefix = currentFrame.depth;
- //term.length = target.length;
- //return termExists;
+ // validIndexPrefix = currentFrame.depth;
+ // term.length = target.length;
+ // return termExists;
}
} else {
@@ -468,38 +488,44 @@ final class SegmentTermsEnum extends BaseTermsEnum {
currentFrame = staticFrame;
- //term.length = 0;
+ // term.length = 0;
targetUpto = 0;
- currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
+ currentFrame =
+ pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
}
// if (DEBUG) {
- // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+ // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
+ // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+ // targetBeforeCurrentLength);
// }
- // We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
+ // We are done sharing the common prefix with the incoming target and where we are currently
+ // seek'd; now continue walking the index:
while (targetUpto < target.length) {
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
- final FST.Arc<BytesRef> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+ final FST.Arc<BytesRef> nextArc =
+ fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);
if (nextArc == null) {
// Index is exhausted
// if (DEBUG) {
- // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+ // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " +
+ // toHex(targetLabel));
// }
-
+
validIndexPrefix = currentFrame.prefix;
- //validIndexPrefix = targetUpto;
+ // validIndexPrefix = targetUpto;
currentFrame.scanToFloorFrame(target);
if (!currentFrame.hasTerms) {
termExists = false;
term.setByteAt(targetUpto, (byte) targetLabel);
- term.setLength(1+targetUpto);
+ term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// }
@@ -508,7 +534,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
currentFrame.loadBlock();
- final SeekStatus result = currentFrame.scanToTerm(target, true);
+ final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
@@ -516,7 +542,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
} else {
// if (DEBUG) {
- // System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
+ // System.out.println(" got " + result + "; return NOT_FOUND term=" +
+ // brToString(term));
// }
return false;
}
@@ -531,19 +558,25 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
- // System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+ // System.out.println(" index: follow label=" + toHex(target.bytes[target.offset +
+ // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
targetUpto++;
if (arc.isFinal()) {
- //if (DEBUG) System.out.println(" arc is final!");
- currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
- //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+ // if (DEBUG) System.out.println(" arc is final!");
+ currentFrame =
+ pushFrame(
+ arc,
+ BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
+ targetUpto);
+ // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
+ // currentFrame.hasTerms);
}
}
}
- //validIndexPrefix = targetUpto;
+ // validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
currentFrame.scanToFloorFrame(target);
@@ -560,7 +593,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
currentFrame.loadBlock();
- final SeekStatus result = currentFrame.scanToTerm(target, true);
+ final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
@@ -568,7 +601,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
} else {
// if (DEBUG) {
- // System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
+ // System.out.println(" got result " + result + "; return NOT_FOUND term=" +
+ // term.utf8ToString());
// }
return false;
@@ -587,7 +621,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert clearEOF();
// if (DEBUG) {
- // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
+ // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
+ // fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
+ // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@@ -606,15 +642,15 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// seeks to foobaz, we can re-use the seek state
// for the first 5 bytes.
- //if (DEBUG) {
- //System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
- //}
+ // if (DEBUG) {
+ // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
+ // }
arc = arcs[0];
assert arc.isFinal();
output = arc.output();
targetUpto = 0;
-
+
SegmentTermsEnumFrame lastFrame = stack[0];
assert validIndexPrefix <= term.length();
@@ -627,15 +663,22 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// First compare up to valid seek frames:
while (targetUpto < targetLimit) {
- cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
- //if (DEBUG) {
- //System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + " output=" + output);
- //}
+ cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
+ // if (DEBUG) {
+ // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
+ // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
+ // vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
+ // " output=" + output);
+ // }
if (cmp != 0) {
break;
}
- arc = arcs[1+targetUpto];
- assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+ arc = arcs[1 + targetUpto];
+ assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF)
+ : "arc.label="
+ + (char) arc.label()
+ + " targetLabel="
+ + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
// TODO: we could save the outputs in local
// byte[][] instead of making new objs ever
// seek; but, often the FST doesn't have any
@@ -645,22 +688,24 @@ final class SegmentTermsEnum extends BaseTermsEnum {
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
}
if (arc.isFinal()) {
- lastFrame = stack[1+lastFrame.ord];
+ lastFrame = stack[1 + lastFrame.ord];
}
targetUpto++;
}
-
if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but
// don't save arc/output/frame:
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
- cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
- //if (DEBUG) {
- //System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")");
- //}
+ cmp =
+ (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
+ // if (DEBUG) {
+ // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit
+ // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto])
+ // + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")");
+ // }
if (cmp != 0) {
break;
}
@@ -677,9 +722,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// Common case: target term is after current
// term, ie, app is seeking multiple terms
// in sorted order
- //if (DEBUG) {
- //System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); clear frame.scanned ord=" + lastFrame.ord);
- //}
+ // if (DEBUG) {
+ // System.out.println(" target is after current (shares prefixLen=" + targetUpto + ");
+ // clear frame.scanned ord=" + lastFrame.ord);
+ // }
currentFrame = lastFrame;
} else if (cmp > 0) {
@@ -688,23 +734,24 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// keep the currentFrame but we must rewind it
// (so we scan from the start)
targetBeforeCurrentLength = 0;
- //if (DEBUG) {
- //System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
- //}
+ // if (DEBUG) {
+ // System.out.println(" target is before current (shares prefixLen=" + targetUpto + ");
+ // rewind frame ord=" + lastFrame.ord);
+ // }
currentFrame = lastFrame;
currentFrame.rewind();
} else {
// Target is exactly the same as current term
assert term.length() == target.length;
if (termExists) {
- //if (DEBUG) {
- //System.out.println(" target is same as current; return FOUND");
- //}
+ // if (DEBUG) {
+ // System.out.println(" target is same as current; return FOUND");
+ // }
return SeekStatus.FOUND;
} else {
- //if (DEBUG) {
- //System.out.println(" target is same as current but term doesn't exist");
- //}
+ // if (DEBUG) {
+ // System.out.println(" target is same as current but term doesn't exist");
+ // }
}
}
@@ -717,65 +764,71 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert arc.isFinal();
assert arc.output() != null;
- //if (DEBUG) {
- //System.out.println(" no seek state; push root frame");
- //}
+ // if (DEBUG) {
+ // System.out.println(" no seek state; push root frame");
+ // }
output = arc.output();
currentFrame = staticFrame;
- //term.length = 0;
+ // term.length = 0;
targetUpto = 0;
- currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
+ currentFrame =
+ pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
}
- //if (DEBUG) {
- //System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
- //}
+ // if (DEBUG) {
+ // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
+ // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+ // targetBeforeCurrentLength);
+ // }
- // We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
+ // We are done sharing the common prefix with the incoming target and where we are currently
+ // seek'd; now continue walking the index:
while (targetUpto < target.length) {
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
- final FST.Arc<BytesRef> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+ final FST.Arc<BytesRef> nextArc =
+ fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);
if (nextArc == null) {
// Index is exhausted
// if (DEBUG) {
- // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + targetLabel);
+ // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " +
+ // targetLabel);
// }
-
+
validIndexPrefix = currentFrame.prefix;
- //validIndexPrefix = targetUpto;
+ // validIndexPrefix = targetUpto;
currentFrame.scanToFloorFrame(target);
currentFrame.loadBlock();
- //if (DEBUG) System.out.println(" now scanToTerm");
+ // if (DEBUG) System.out.println(" now scanToTerm");
final SeekStatus result = currentFrame.scanToTerm(target, false);
if (result == SeekStatus.END) {
term.copyBytes(target);
termExists = false;
if (next() != null) {
- //if (DEBUG) {
- //System.out.println(" return NOT_FOUND term=" + brToString(term));
- //}
+ // if (DEBUG) {
+ // System.out.println(" return NOT_FOUND term=" + brToString(term));
+ // }
return SeekStatus.NOT_FOUND;
} else {
- //if (DEBUG) {
- //System.out.println(" return END");
- //}
+ // if (DEBUG) {
+ // System.out.println(" return END");
+ // }
return SeekStatus.END;
}
} else {
- //if (DEBUG) {
- //System.out.println(" return " + result + " term=" + brToString(term));
- //}
+ // if (DEBUG) {
+ // System.out.println(" return " + result + " term=" + brToString(term));
+ // }
return result;
}
} else {
@@ -788,20 +841,26 @@ final class SegmentTermsEnum extends BaseTermsEnum {
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
}
- //if (DEBUG) {
- //System.out.println(" index: follow label=" + (target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
- //}
+ // if (DEBUG) {
+ // System.out.println(" index: follow label=" + (target.bytes[target.offset +
+ // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+ // }
targetUpto++;
if (arc.isFinal()) {
- //if (DEBUG) System.out.println(" arc is final!");
- currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
- //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+ // if (DEBUG) System.out.println(" arc is final!");
+ currentFrame =
+ pushFrame(
+ arc,
+ BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
+ targetUpto);
+ // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
+ // currentFrame.hasTerms);
}
}
}
- //validIndexPrefix = targetUpto;
+ // validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
currentFrame.scanToFloorFrame(target);
@@ -814,14 +873,14 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.copyBytes(target);
termExists = false;
if (next() != null) {
- //if (DEBUG) {
- //System.out.println(" return NOT_FOUND term=" + term.get().utf8ToString() + " " + term);
- //}
+ // if (DEBUG) {
+ // System.out.println(" return NOT_FOUND term=" + term.get().utf8ToString() + " " + term);
+ // }
return SeekStatus.NOT_FOUND;
} else {
- //if (DEBUG) {
- //System.out.println(" return END");
- //}
+ // if (DEBUG) {
+ // System.out.println(" return END");
+ // }
return SeekStatus.END;
}
} else {
@@ -837,19 +896,79 @@ final class SegmentTermsEnum extends BaseTermsEnum {
out.println(" prior seek state:");
int ord = 0;
boolean isSeekFrame = true;
- while(true) {
+ while (true) {
SegmentTermsEnumFrame f = getFrame(ord);
assert f != null;
final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix);
if (f.nextEnt == -1) {
- out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<< BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_ [...]
+ out.println(
+ " frame "
+ + (isSeekFrame ? "(seek)" : "(next)")
+ + " ord="
+ + ord
+ + " fp="
+ + f.fp
+ + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
+ + " prefixLen="
+ + f.prefix
+ + " prefix="
+ + prefix
+ + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ + " hasTerms="
+ + f.hasTerms
+ + " isFloor="
+ + f.isFloor
+ + " code="
+ + ((f.fp << BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
+ + (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
+ + (f.isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
+ + " isLastInFloor="
+ + f.isLastInFloor
+ + " mdUpto="
+ + f.metaDataUpto
+ + " tbOrd="
+ + f.getTermBlockOrd());
} else {
- out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<< BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS:0) + (f.i [...]
+ out.println(
+ " frame "
+ + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)")
+ + " ord="
+ + ord
+ + " fp="
+ + f.fp
+ + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
+ + " prefixLen="
+ + f.prefix
+ + " prefix="
+ + prefix
+ + " nextEnt="
+ + f.nextEnt
+ + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ + " hasTerms="
+ + f.hasTerms
+ + " isFloor="
+ + f.isFloor
+ + " code="
+ + ((f.fp << BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
+ + (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
+ + (f.isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
+ + " lastSubFP="
+ + f.lastSubFP
+ + " isLastInFloor="
+ + f.isLastInFloor
+ + " mdUpto="
+ + f.metaDataUpto
+ + " tbOrd="
+ + f.getTermBlockOrd());
}
if (fr.index != null) {
- assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
- if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) {
- out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
+ assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
+ if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) {
+ out.println(
+ " broken seek state: arc.label="
+ + (char) f.arc.label()
+ + " vs term byte="
+ + (char) (term.byteAt(f.prefix - 1) & 0xFF));
throw new RuntimeException("seek state is broken");
}
BytesRef output = Util.get(fr.index, prefix);
@@ -857,11 +976,19 @@ final class SegmentTermsEnum extends BaseTermsEnum {
out.println(" broken seek state: prefix is not final in index");
throw new RuntimeException("seek state is broken");
} else if (isSeekFrame && !f.isFloor) {
- final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset, output.length);
+ final ByteArrayDataInput reader =
+ new ByteArrayDataInput(output.bytes, output.offset, output.length);
final long codeOrig = reader.readVLong();
- final long code = (f.fp << BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS:0) | (f.isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR:0);
+ final long code =
+ (f.fp << BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
+ | (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
+ | (f.isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
if (codeOrig != code) {
- out.println(" broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
+ out.println(
+ " broken seek state: output code="
+ + codeOrig
+ + " doesn't match frame code="
+ + code);
throw new RuntimeException("seek state is broken");
}
}
@@ -878,8 +1005,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
/* Decodes only the term bytes of the next term. If caller then asks for
- metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
- decode all metadata up to the current term. */
+ metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
+ decode all metadata up to the current term. */
@Override
public BytesRef next() throws IOException {
if (in == null) {
@@ -900,7 +1027,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
- // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+ // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
+ // termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
+ // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@@ -911,7 +1040,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// docFreq, etc. But, if they then call next(),
// this method catches up all internal state so next()
// works properly:
- //if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " + term);
+ // if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " +
+ // term);
final boolean result = seekExact(term.get());
assert result;
}
@@ -923,9 +1053,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
currentFrame.loadNextFloorBlock();
break;
} else {
- //if (DEBUG) System.out.println(" pop frame");
+ // if (DEBUG) System.out.println(" pop frame");
if (currentFrame.ord == 0) {
- //if (DEBUG) System.out.println(" return null");
+ // if (DEBUG) System.out.println(" return null");
assert setEOF();
term.clear();
validIndexPrefix = 0;
@@ -934,7 +1064,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return null;
}
final long lastFP = currentFrame.fpOrig;
- currentFrame = stack[currentFrame.ord-1];
+ currentFrame = stack[currentFrame.ord - 1];
if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
// We popped into a frame that's not loaded
@@ -947,23 +1077,24 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// Note that the seek state (last seek) has been
// invalidated beyond this depth
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
- //if (DEBUG) {
- //System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
- //}
+ // if (DEBUG) {
+ // System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
+ // }
}
}
- while(true) {
+ while (true) {
if (currentFrame.next()) {
// Push to new block:
- //if (DEBUG) System.out.println(" push frame");
+ // if (DEBUG) System.out.println(" push frame");
currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length());
// This is a "next" frame -- even if it's
// floor'd we must pretend it isn't so we don't
// try to scan to the right floor frame:
currentFrame.loadBlock();
} else {
- //if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" + currentFrame.ord);
+ // if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
+ // + currentFrame.ord);
return term.get();
}
}
@@ -978,9 +1109,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
@Override
public int docFreq() throws IOException {
assert !eof;
- //if (DEBUG) System.out.println("BTR.docFreq");
+ // if (DEBUG) System.out.println("BTR.docFreq");
currentFrame.decodeMetaData();
- //if (DEBUG) System.out.println(" return " + currentFrame.state.docFreq);
+ // if (DEBUG) System.out.println(" return " + currentFrame.state.docFreq);
return currentFrame.state.docFreq;
}
@@ -994,33 +1125,34 @@ final class SegmentTermsEnum extends BaseTermsEnum {
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
assert !eof;
- //if (DEBUG) {
- //System.out.println("BTTR.docs seg=" + segment);
- //}
+ // if (DEBUG) {
+ // System.out.println("BTTR.docs seg=" + segment);
+ // }
currentFrame.decodeMetaData();
- //if (DEBUG) {
- //System.out.println(" state=" + currentFrame.state);
- //}
+ // if (DEBUG) {
+ // System.out.println(" state=" + currentFrame.state);
+ // }
return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, reuse, flags);
}
@Override
public ImpactsEnum impacts(int flags) throws IOException {
assert !eof;
- //if (DEBUG) {
- //System.out.println("BTTR.docs seg=" + segment);
- //}
+ // if (DEBUG) {
+ // System.out.println("BTTR.docs seg=" + segment);
+ // }
currentFrame.decodeMetaData();
- //if (DEBUG) {
- //System.out.println(" state=" + currentFrame.state);
- //}
+ // if (DEBUG) {
+ // System.out.println(" state=" + currentFrame.state);
+ // }
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
}
@Override
public void seekExact(BytesRef target, TermState otherState) {
// if (DEBUG) {
- // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
+ // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" +
+ // target.utf8ToString() + " " + target + " state=" + otherState);
// }
assert clearEOF();
if (target.compareTo(term.get()) != 0 || !termExists) {
@@ -1037,13 +1169,13 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// }
}
}
-
+
@Override
public TermState termState() throws IOException {
assert !eof;
currentFrame.decodeMetaData();
TermState ts = currentFrame.state.clone();
- //if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
+ // if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
return ts;
}
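
Note the assert setEOF() and assert clearEOF() calls that survive the reformatting above: each helper unconditionally returns true, so the assert itself can never fail and the eof bookkeeping runs only when assertions are enabled. A stripped-down sketch of that idiom, with hypothetical names (the real helpers live in SegmentTermsEnum):

    final class AssertSideEffectSketch {
      private boolean eof;

      // Always returns true: the assert below exists only to gate the side
      // effect behind -ea, so production runs skip the bookkeeping entirely.
      private boolean setEOF() {
        eof = true;
        return true;
      }

      void markEnd() {
        assert setEOF(); // no-op without -ea; records the debug flag with it
      }
    }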
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java
index 19c321c..eb91b04 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java
@@ -16,10 +16,8 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.IOException;
import java.util.Arrays;
-
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexOptions;
@@ -39,7 +37,7 @@ final class SegmentTermsEnumFrame {
FST.Arc<BytesRef> arc;
- //static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+ // static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
// File pointer where this block was loaded from
long fp;
@@ -118,13 +116,15 @@ final class SegmentTermsEnumFrame {
if (numBytes > floorData.length) {
floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
}
- System.arraycopy(source.bytes, source.offset+in.getPosition(), floorData, 0, numBytes);
+ System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
floorDataReader.reset(floorData, 0, numBytes);
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
- //if (DEBUG) {
- //System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
- //}
+ // if (DEBUG) {
+ // System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new
+ // BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks="
+ // + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
+ // }
}
public int getTermBlockOrd() {
@@ -132,24 +132,24 @@ final class SegmentTermsEnumFrame {
}
void loadNextFloorBlock() throws IOException {
- //if (DEBUG) {
- //System.out.println(" loadNextFloorBlock fp=" + fp + " fpEnd=" + fpEnd);
- //}
- assert arc == null || isFloor: "arc=" + arc + " isFloor=" + isFloor;
+ // if (DEBUG) {
+ // System.out.println(" loadNextFloorBlock fp=" + fp + " fpEnd=" + fpEnd);
+ // }
+ assert arc == null || isFloor : "arc=" + arc + " isFloor=" + isFloor;
fp = fpEnd;
nextEnt = -1;
loadBlock();
}
/* Does initial decode of next block of terms; this
- doesn't actually decode the docFreq, totalTermFreq,
- postings details (frq/prx offset, etc.) metadata;
- it just loads them as byte[] blobs which are then
- decoded on-demand if the metadata is ever requested
- for any term in this block. This enables terms-only
- intensive consumes (eg certain MTQs, respelling) to
- not pay the price of decoding metadata they won't
- use. */
+ doesn't actually decode the docFreq, totalTermFreq,
+ postings details (frq/prx offset, etc.) metadata;
+ it just loads them as byte[] blobs which are then
+ decoded on-demand if the metadata is ever requested
+ for any term in this block. This enables terms-only
+ intensive consumes (eg certain MTQs, respelling) to
+ not pay the price of decoding metadata they won't
+ use. */
void loadBlock() throws IOException {
// Clone the IndexInput lazily, so that consumers
@@ -161,7 +161,7 @@ final class SegmentTermsEnumFrame {
// Already loaded
return;
}
- //System.out.println("blc=" + blockLoadCount);
+ // System.out.println("blc=" + blockLoadCount);
ste.in.seek(fp);
int code = ste.in.readVInt();
@@ -169,7 +169,8 @@ final class SegmentTermsEnumFrame {
assert entCount > 0;
isLastInFloor = (code & 1) != 0;
- assert arc == null || (isLastInFloor || isFloor): "fp=" + fp + " arc=" + arc + " isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor;
+ assert arc == null || (isLastInFloor || isFloor)
+ : "fp=" + fp + " arc=" + arc + " isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor;
// TODO: if suffixes were stored in random-access
// array structure, then we could do binary search
@@ -218,12 +219,12 @@ final class SegmentTermsEnumFrame {
totalSuffixBytes = ste.in.getFilePointer() - startSuffixFP;
/*if (DEBUG) {
- if (arc == null) {
- System.out.println(" loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
- } else {
- System.out.println(" loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
- }
- }*/
+ if (arc == null) {
+ System.out.println(" loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
+ } else {
+ System.out.println(" loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
+ }
+ }*/
// stats
int numBytes = ste.in.readVInt();
@@ -316,8 +317,10 @@ final class SegmentTermsEnumFrame {
}
public void nextLeaf() {
- //if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
- assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
+ // entCount=" + entCount);
+ assert nextEnt != -1 && nextEnt < entCount
+ : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
suffix = suffixLengthsReader.readVInt();
startBytePos = suffixesReader.getPosition();
@@ -328,10 +331,12 @@ final class SegmentTermsEnumFrame {
}
public boolean nextNonLeaf() throws IOException {
- //if (DEBUG) System.out.println(" stef.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + suffixesReader.getPosition());
+ // if (DEBUG) System.out.println(" stef.next ord=" + ord + " nextEnt=" + nextEnt + " entCount="
+ // + entCount + " fp=" + suffixesReader.getPosition());
while (true) {
if (nextEnt == entCount) {
- assert arc == null || (isFloor && isLastInFloor == false): "isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor;
+ assert arc == null || (isFloor && isLastInFloor == false)
+ : "isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor;
loadNextFloorBlock();
if (isLeafBlock) {
nextLeaf();
@@ -340,8 +345,9 @@ final class SegmentTermsEnumFrame {
continue;
}
}
-
- assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+
+ assert nextEnt != -1 && nextEnt < entCount
+ : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
final int code = suffixLengthsReader.readVInt();
suffix = code >>> 1;
@@ -360,14 +366,14 @@ final class SegmentTermsEnumFrame {
ste.termExists = false;
subCode = suffixLengthsReader.readVLong();
lastSubFP = fp - subCode;
- //if (DEBUG) {
- //System.out.println(" lastSubFP=" + lastSubFP);
- //}
+ // if (DEBUG) {
+ // System.out.println(" lastSubFP=" + lastSubFP);
+ // }
return true;
}
}
}
-
+
// TODO: make this array'd so we can do bin search?
// likely not worth it? need to measure how many
// floor blocks we "typically" get
@@ -375,7 +381,8 @@ final class SegmentTermsEnumFrame {
if (!isFloor || target.length <= prefix) {
// if (DEBUG) {
- // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
+ // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" +
+ // target.length + " vs prefix=" + prefix);
// }
return;
}
@@ -383,7 +390,9 @@ final class SegmentTermsEnumFrame {
final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
// if (DEBUG) {
- // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+ // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" +
+ // toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks="
+ // + numFollowFloorBlocks);
// }
if (targetLabel < nextFloorLabel) {
@@ -401,16 +410,18 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
- // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+ // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
+ // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
-
+
isLastInFloor = numFollowFloorBlocks == 1;
numFollowFloorBlocks--;
if (isLastInFloor) {
nextFloorLabel = 256;
// if (DEBUG) {
- // System.out.println(" stop! last block nextFloorLabel=" + toHex(nextFloorLabel));
+ // System.out.println(" stop! last block nextFloorLabel=" +
+ // toHex(nextFloorLabel));
// }
break;
} else {
@@ -437,10 +448,11 @@ final class SegmentTermsEnumFrame {
// }
}
}
-
+
public void decodeMetaData() throws IOException {
- //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
+ // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" +
+ // metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
@@ -482,12 +494,12 @@ final class SegmentTermsEnumFrame {
} else {
assert statsSingletonRunLength == 0;
state.docFreq = statsReader.readVInt();
- //if (DEBUG) System.out.println(" dF=" + state.docFreq);
+ // if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
state.totalTermFreq = state.docFreq; // all postings have freq=1
} else {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
- //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
+ // if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
}
@@ -502,7 +514,7 @@ final class SegmentTermsEnumFrame {
// Used only by assert
private boolean prefixMatches(BytesRef target) {
- for(int bytePos=0;bytePos<prefix;bytePos++) {
+ for (int bytePos = 0; bytePos < prefix; bytePos++) {
if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) {
return false;
}
@@ -516,16 +528,17 @@ final class SegmentTermsEnumFrame {
// startBytePos/suffix as a side effect
public void scanToSubBlock(long subFP) {
assert !isLeafBlock;
- //if (DEBUG) System.out.println(" scanToSubBlock fp=" + fp + " subFP=" + subFP + " entCount=" + entCount + " lastSubFP=" + lastSubFP);
- //assert nextEnt == 0;
+ // if (DEBUG) System.out.println(" scanToSubBlock fp=" + fp + " subFP=" + subFP + " entCount="
+ // + entCount + " lastSubFP=" + lastSubFP);
+ // assert nextEnt == 0;
if (lastSubFP == subFP) {
- //if (DEBUG) System.out.println(" already positioned");
+ // if (DEBUG) System.out.println(" already positioned");
return;
}
assert subFP < fp : "fp=" + fp + " subFP=" + subFP;
final long targetSubCode = fp - subFP;
- //if (DEBUG) System.out.println(" targetSubCode=" + targetSubCode);
- while(true) {
+ // if (DEBUG) System.out.println(" targetSubCode=" + targetSubCode);
+ while (true) {
assert nextEnt < entCount;
nextEnt++;
final int code = suffixLengthsReader.readVInt();
@@ -533,7 +546,7 @@ final class SegmentTermsEnumFrame {
if ((code & 1) != 0) {
final long subCode = suffixLengthsReader.readVLong();
if (targetSubCode == subCode) {
- //if (DEBUG) System.out.println(" match!");
+ // if (DEBUG) System.out.println(" match!");
lastSubFP = subFP;
return;
}
@@ -572,7 +585,9 @@ final class SegmentTermsEnumFrame {
// scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
- // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
+ // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
+ // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
+ // brToString(term));
assert nextEnt != -1;
@@ -601,16 +616,22 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.bytes = suffixBytes;
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
- // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+ // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
+ // + brToString(suffixBytesRef));
// }
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
// Loop over bytes in the suffix, comparing to the target
- final int cmp = Arrays.compareUnsigned(
- suffixBytes, startBytePos, startBytePos + suffix,
- target.bytes, target.offset + prefix, target.offset + target.length);
+ final int cmp =
+ Arrays.compareUnsigned(
+ suffixBytes,
+ startBytePos,
+ startBytePos + suffix,
+ target.bytes,
+ target.offset + prefix,
+ target.offset + target.length);
if (cmp < 0) {
// Current entry is still before the target;
@@ -620,7 +641,7 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();
- //if (DEBUG) System.out.println(" not found");
+ // if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else {
// Exact match!
@@ -631,7 +652,7 @@ final class SegmentTermsEnumFrame {
assert ste.termExists;
fillTerm();
- //if (DEBUG) System.out.println(" found!");
+ // if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
} while (nextEnt < entCount);
@@ -645,7 +666,7 @@ final class SegmentTermsEnumFrame {
// to the foo* block, but the last term in this block
// was fooz (and, eg, first term in the next block will
// bee fop).
- //if (DEBUG) System.out.println(" block end");
+ // if (DEBUG) System.out.println(" block end");
if (exactOnly) {
fillTerm();
}
@@ -660,7 +681,9 @@ final class SegmentTermsEnumFrame {
// scan the entries check if the suffix matches.
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
- //if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(target));
+ // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
+ // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
+ // brToString(target));
assert nextEnt != -1;
@@ -675,20 +698,21 @@ final class SegmentTermsEnumFrame {
assert prefixMatches(target);
// Loop over each entry (term or sub-block) in this block:
- while(nextEnt < entCount) {
+ while (nextEnt < entCount) {
nextEnt++;
final int code = suffixLengthsReader.readVInt();
suffix = code >>> 1;
- //if (DEBUG) {
+ // if (DEBUG) {
// BytesRef suffixBytesRef = new BytesRef();
// suffixBytesRef.bytes = suffixBytes;
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
- // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
- //}
+ // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
+ // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+ // }
final int termLen = prefix + suffix;
startBytePos = suffixesReader.getPosition();
@@ -702,9 +726,14 @@ final class SegmentTermsEnumFrame {
lastSubFP = fp - subCode;
}
- final int cmp = Arrays.compareUnsigned(
- suffixBytes, startBytePos, startBytePos + suffix,
- target.bytes, target.offset + prefix, target.offset + target.length);
+ final int cmp =
+ Arrays.compareUnsigned(
+ suffixBytes,
+ startBytePos,
+ startBytePos + suffix,
+ target.bytes,
+ target.offset + prefix,
+ target.offset + target.length);
if (cmp < 0) {
// Current entry is still before the target;
@@ -714,11 +743,12 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();
- //if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists);
+ // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
+ // ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
- //System.out.println(" now pushFrame");
- // TODO this
+ // System.out.println(" now pushFrame");
+ // TODO this
// We are on a sub-block, and caller wants
// us to position to the next term after
// the target, so we must recurse into the
@@ -731,7 +761,7 @@ final class SegmentTermsEnumFrame {
}
}
- //if (DEBUG) System.out.println(" not found");
+ // if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else {
// Exact match!
@@ -742,7 +772,7 @@ final class SegmentTermsEnumFrame {
assert ste.termExists;
fillTerm();
- //if (DEBUG) System.out.println(" found!");
+ // if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
}
@@ -756,7 +786,7 @@ final class SegmentTermsEnumFrame {
// to the foo* block, but the last term in this block
// was fooz (and, eg, first term in the next block will
// bee fop).
- //if (DEBUG) System.out.println(" block end");
+ // if (DEBUG) System.out.println(" block end");
if (exactOnly) {
fillTerm();
}
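
Both scanToTermLeaf and scanToTermNonLeaf above reflow the same core comparison: the current entry's suffix bytes ordered against the target bytes past the shared block prefix, unsigned byte by byte. A self-contained sketch of that call, with illustrative parameter names:

    import java.util.Arrays;

    final class SuffixCompareSketch {
      // Compare the entry's suffix [startBytePos, startBytePos + suffixLen)
      // against the tail of the target after the block's shared prefix.
      // Unsigned comparison matches Lucene's term sort order.
      static int compare(
          byte[] suffixBytes, int startBytePos, int suffixLen,
          byte[] target, int targetOffset, int targetLength, int prefix) {
        return Arrays.compareUnsigned(
            suffixBytes, startBytePos, startBytePos + suffixLen,
            target, targetOffset + prefix, targetOffset + targetLength);
      }
      // cmp < 0: entry sorts before the target, keep scanning this block;
      // cmp > 0: scanned past the target, report NOT_FOUND;
      // cmp == 0: suffix and target tail match exactly, report FOUND.
    }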
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
index c6d1293..4ea4357 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
@@ -16,20 +16,18 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
-
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
- * BlockTree statistics for a single field
- * returned by {@link FieldReader#getStats()}.
+ * BlockTree statistics for a single field returned by {@link FieldReader#getStats()}.
+ *
* @lucene.internal
*/
public class Stats {
@@ -45,23 +43,22 @@ public class Stats {
/** The number of normal (non-floor) blocks in the terms file. */
public int nonFloorBlockCount;
- /** The number of floor blocks (meta-blocks larger than the
- * allowed {@code maxItemsPerBlock}) in the terms file. */
+ /**
+ * The number of floor blocks (meta-blocks larger than the allowed {@code maxItemsPerBlock}) in
+ * the terms file.
+ */
public int floorBlockCount;
-
+
/** The number of sub-blocks within the floor blocks. */
public int floorSubBlockCount;
- /** The number of "internal" blocks (that have both
- * terms and sub-blocks). */
+ /** The number of "internal" blocks (that have both terms and sub-blocks). */
public int mixedBlockCount;
- /** The number of "leaf" blocks (blocks that have only
- * terms). */
+ /** The number of "leaf" blocks (blocks that have only terms). */
public int termsOnlyBlockCount;
- /** The number of "internal" blocks that do not contain
- * terms (have only sub-blocks). */
+ /** The number of "internal" blocks that do not contain terms (have only sub-blocks). */
public int subBlocksOnlyBlockCount;
/** Total number of blocks. */
@@ -69,6 +66,7 @@ public class Stats {
/** Number of blocks at each prefix depth. */
public int[] blockCountByPrefixLen = new int[10];
+
private int startBlockCount;
private int endBlockCount;
@@ -76,23 +74,24 @@ public class Stats {
public long totalBlockSuffixBytes;
/**
- * Number of times each compression method has been used.
- * 0 = uncompressed
- * 1 = lowercase_ascii
- * 2 = LZ4
+ * Number of times each compression method has been used. 0 = uncompressed 1 = lowercase_ascii 2 =
+ * LZ4
*/
public final long[] compressionAlgorithms = new long[3];
/** Total number of suffix bytes before compression. */
public long totalUncompressedBlockSuffixBytes;
- /** Total number of bytes used to store term stats (not
- * including what the {@link PostingsReaderBase}
- * stores. */
+ /**
+ * Total number of bytes used to store term stats (not including what the {@link
+ * PostingsReaderBase} stores.
+ */
public long totalBlockStatsBytes;
- /** Total bytes stored by the {@link PostingsReaderBase},
- * plus the other few vInts stored in the frame. */
+ /**
+ * Total bytes stored by the {@link PostingsReaderBase}, plus the other few vInts stored in the
+ * frame.
+ */
public long totalBlockOtherBytes;
/** Segment name. */
@@ -118,7 +117,7 @@ public class Stats {
}
if (blockCountByPrefixLen.length <= frame.prefix) {
- blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
+ blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1 + frame.prefix);
}
blockCountByPrefixLen[frame.prefix]++;
startBlockCount++;
@@ -145,8 +144,10 @@ public class Stats {
throw new IllegalStateException();
}
endBlockCount++;
- final long otherBytes = frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length();
- assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
+ final long otherBytes =
+ frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length();
+ assert otherBytes > 0
+ : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
totalBlockOtherBytes += otherBytes;
}
@@ -155,9 +156,24 @@ public class Stats {
}
void finish() {
- assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
- assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
- assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
+ assert startBlockCount == endBlockCount
+ : "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
+ assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount
+ : "floorSubBlockCount="
+ + floorSubBlockCount
+ + " nonFloorBlockCount="
+ + nonFloorBlockCount
+ + " totalBlockCount="
+ + totalBlockCount;
+ assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount
+ : "totalBlockCount="
+ + totalBlockCount
+ + " mixedBlockCount="
+ + mixedBlockCount
+ + " subBlocksOnlyBlockCount="
+ + subBlocksOnlyBlockCount
+ + " termsOnlyBlockCount="
+ + termsOnlyBlockCount;
}
@Override
@@ -169,21 +185,38 @@ public class Stats {
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
-
+
out.println(" index FST:");
out.println(" " + indexNumBytes + " bytes");
out.println(" terms:");
out.println(" " + totalTermCount + " terms");
- out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
+ out.println(
+ " "
+ + totalTermBytes
+ + " bytes"
+ + (totalTermCount != 0
+ ? " ("
+ + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes) / totalTermCount)
+ + " bytes/term)"
+ : ""));
out.println(" blocks:");
out.println(" " + totalBlockCount + " blocks");
out.println(" " + termsOnlyBlockCount + " terms-only blocks");
out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks");
out.println(" " + mixedBlockCount + " mixed blocks");
out.println(" " + floorBlockCount + " floor blocks");
- out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
+ out.println(" " + (totalBlockCount - floorSubBlockCount) + " non-floor blocks");
out.println(" " + floorSubBlockCount + " floor sub-blocks");
- out.println(" " + totalUncompressedBlockSuffixBytes + " term suffix bytes before compression" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
+ out.println(
+ " "
+ + totalUncompressedBlockSuffixBytes
+ + " term suffix bytes before compression"
+ + (totalBlockCount != 0
+ ? " ("
+ + String.format(
+ Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes) / totalBlockCount)
+ + " suffix-bytes/block)"
+ : ""));
StringBuilder compressionCounts = new StringBuilder();
for (int code = 0; code < compressionAlgorithms.length; ++code) {
if (compressionAlgorithms[code] == 0) {
@@ -196,14 +229,44 @@ public class Stats {
compressionCounts.append(": ");
compressionCounts.append(compressionAlgorithms[code]);
}
- out.println(" " + totalBlockSuffixBytes + " compressed term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.2f", ((double) totalBlockSuffixBytes)/totalUncompressedBlockSuffixBytes) +
- " compression ratio - compression count by algorithm: " + compressionCounts : "") + ")");
- out.println(" " + totalBlockStatsBytes + " term stats bytes " + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
- out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
+ out.println(
+ " "
+ + totalBlockSuffixBytes
+ + " compressed term suffix bytes"
+ + (totalBlockCount != 0
+ ? " ("
+ + String.format(
+ Locale.ROOT,
+ "%.2f",
+ ((double) totalBlockSuffixBytes) / totalUncompressedBlockSuffixBytes)
+ + " compression ratio - compression count by algorithm: "
+ + compressionCounts
+ : "")
+ + ")");
+ out.println(
+ " "
+ + totalBlockStatsBytes
+ + " term stats bytes "
+ + (totalBlockCount != 0
+ ? " ("
+ + String.format(
+ Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes) / totalBlockCount)
+ + " stats-bytes/block)"
+ : ""));
+ out.println(
+ " "
+ + totalBlockOtherBytes
+ + " other bytes"
+ + (totalBlockCount != 0
+ ? " ("
+ + String.format(
+ Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes) / totalBlockCount)
+ + " other-bytes/block)"
+ : ""));
if (totalBlockCount != 0) {
out.println(" by prefix length:");
int total = 0;
- for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
+ for (int prefix = 0; prefix < blockCountByPrefixLen.length; prefix++) {
final int blockCount = blockCountByPrefixLen[prefix];
total += blockCount;
if (blockCount != 0) {
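
Every println chain reflowed in toString() above follows the same shape: print a byte total, then append a per-unit average only when the denominator is non-zero. That pattern, condensed into one hedged helper (not part of the class):

    import java.util.Locale;

    final class GuardedAverageSketch {
      // Append "(x.y unit)" only when count is non-zero, mirroring the
      // ternaries in Stats.toString(); Locale.ROOT keeps the decimal
      // separator stable across platforms.
      static String withAverage(long totalBytes, long count, String unit) {
        return totalBytes
            + " bytes"
            + (count != 0
                ? " ("
                    + String.format(Locale.ROOT, "%.1f", ((double) totalBytes) / count)
                    + " " + unit + ")"
                : "");
      }
      // e.g. withAverage(1536, 3, "bytes/term") -> "1536 bytes (512.0 bytes/term)"
    }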
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/package-info.java
index 9cdbb02..dec1f2c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/package-info.java
@@ -17,18 +17,12 @@
/**
* BlockTree terms dictionary.
- *
- * <p>
- * This terms dictionary organizes all terms into blocks according to
- * shared prefix, such that each block has enough terms, and then stores
- * the prefix trie in memory as an FST as the index structure. It allows
- * you to plug in your own {@link
- * org.apache.lucene.codecs.PostingsWriterBase} to implement the
- * postings.
- * </p>
- *
- * <p>See {@link org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter}
- * for the file format.
- * </p>
+ *
+ * <p>This terms dictionary organizes all terms into blocks according to shared prefix, such that
+ * each block has enough terms, and then stores the prefix trie in memory as an FST as the index
+ * structure. It allows you to plug in your own {@link org.apache.lucene.codecs.PostingsWriterBase}
+ * to implement the postings.
+ *
+ * <p>See {@link org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter} for the file format.
*/
package org.apache.lucene.codecs.blocktree;
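
The package javadoc above describes the core idea: sorted terms are grouped into blocks by shared prefix, and an FST over the prefixes serves as the in-memory index. A deliberately naive sketch of just the grouping step, nothing like the real BlockTreeTermsWriter (which handles variable-length prefixes, floor blocks, and the FST index):

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    final class PrefixBlockSketch {
      // Group already-sorted terms by a fixed-length prefix; each value list
      // is one "block". Toy model only: real blocks split and merge on
      // min/max item counts rather than a fixed prefix length.
      static Map<String, List<String>> blockByPrefix(List<String> sortedTerms, int prefixLen) {
        Map<String, List<String>> blocks = new LinkedHashMap<>();
        for (String term : sortedTerms) {
          String prefix = term.substring(0, Math.min(prefixLen, term.length()));
          blocks.computeIfAbsent(prefix, p -> new ArrayList<>()).add(term);
        }
        return blocks;
      }
    }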
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
index 5773c16..bc63504 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
@@ -16,9 +16,7 @@
*/
package org.apache.lucene.codecs.compressing;
-
import java.io.IOException;
-
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
@@ -30,17 +28,16 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
-
/**
- * A {@link StoredFieldsFormat} that compresses documents in chunks in
- * order to improve the compression ratio.
- * <p>
- * For a chunk size of <var>chunkSize</var> bytes, this {@link StoredFieldsFormat}
- * does not support documents larger than (<code>2<sup>31</sup> - chunkSize</code>)
- * bytes.
- * <p>
- * For optimal performance, you should use a {@link MergePolicy} that returns
- * segments that have the biggest byte size first.
+ * A {@link StoredFieldsFormat} that compresses documents in chunks in order to improve the
+ * compression ratio.
+ *
+ * <p>For a chunk size of <var>chunkSize</var> bytes, this {@link StoredFieldsFormat} does not
+ * support documents larger than (<code>2<sup>31</sup> - chunkSize</code>) bytes.
+ *
+ * <p>For optimal performance, you should use a {@link MergePolicy} that returns segments that have
+ * the biggest byte size first.
+ *
* @lucene.experimental
*/
public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
@@ -53,42 +50,42 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
private final int blockShift;
/**
- * Create a new {@link CompressingStoredFieldsFormat} with an empty segment
- * suffix.
- *
- * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String, CompressionMode, int, int, int)
+ * Create a new {@link CompressingStoredFieldsFormat} with an empty segment suffix.
+ *
+ * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String,
+ * CompressionMode, int, int, int)
*/
- public CompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockShift) {
+ public CompressingStoredFieldsFormat(
+ String formatName,
+ CompressionMode compressionMode,
+ int chunkSize,
+ int maxDocsPerChunk,
+ int blockShift) {
this(formatName, "", compressionMode, chunkSize, maxDocsPerChunk, blockShift);
}
-
+
/**
* Create a new {@link CompressingStoredFieldsFormat}.
- * <p>
- * <code>formatName</code> is the name of the format. This name will be used
- * in the file formats to perform
- * {@link CodecUtil#checkIndexHeader codec header checks}.
- * <p>
- * <code>segmentSuffix</code> is the segment suffix. This suffix is added to
- * the result file name only if it's not the empty string.
- * <p>
- * The <code>compressionMode</code> parameter allows you to choose between
- * compression algorithms that have various compression and decompression
- * speeds so that you can pick the one that best fits your indexing and
- * searching throughput. You should never instantiate two
- * {@link CompressingStoredFieldsFormat}s that have the same name but
- * different {@link CompressionMode}s.
- * <p>
- * <code>chunkSize</code> is the minimum byte size of a chunk of documents.
- * A value of <code>1</code> can make sense if there is redundancy across
- * fields.
- * <code>maxDocsPerChunk</code> is an upperbound on how many docs may be stored
- * in a single chunk. This is to bound the cpu costs for highly compressible data.
- * <p>
- * Higher values of <code>chunkSize</code> should improve the compression
- * ratio but will require more memory at indexing time and might make document
- * loading a little slower (depending on the size of your OS cache compared
- * to the size of your index).
+ *
+ * <p><code>formatName</code> is the name of the format. This name will be used in the file
+ * formats to perform {@link CodecUtil#checkIndexHeader codec header checks}.
+ *
+ * <p><code>segmentSuffix</code> is the segment suffix. This suffix is added to the result file
+ * name only if it's not the empty string.
+ *
+ * <p>The <code>compressionMode</code> parameter allows you to choose between compression
+ * algorithms that have various compression and decompression speeds so that you can pick the one
+ * that best fits your indexing and searching throughput. You should never instantiate two {@link
+ * CompressingStoredFieldsFormat}s that have the same name but different {@link CompressionMode}s.
+ *
+ * <p><code>chunkSize</code> is the minimum byte size of a chunk of documents. A value of <code>1
+ * </code> can make sense if there is redundancy across fields. <code>maxDocsPerChunk</code> is an
+ * upperbound on how many docs may be stored in a single chunk. This is to bound the cpu costs for
+ * highly compressible data.
+ *
+ * <p>Higher values of <code>chunkSize</code> should improve the compression ratio but will
+ * require more memory at indexing time and might make document loading a little slower (depending
+ * on the size of your OS cache compared to the size of your index).
*
* @param formatName the name of the {@link StoredFieldsFormat}
* @param compressionMode the {@link CompressionMode} to use
@@ -97,8 +94,13 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
* @param blockShift the log in base 2 of number of chunks to store in an index block
* @see CompressionMode
*/
- public CompressingStoredFieldsFormat(String formatName, String segmentSuffix,
- CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockShift) {
+ public CompressingStoredFieldsFormat(
+ String formatName,
+ String segmentSuffix,
+ CompressionMode compressionMode,
+ int chunkSize,
+ int maxDocsPerChunk,
+ int blockShift) {
this.formatName = formatName;
this.segmentSuffix = segmentSuffix;
this.compressionMode = compressionMode;
@@ -110,31 +112,52 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
throw new IllegalArgumentException("maxDocsPerChunk must be >= 1");
}
this.maxDocsPerChunk = maxDocsPerChunk;
- if (blockShift < DirectMonotonicWriter.MIN_BLOCK_SHIFT || blockShift > DirectMonotonicWriter.MAX_BLOCK_SHIFT) {
- throw new IllegalArgumentException("blockSize must be in " + DirectMonotonicWriter.MIN_BLOCK_SHIFT + "-" +
- DirectMonotonicWriter.MAX_BLOCK_SHIFT + ", got " + blockShift);
+ if (blockShift < DirectMonotonicWriter.MIN_BLOCK_SHIFT
+ || blockShift > DirectMonotonicWriter.MAX_BLOCK_SHIFT) {
+ throw new IllegalArgumentException(
+ "blockSize must be in "
+ + DirectMonotonicWriter.MIN_BLOCK_SHIFT
+ + "-"
+ + DirectMonotonicWriter.MAX_BLOCK_SHIFT
+ + ", got "
+ + blockShift);
}
this.blockShift = blockShift;
}
@Override
- public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
- FieldInfos fn, IOContext context) throws IOException {
- return new CompressingStoredFieldsReader(directory, si, segmentSuffix, fn,
- context, formatName, compressionMode);
+ public StoredFieldsReader fieldsReader(
+ Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+ return new CompressingStoredFieldsReader(
+ directory, si, segmentSuffix, fn, context, formatName, compressionMode);
}
@Override
- public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
- IOContext context) throws IOException {
- return new CompressingStoredFieldsWriter(directory, si, segmentSuffix, context,
- formatName, compressionMode, chunkSize, maxDocsPerChunk, blockShift);
+ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context)
+ throws IOException {
+ return new CompressingStoredFieldsWriter(
+ directory,
+ si,
+ segmentSuffix,
+ context,
+ formatName,
+ compressionMode,
+ chunkSize,
+ maxDocsPerChunk,
+ blockShift);
}
@Override
public String toString() {
- return getClass().getSimpleName() + "(compressionMode=" + compressionMode
- + ", chunkSize=" + chunkSize + ", maxDocsPerChunk=" + maxDocsPerChunk + ", blockShift=" + blockShift + ")";
+ return getClass().getSimpleName()
+ + "(compressionMode="
+ + compressionMode
+ + ", chunkSize="
+ + chunkSize
+ + ", maxDocsPerChunk="
+ + maxDocsPerChunk
+ + ", blockShift="
+ + blockShift
+ + ")";
}
-
}
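
The constructor reformatted above is the extension point the javadoc documents. An illustrative instantiation under made-up tuning values (the format name is hypothetical; blockShift must stay within DirectMonotonicWriter's MIN_BLOCK_SHIFT..MAX_BLOCK_SHIFT range, as the check above enforces):

    import org.apache.lucene.codecs.StoredFieldsFormat;
    import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
    import org.apache.lucene.codecs.compressing.CompressionMode;

    final class StoredFieldsFormatSketch {
      static StoredFieldsFormat newFormat() {
        return new CompressingStoredFieldsFormat(
            "MyStoredFields",     // formatName: used for codec header checks
            CompressionMode.FAST, // favor (de)compression speed over ratio
            1 << 14,              // chunkSize: 16 KB minimum chunk of documents
            128,                  // maxDocsPerChunk: bounds CPU cost per chunk
            10);                  // blockShift: log2 of chunks per index block
      }
    }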
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
index 1258eb7..850a023 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs.compressing;
-
... 18272 lines suppressed ...