You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/04/19 12:10:56 UTC
[lucene] branch main updated: LUCENE-9907: Remove unused methods in PackedInts (#94)
This is an automated email from the ASF dual-hosted git repository.
ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 2a7951c LUCENE-9907: Remove unused methods in PackedInts (#94)
2a7951c is described below
commit 2a7951cd30fca245fe949a9f1903a7ef6b815842
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Mon Apr 19 14:10:49 2021 +0200
LUCENE-9907: Remove unused methods in PackedInts (#94)
---
.../org/apache/lucene/util/packed/PackedInts.java | 147 +--------------------
.../apache/lucene/util/packed/TestPackedInts.java | 93 +++++--------
2 files changed, 38 insertions(+), 202 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
index 77a45e6..f02deb2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
@@ -18,10 +18,8 @@ package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;
@@ -506,13 +504,13 @@ public class PackedInts {
}
/**
- * Save this mutable into <code>out</code>. Instantiating a reader from the generated data will
- * return a reader with the same number of bits per value.
+ * Save this mutable into <code>out</code>. This method does not write any metadata to the
+ * stream, meaning that it is your responsibility to store it somewhere else in order to be able
+ * to recover data from the stream later on.
*/
public void save(DataOutput out) throws IOException {
Writer writer =
getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE);
- writer.writeHeader();
for (int i = 0; i < size(); ++i) {
writer.add(get(i));
}
@@ -632,14 +630,6 @@ public class PackedInts {
this.bitsPerValue = bitsPerValue;
}
- void writeHeader() throws IOException {
- assert valueCount != -1;
- CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
- out.writeVInt(bitsPerValue);
- out.writeVInt(valueCount);
- out.writeVInt(getFormat().getId());
- }
-
/** The format used to serialize values. */
protected abstract PackedInts.Format getFormat();
@@ -717,24 +707,6 @@ public class PackedInts {
}
/**
- * Restore a {@link Reader} from a stream.
- *
- * @param in the stream to read data from
- * @return a Reader
- * @throws IOException If there is a low-level I/O error
- * @lucene.internal
- */
- public static Reader getReader(DataInput in) throws IOException {
- final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
- final int bitsPerValue = in.readVInt();
- assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
- final int valueCount = in.readVInt();
- final Format format = Format.byId(in.readVInt());
-
- return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
- }
-
- /**
* Expert: Restore a {@link ReaderIterator} from a stream without reading metadata at the
* beginning of the stream. This method is useful to restore data from streams which have been
* created using {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
@@ -757,76 +729,6 @@ public class PackedInts {
}
/**
- * Retrieve PackedInts as a {@link ReaderIterator}
- *
- * @param in positioned at the beginning of a stored packed int structure.
- * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up
- * iteration)
- * @return an iterator to access the values
- * @throws IOException if the structure could not be retrieved.
- * @lucene.internal
- */
- public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
- final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
- final int bitsPerValue = in.readVInt();
- assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
- final int valueCount = in.readVInt();
- final Format format = Format.byId(in.readVInt());
- return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
- }
-
- /**
- * Expert: Construct a direct {@link Reader} from a stream without reading metadata at the
- * beginning of the stream. This method is useful to restore data from streams which have been
- * created using {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
- *
- * <p>The returned reader will have very little memory overhead, but every call to {@link
- * Reader#get(int)} is likely to perform a disk seek.
- *
- * @param in the stream to read data from
- * @param format the format used to serialize
- * @param version the version used to serialize the data
- * @param valueCount how many values the stream holds
- * @param bitsPerValue the number of bits per value
- * @return a direct Reader
- * @lucene.internal
- */
- public static Reader getDirectReaderNoHeader(
- final IndexInput in, Format format, int version, int valueCount, int bitsPerValue) {
- checkVersion(version);
- switch (format) {
- case PACKED:
- return new DirectPackedReader(bitsPerValue, valueCount, in);
- case PACKED_SINGLE_BLOCK:
- return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
- default:
- throw new AssertionError("Unknown format: " + format);
- }
- }
-
- /**
- * Construct a direct {@link Reader} from an {@link IndexInput}. This method is useful to restore
- * data from streams which have been created using {@link PackedInts#getWriter(DataOutput, int,
- * int, float)}.
- *
- * <p>The returned reader will have very little memory overhead, but every call to {@link
- * Reader#get(int)} is likely to perform a disk seek.
- *
- * @param in the stream to read data from
- * @return a direct Reader
- * @throws IOException If there is a low-level I/O error
- * @lucene.internal
- */
- public static Reader getDirectReader(IndexInput in) throws IOException {
- final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
- final int bitsPerValue = in.readVInt();
- assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
- final int valueCount = in.readVInt();
- final Format format = Format.byId(in.readVInt());
- return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
- }
-
- /**
* Create a packed integer array with the given amount of values initialized to 0. the valueCount
* and the bitsPerValue cannot be changed after creation. All Mutables known by this factory are
* kept fully in RAM.
@@ -914,49 +816,6 @@ public class PackedInts {
}
/**
- * Create a packed integer array writer for the given output, format, value count, and number of
- * bits per value.
- *
- * <p>The resulting stream will be long-aligned. This means that depending on the format which is
- * used under the hoods, up to 63 bits will be wasted. An easy way to make sure that no space is
- * lost is to always use a <code>valueCount</code> that is a multiple of 64.
- *
- * <p>This method writes metadata to the stream, so that the resulting stream is sufficient to
- * restore a {@link Reader} from it. You don't need to track <code>valueCount</code> or <code>
- * bitsPerValue</code> by yourself. In case this is a problem, you should probably look at {@link
- * #getWriterNoHeader(DataOutput, Format, int, int, int)}.
- *
- * <p>The <code>acceptableOverheadRatio</code> parameter controls how readers that will be
- * restored from this stream trade space for speed by selecting a faster but potentially less
- * memory-efficient implementation. An <code>acceptableOverheadRatio</code> of {@link
- * PackedInts#COMPACT} will make sure that the most memory-efficient implementation is selected
- * whereas {@link PackedInts#FASTEST} will make sure that the fastest implementation is selected.
- * In case you are only interested in reading this stream sequentially later on, you should
- * probably use {@link PackedInts#COMPACT}.
- *
- * @param out the data output
- * @param valueCount the number of values
- * @param bitsPerValue the number of bits per value
- * @param acceptableOverheadRatio an acceptable overhead ratio per value
- * @return a Writer
- * @throws IOException If there is a low-level I/O error
- * @lucene.internal
- */
- public static Writer getWriter(
- DataOutput out, int valueCount, int bitsPerValue, float acceptableOverheadRatio)
- throws IOException {
- assert valueCount >= 0;
-
- final FormatAndBits formatAndBits =
- fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
- final Writer writer =
- getWriterNoHeader(
- out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
- writer.writeHeader();
- return writer;
- }
-
- /**
* Returns how many bits are required to hold values up to and including maxValue NOTE: This
* method returns at least 1.
*
diff --git a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
index baff6ae..e16288d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
+++ b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
@@ -25,7 +25,6 @@ import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;
-import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.DataInput;
@@ -103,14 +102,9 @@ public class TestPackedInts extends LuceneTestCase {
final Directory d = newDirectory();
IndexOutput out = d.createOutput("out.bin", newIOContext(random()));
- final float acceptableOverhead;
- if (iter == 0) {
- // have the first iteration go through exact nbits
- acceptableOverhead = 0.0f;
- } else {
- acceptableOverhead = random().nextFloat();
- }
- PackedInts.Writer w = PackedInts.getWriter(out, valueCount, nbits, acceptableOverhead);
+ final int mem = random().nextInt(2 * PackedInts.DEFAULT_BUFFER_SIZE);
+ PackedInts.Writer w =
+ PackedInts.getWriterNoHeader(out, PackedInts.Format.PACKED, valueCount, nbits, mem);
final long startFp = out.getFilePointer();
final int actualValueCount =
@@ -133,24 +127,11 @@ public class TestPackedInts extends LuceneTestCase {
w.getFormat().byteCount(PackedInts.VERSION_CURRENT, valueCount, w.bitsPerValue);
assertEquals(bytes, fp - startFp);
- { // test header
- IndexInput in = d.openInput("out.bin", newIOContext(random()));
- // header = codec header | bitsPerValue | valueCount | format
- CodecUtil.checkHeader(
- in,
- PackedInts.CODEC_NAME,
- PackedInts.VERSION_START,
- PackedInts.VERSION_CURRENT); // codec header
- assertEquals(w.bitsPerValue, in.readVInt());
- assertEquals(valueCount, in.readVInt());
- assertEquals(w.getFormat().getId(), in.readVInt());
- assertEquals(startFp, in.getFilePointer());
- in.close();
- }
-
{ // test reader
IndexInput in = d.openInput("out.bin", newIOContext(random()));
- PackedInts.Reader r = PackedInts.getReader(in);
+ PackedInts.Reader r =
+ PackedInts.getReaderNoHeader(
+ in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, valueCount, nbits);
assertEquals(fp, in.getFilePointer());
for (int i = 0; i < valueCount; i++) {
assertEquals(
@@ -177,7 +158,14 @@ public class TestPackedInts extends LuceneTestCase {
{ // test reader iterator next
IndexInput in = d.openInput("out.bin", newIOContext(random()));
- PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
+ PackedInts.ReaderIterator r =
+ PackedInts.getReaderIteratorNoHeader(
+ in,
+ PackedInts.Format.PACKED,
+ PackedInts.VERSION_CURRENT,
+ valueCount,
+ nbits,
+ bufferSize);
for (int i = 0; i < valueCount; i++) {
assertEquals(
"index="
@@ -198,7 +186,14 @@ public class TestPackedInts extends LuceneTestCase {
{ // test reader iterator bulk next
IndexInput in = d.openInput("out.bin", newIOContext(random()));
- PackedInts.ReaderIterator r = PackedInts.getReaderIterator(in, bufferSize);
+ PackedInts.ReaderIterator r =
+ PackedInts.getReaderIteratorNoHeader(
+ in,
+ PackedInts.Format.PACKED,
+ PackedInts.VERSION_CURRENT,
+ valueCount,
+ nbits,
+ bufferSize);
int i = 0;
while (i < valueCount) {
final int count = TestUtil.nextInt(random(), 1, 95);
@@ -221,27 +216,6 @@ public class TestPackedInts extends LuceneTestCase {
assertEquals(fp, in.getFilePointer());
in.close();
}
-
- { // test direct reader get
- IndexInput in = d.openInput("out.bin", newIOContext(random()));
- PackedInts.Reader intsEnum = PackedInts.getDirectReader(in);
- for (int i = 0; i < valueCount; i++) {
- final String msg =
- "index="
- + i
- + " valueCount="
- + valueCount
- + " nbits="
- + nbits
- + " for "
- + intsEnum.getClass().getSimpleName();
- final int index = random().nextInt(valueCount);
- assertEquals(msg, values[index], intsEnum.get(index));
- }
- intsEnum.get(intsEnum.size() - 1);
- assertEquals(fp, in.getFilePointer());
- in.close();
- }
d.close();
}
}
@@ -288,13 +262,6 @@ public class TestPackedInts extends LuceneTestCase {
}
assertEquals(msg, byteCount, in.getFilePointer());
- // test direct reader
- in.seek(0L);
- final PackedInts.Reader directReader =
- PackedInts.getDirectReaderNoHeader(in, format, version, valueCount, bpv);
- directReader.get(valueCount - 1);
- assertEquals(msg, byteCount, in.getFilePointer());
-
// test reader
in.seek(0L);
PackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
@@ -480,7 +447,9 @@ public class TestPackedInts extends LuceneTestCase {
for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
Directory dir = newDirectory();
IndexOutput out = dir.createOutput("out", newIOContext(random()));
- PackedInts.Writer w = PackedInts.getWriter(out, 1, bitsPerValue, PackedInts.DEFAULT);
+ PackedInts.Writer w =
+ PackedInts.getWriterNoHeader(
+ out, PackedInts.Format.PACKED, 1, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
long value = 17L & PackedInts.maxValue(bitsPerValue);
w.add(value);
w.finish();
@@ -488,7 +457,9 @@ public class TestPackedInts extends LuceneTestCase {
out.close();
IndexInput in = dir.openInput("out", newIOContext(random()));
- Reader reader = PackedInts.getReader(in);
+ Reader reader =
+ PackedInts.getReaderNoHeader(
+ in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, 1, bitsPerValue);
String msg = "Impl=" + w.getClass().getSimpleName() + ", bitsPerValue=" + bitsPerValue;
assertEquals(msg, 1, reader.size());
assertEquals(msg, value, reader.get(0));
@@ -910,7 +881,13 @@ public class TestPackedInts extends LuceneTestCase {
out.close();
IndexInput in = directory.openInput("packed-ints.bin", IOContext.DEFAULT);
- PackedInts.Reader reader = PackedInts.getReader(in);
+ PackedInts.Reader reader =
+ PackedInts.getReaderNoHeader(
+ in,
+ mutable.getFormat(),
+ PackedInts.VERSION_CURRENT,
+ mutable.size(),
+ mutable.getBitsPerValue());
assertEquals(valueCount, reader.size());
if (mutable instanceof Packed64SingleBlock) {
// make sure that we used the right format so that the reader has