You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/20 14:15:27 UTC
lucene-solr:master: LUCENE-7121: don't write ord for single-valued
points, saving 4 bytes per point
Repository: lucene-solr
Updated Branches:
refs/heads/master 3a4e1d114 -> d39294009
LUCENE-7121: don't write ord for single-valued points, saving 4 bytes per point
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d3929400
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d3929400
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d3929400
Branch: refs/heads/master
Commit: d392940092187ba88be0d2b0882c23800f44a74e
Parents: 3a4e1d1
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 20 09:16:43 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 20 09:16:43 2016 -0400
----------------------------------------------------------------------
.../simpletext/SimpleTextPointsWriter.java | 5 +-
.../org/apache/lucene/codecs/PointsWriter.java | 5 +-
.../codecs/lucene60/Lucene60PointsWriter.java | 11 +++-
.../apache/lucene/index/PointValuesWriter.java | 8 ++-
.../org/apache/lucene/util/bkd/BKDWriter.java | 65 +++++++++++++-------
.../apache/lucene/util/bkd/HeapPointReader.java | 8 ++-
.../apache/lucene/util/bkd/HeapPointWriter.java | 53 ++++++++++------
.../lucene/util/bkd/OfflinePointReader.java | 39 ++++++++----
.../lucene/util/bkd/OfflinePointWriter.java | 22 ++++---
.../apache/lucene/util/bkd/Test2BBKDPoints.java | 19 +-----
.../org/apache/lucene/util/bkd/TestBKD.java | 18 +++---
.../lucene/index/BasePointsFormatTestCase.java | 2 +-
12 files changed, 158 insertions(+), 97 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
index 33af33e..e54e20a 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
@@ -69,6 +69,8 @@ class SimpleTextPointsWriter extends PointsWriter {
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
+ boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
+
// We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk:
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
@@ -77,7 +79,8 @@ class SimpleTextPointsWriter extends PointsWriter {
fieldInfo.getPointNumBytes(),
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
- values.size(fieldInfo.name)) {
+ values.size(fieldInfo.name),
+ singleValuePerDoc) {
@Override
protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
index 000d713..43b4416 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
@@ -42,16 +42,19 @@ public abstract class PointsWriter implements Closeable {
* a faster but more complex implementation. */
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
long maxPointCount = 0;
+ int docCount = 0;
for (int i=0;i<mergeState.pointsReaders.length;i++) {
PointsReader pointsReader = mergeState.pointsReaders[i];
if (pointsReader != null) {
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
if (readerFieldInfo != null) {
maxPointCount += pointsReader.size(fieldInfo.name);
+ docCount += pointsReader.getDocCount(fieldInfo.name);
}
}
}
final long finalMaxPointCount = maxPointCount;
+ final int finalDocCount = docCount;
writeField(fieldInfo,
new PointsReader() {
@Override
@@ -141,7 +144,7 @@ public abstract class PointsWriter implements Closeable {
@Override
public int getDocCount(String fieldName) {
- throw new UnsupportedOperationException();
+ return finalDocCount;
}
});
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
index 1148fb0..7bb1faf 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
@@ -82,6 +82,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
+ boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
+
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
@@ -89,7 +91,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
maxMBSortInHeap,
- values.size(fieldInfo.name))) {
+ values.size(fieldInfo.name),
+ singleValuePerDoc)) {
values.intersect(fieldInfo.name, new IntersectVisitor() {
@Override
@@ -133,6 +136,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
if (fieldInfo.getPointDimensionCount() != 0) {
if (fieldInfo.getPointDimensionCount() == 1) {
+ boolean singleValuePerDoc = true;
+
// Worst case total maximum size (if none of the points are deleted):
long totMaxSize = 0;
for(int i=0;i<mergeState.pointsReaders.length;i++) {
@@ -142,6 +147,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null) {
totMaxSize += reader.size(fieldInfo.name);
+ singleValuePerDoc &= reader.size(fieldInfo.name) == reader.getDocCount(fieldInfo.name);
}
}
}
@@ -157,7 +163,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
maxMBSortInHeap,
- totMaxSize)) {
+ totMaxSize,
+ singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>();
List<MergeState.DocMap> docMaps = new ArrayList<>();
List<Integer> docIDBases = new ArrayList<>();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
index 694ccf6..f259c5b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
@@ -32,6 +32,8 @@ class PointValuesWriter {
private final Counter iwBytesUsed;
private int[] docIDs;
private int numPoints;
+ private int numDocs;
+ private int lastDocID = -1;
private final byte[] packedValue;
public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
@@ -57,6 +59,10 @@ class PointValuesWriter {
}
bytes.append(value);
docIDs[numPoints] = docID;
+ if (docID != lastDocID) {
+ numDocs++;
+ lastDocID = docID;
+ }
numPoints++;
}
@@ -116,7 +122,7 @@ class PointValuesWriter {
@Override
public int getDocCount(String fieldName) {
- throw new UnsupportedOperationException();
+ return numDocs;
}
});
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 796f611..488fa43 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -134,11 +134,16 @@ public class BKDWriter implements Closeable {
/** An upper bound on how many points the caller will add (includes deletions) */
private final long totalPointCount;
- public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, long totalPointCount) throws IOException {
- this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount);
+ /** True if every document has at most one value. We specialize this case by not bothering to store the ord since it's redundant with docID. */
+ private final boolean singleValuePerDoc;
+
+ private final int maxDoc;
+
+ public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, long totalPointCount, boolean singleValuePerDoc) throws IOException {
+ this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount, singleValuePerDoc);
}
- public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) throws IOException {
+ public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount, boolean singleValuePerDoc) throws IOException {
verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
// We use tracking dir to deal with removing files on exception, so each place that
// creates temp files doesn't need crazy try/finally/success logic:
@@ -148,6 +153,7 @@ public class BKDWriter implements Closeable {
this.numDims = numDims;
this.bytesPerDim = bytesPerDim;
this.totalPointCount = totalPointCount;
+ this.maxDoc = maxDoc;
docsSeen = new FixedBitSet(maxDoc);
packedBytesLength = numDims * bytesPerDim;
@@ -162,9 +168,14 @@ public class BKDWriter implements Closeable {
// If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with long (8 bytes), else we can use int (4 bytes).
longOrds = totalPointCount > Integer.MAX_VALUE;
+ this.singleValuePerDoc = singleValuePerDoc;
// dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int)
- if (longOrds) {
+ if (singleValuePerDoc) {
+ // Lucene only supports up to ~2.1 billion docs, so we better not need longOrds in this case:
+ assert longOrds == false;
+ bytesPerDoc = packedBytesLength + Integer.BYTES;
+ } else if (longOrds) {
bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
} else {
bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES;
@@ -187,7 +198,7 @@ public class BKDWriter implements Closeable {
}
// We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
- heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds);
+ heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds, singleValuePerDoc);
this.maxMBSortInHeap = maxMBSortInHeap;
}
@@ -216,7 +227,7 @@ public class BKDWriter implements Closeable {
private void spillToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
- offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill");
+ offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill", singleValuePerDoc);
tempInput = offlinePointWriter.out;
PointReader reader = heapPointWriter.getReader(0, pointCount);
for(int i=0;i<pointCount;i++) {
@@ -622,14 +633,16 @@ public class BKDWriter implements Closeable {
writer.docIDs[i] = writer.docIDs[j];
writer.docIDs[j] = docID;
- if (longOrds) {
- long ord = writer.ordsLong[i];
- writer.ordsLong[i] = writer.ordsLong[j];
- writer.ordsLong[j] = ord;
- } else {
- int ord = writer.ords[i];
- writer.ords[i] = writer.ords[j];
- writer.ords[j] = ord;
+ if (singleValuePerDoc == false) {
+ if (longOrds) {
+ long ord = writer.ordsLong[i];
+ writer.ordsLong[i] = writer.ordsLong[j];
+ writer.ordsLong[j] = ord;
+ } else {
+ int ord = writer.ords[i];
+ writer.ords[i] = writer.ords[j];
+ writer.ords[j] = ord;
+ }
}
byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock);
@@ -681,7 +694,7 @@ public class BKDWriter implements Closeable {
sorted = heapPointWriter;
} else {
// Subsequent dims need a private copy
- sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds);
+ sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
sorted.copyFrom(heapPointWriter);
}
@@ -713,7 +726,9 @@ public class BKDWriter implements Closeable {
// Tie-break by docID:
int offset;
- if (longOrds) {
+ if (singleValuePerDoc) {
+ offset = 0;
+ } else if (longOrds) {
offset = Long.BYTES;
} else {
offset = Integer.BYTES;
@@ -769,7 +784,7 @@ public class BKDWriter implements Closeable {
assert lastWriter[0] != null;
- return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount, longOrds);
+ return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount, longOrds, singleValuePerDoc);
}
}
@@ -800,7 +815,11 @@ public class BKDWriter implements Closeable {
LongBitSet ordBitSet;
if (numDims > 1) {
- ordBitSet = new LongBitSet(pointCount);
+ if (singleValuePerDoc) {
+ ordBitSet = new LongBitSet(maxDoc);
+ } else {
+ ordBitSet = new LongBitSet(pointCount);
+ }
} else {
ordBitSet = null;
}
@@ -867,7 +886,11 @@ public class BKDWriter implements Closeable {
success = true;
} finally {
if (success == false) {
+ if (tempInput != null) {
+ IOUtils.closeWhileHandlingException(tempInput);
+ }
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles());
+ tempInput = null;
}
}
@@ -1049,7 +1072,7 @@ public class BKDWriter implements Closeable {
private PathSlice switchToHeap(PathSlice source) throws IOException {
int count = Math.toIntExact(source.count);
try (
- PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds);
+ PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds, singleValuePerDoc);
PointReader reader = source.writer.getReader(source.start, source.count);
) {
for(int i=0;i<count;i++) {
@@ -1252,9 +1275,9 @@ public class BKDWriter implements Closeable {
PointWriter getPointWriter(long count, String desc) throws IOException {
if (count <= maxPointsSortInHeap) {
int size = Math.toIntExact(count);
- return new HeapPointWriter(size, size, packedBytesLength, longOrds);
+ return new HeapPointWriter(size, size, packedBytesLength, longOrds, singleValuePerDoc);
} else {
- return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc);
+ return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, singleValuePerDoc);
}
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
index cd9152e..0cd4bd2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
@@ -28,10 +28,12 @@ final class HeapPointReader extends PointReader {
final int[] docIDs;
final int end;
final byte[] scratch;
+ final boolean singleValuePerDoc;
- HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end) {
+ HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end, boolean singleValuePerDoc) {
this.blocks = blocks;
this.valuesPerBlock = valuesPerBlock;
+ this.singleValuePerDoc = singleValuePerDoc;
this.ords = ords;
this.ordsLong = ordsLong;
this.docIDs = docIDs;
@@ -75,7 +77,9 @@ final class HeapPointReader extends PointReader {
@Override
public long ord() {
- if (ordsLong != null) {
+ if (singleValuePerDoc) {
+ return docIDs[curRead];
+ } else if (ordsLong != null) {
return ordsLong[curRead];
} else {
return ords[curRead];
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
index a4aedd0..1bcf836 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
@@ -31,17 +31,24 @@ final class HeapPointWriter implements PointWriter {
final int maxSize;
final int valuesPerBlock;
final int packedBytesLength;
+ final boolean singleValuePerDoc;
// NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap
final List<byte[]> blocks = new ArrayList<>();
- public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds) {
+ public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds, boolean singleValuePerDoc) {
docIDs = new int[initSize];
this.maxSize = maxSize;
this.packedBytesLength = packedBytesLength;
- if (longOrds) {
- this.ordsLong = new long[initSize];
+ this.singleValuePerDoc = singleValuePerDoc;
+ if (singleValuePerDoc) {
+ this.ordsLong = null;
+ this.ords = null;
} else {
- this.ords = new int[initSize];
+ if (longOrds) {
+ this.ordsLong = new long[initSize];
+ } else {
+ this.ords = new int[initSize];
+ }
}
// 4K per page, unless each value is > 4K:
valuesPerBlock = Math.max(1, 4096/packedBytesLength);
@@ -52,12 +59,14 @@ final class HeapPointWriter implements PointWriter {
throw new IllegalStateException("docIDs.length=" + docIDs.length + " other.nextWrite=" + other.nextWrite);
}
System.arraycopy(other.docIDs, 0, docIDs, 0, other.nextWrite);
- if (other.ords != null) {
- assert this.ords != null;
- System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
- } else {
- assert this.ordsLong != null;
- System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite);
+ if (singleValuePerDoc == false) {
+ if (other.ords != null) {
+ assert this.ords != null;
+ System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
+ } else {
+ assert this.ordsLong != null;
+ System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite);
+ }
}
for(byte[] block : other.blocks) {
@@ -117,18 +126,22 @@ final class HeapPointWriter implements PointWriter {
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, Integer.BYTES));
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
docIDs = growExact(docIDs, nextSize);
- if (ordsLong != null) {
- ordsLong = growExact(ordsLong, nextSize);
- } else {
- ords = growExact(ords, nextSize);
+ if (singleValuePerDoc == false) {
+ if (ordsLong != null) {
+ ordsLong = growExact(ordsLong, nextSize);
+ } else {
+ ords = growExact(ords, nextSize);
+ }
}
}
writePackedValue(nextWrite, packedValue);
- if (ordsLong != null) {
- ordsLong[nextWrite] = ord;
- } else {
- assert ord <= Integer.MAX_VALUE;
- ords[nextWrite] = (int) ord;
+ if (singleValuePerDoc == false) {
+ if (ordsLong != null) {
+ ordsLong[nextWrite] = ord;
+ } else {
+ assert ord <= Integer.MAX_VALUE;
+ ords[nextWrite] = (int) ord;
+ }
}
docIDs[nextWrite] = docID;
nextWrite++;
@@ -137,7 +150,7 @@ final class HeapPointWriter implements PointWriter {
@Override
public PointReader getReader(long start, long length) {
assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
- return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite);
+ return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite, singleValuePerDoc);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 15274fb..18afda4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -32,18 +32,23 @@ final class OfflinePointReader extends PointReader {
long countLeft;
final IndexInput in;
private final byte[] packedValue;
+ final boolean singleValuePerDoc;
private long ord;
private int docID;
// true if ords are written as long (8 bytes), else 4 bytes
private boolean longOrds;
private boolean checked;
- OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length, boolean longOrds) throws IOException {
+ OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
+ boolean longOrds, boolean singleValuePerDoc) throws IOException {
+ this.singleValuePerDoc = singleValuePerDoc;
int bytesPerDoc = packedBytesLength + Integer.BYTES;
- if (longOrds) {
- bytesPerDoc += Long.BYTES;
- } else {
- bytesPerDoc += Integer.BYTES;
+ if (singleValuePerDoc == false) {
+ if (longOrds) {
+ bytesPerDoc += Long.BYTES;
+ } else {
+ bytesPerDoc += Integer.BYTES;
+ }
}
if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
@@ -84,12 +89,16 @@ final class OfflinePointReader extends PointReader {
assert countLeft == -1;
return false;
}
- if (longOrds) {
- ord = in.readLong();
+ if (singleValuePerDoc == false) {
+ if (longOrds) {
+ ord = in.readLong();
+ } else {
+ ord = in.readInt();
+ }
+ docID = in.readInt();
} else {
- ord = in.readInt();
+ ord = docID = in.readInt();
}
- docID = in.readInt();
return true;
}
@@ -135,10 +144,12 @@ final class OfflinePointReader extends PointReader {
int packedBytesLength = packedValue.length;
int bytesPerDoc = packedBytesLength + Integer.BYTES;
- if (longOrds) {
- bytesPerDoc += Long.BYTES;
- } else {
- bytesPerDoc += Integer.BYTES;
+ if (singleValuePerDoc == false) {
+ if (longOrds) {
+ bytesPerDoc += Long.BYTES;
+ } else {
+ bytesPerDoc += Integer.BYTES;
+ }
}
long rightCount = 0;
@@ -159,8 +170,10 @@ final class OfflinePointReader extends PointReader {
if (longOrds) {
ord = readLong(buffer, packedBytesLength);
} else {
+ // This is either ord (multi-valued case) or docID (which we use as ord in the single valued case):
ord = readInt(buffer, packedBytesLength);
}
+
if (rightTree.get(ord)) {
rightOut.writeBytes(buffer, 0, bytesPerDoc);
if (doClearBits) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index af974c7..5314876 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -19,7 +19,6 @@ package org.apache.lucene.util.bkd;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
@@ -30,37 +29,42 @@ final class OfflinePointWriter implements PointWriter {
final Directory tempDir;
final IndexOutput out;
final int packedBytesLength;
+ final boolean singleValuePerDoc;
long count;
private boolean closed;
// true if ords are written as long (8 bytes), else 4 bytes
private boolean longOrds;
- public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc) throws IOException {
+ public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc, boolean singleValuePerDoc) throws IOException {
this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd_" + desc, IOContext.DEFAULT);
this.tempDir = tempDir;
this.packedBytesLength = packedBytesLength;
this.longOrds = longOrds;
+ this.singleValuePerDoc = singleValuePerDoc;
}
/** Initializes on an already written/closed file, just so consumers can use {@link #getReader} to read the file. */
- public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count, boolean longOrds) {
+ public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count, boolean longOrds, boolean singleValuePerDoc) {
this.out = out;
this.tempDir = tempDir;
this.packedBytesLength = packedBytesLength;
this.count = count;
closed = true;
this.longOrds = longOrds;
+ this.singleValuePerDoc = singleValuePerDoc;
}
@Override
public void append(byte[] packedValue, long ord, int docID) throws IOException {
assert packedValue.length == packedBytesLength;
out.writeBytes(packedValue, 0, packedValue.length);
- if (longOrds) {
- out.writeLong(ord);
- } else {
- assert ord <= Integer.MAX_VALUE;
- out.writeInt((int) ord);
+ if (singleValuePerDoc == false) {
+ if (longOrds) {
+ out.writeLong(ord);
+ } else {
+ assert ord <= Integer.MAX_VALUE;
+ out.writeInt((int) ord);
+ }
}
out.writeInt(docID);
count++;
@@ -70,7 +74,7 @@ final class OfflinePointWriter implements PointWriter {
public PointReader getReader(long start, long length) throws IOException {
assert closed;
assert start + length <= count: "start=" + start + " length=" + length + " count=" + count;
- return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, length, longOrds);
+ return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, length, longOrds, singleValuePerDoc);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
index cfb98b0..ffcbcf8 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
@@ -16,29 +16,14 @@
*/
package org.apache.lucene.util.bkd;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.PointsReader;
-import org.apache.lucene.codecs.PointsWriter;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.LongPoint;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LuceneTestCase.Monster;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@@ -55,7 +40,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
- BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, 26L * numDocs);
+ BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, 26L * numDocs, false);
int counter = 0;
byte[] packedBytes = new byte[Long.BYTES];
for (int docID = 0; docID < numDocs; docID++) {
@@ -88,7 +73,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
- BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs);
+ BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs, false);
int counter = 0;
byte[] packedBytes = new byte[2*Long.BYTES];
for (int docID = 0; docID < numDocs; docID++) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index acc049c..dc7f8f7 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -43,8 +43,8 @@ import org.apache.lucene.util.TestUtil;
public class TestBKD extends LuceneTestCase {
- private long randomPointCount() {
- if (random().nextBoolean()) {
+ private long randomPointCount(boolean singleValuePerDoc) {
+ if (singleValuePerDoc || random().nextBoolean()) {
return random().nextInt(Integer.MAX_VALUE);
} else {
return random().nextLong() & Long.MAX_VALUE;
@@ -53,7 +53,7 @@ public class TestBKD extends LuceneTestCase {
public void testBasicInts1D() throws Exception {
try (Directory dir = getDirectory(100)) {
- BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount());
+ BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount(true), true);
byte[] scratch = new byte[4];
for(int docID=0;docID<100;docID++) {
NumericUtils.intToSortableBytes(docID, scratch, 0);
@@ -128,7 +128,7 @@ public class TestBKD extends LuceneTestCase {
int numDims = TestUtil.nextInt(random(), 1, 5);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat());
- BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, randomPointCount());
+ BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, randomPointCount(true), true);
if (VERBOSE) {
System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs);
@@ -269,7 +269,7 @@ public class TestBKD extends LuceneTestCase {
int numDims = TestUtil.nextInt(random(), 1, 5);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat());
- BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
+ BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(true), true);
BigInteger[][] docs = new BigInteger[numDocs][];
byte[] scratch = new byte[numBytesPerDim*numDims];
@@ -442,7 +442,7 @@ public class TestBKD extends LuceneTestCase {
public void testTooLittleHeap() throws Exception {
try (Directory dir = getDirectory(0)) {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
- new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount());
+ new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount(true), true);
});
assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
}
@@ -565,7 +565,7 @@ public class TestBKD extends LuceneTestCase {
List<Integer> docIDBases = null;
int seg = 0;
- BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
+ BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
IndexInput in = null;
@@ -619,7 +619,7 @@ public class TestBKD extends LuceneTestCase {
seg++;
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
maxMB = (float) 3.0 + (3*random().nextDouble());
- w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
+ w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
lastDocIDBase = docID;
}
}
@@ -634,7 +634,7 @@ public class TestBKD extends LuceneTestCase {
out.close();
in = dir.openInput("bkd", IOContext.DEFAULT);
seg++;
- w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
+ w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
List<BKDReader> readers = new ArrayList<>();
for(long fp : toMerge) {
in.seek(fp);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3929400/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
index ecb3a61..7c42d1c 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
@@ -229,7 +229,7 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
throw ise;
}
} catch (AssertionError ae) {
- if (ae.getMessage().contains("does not exist; files=")) {
+ if (ae.getMessage() != null && ae.getMessage().contains("does not exist; files=")) {
// OK: likely we threw the random IOExc when IW was asserting the commit files exist
done = true;
} else {