You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/26 08:45:33 UTC
lucene-solr:master: LUCENE-7141: switch OfflineSorter's
ByteSequencesReader to BytesRefIterator
Repository: lucene-solr
Updated Branches:
refs/heads/master c93c88dfb -> 78d5cfefe
LUCENE-7141: switch OfflineSorter's ByteSequencesReader to BytesRefIterator
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/78d5cfef
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/78d5cfef
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/78d5cfef
Branch: refs/heads/master
Commit: 78d5cfefe2453345c498984bf0e405d254a9d5bc
Parents: c93c88d
Author: Mike McCandless <mi...@apache.org>
Authored: Sat Mar 26 03:47:06 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Mar 26 03:47:06 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +++
.../lucene/analysis/hunspell/Dictionary.java | 13 ++++---
.../org/apache/lucene/util/OfflineSorter.java | 40 ++++++++++----------
.../org/apache/lucene/util/bkd/BKDWriter.java | 13 +++----
.../search/suggest/SortedInputIterator.java | 6 +--
.../suggest/analyzing/AnalyzingSuggester.java | 16 +++++---
.../search/suggest/fst/ExternalRefSorter.java | 18 +++------
.../search/suggest/fst/FSTCompletionLookup.java | 18 +++++----
.../search/suggest/fst/BytesRefSortersTest.java | 11 +++---
9 files changed, 72 insertions(+), 68 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index bc272e4..a3368ff 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -16,6 +16,11 @@ New Features
* LUCENE-7140: Add PlanetModel.bisection to spatial3d (Karl Wright via
Mike McCandless)
+API Changes
+
+* LUCENE-7141: Switch OfflineSorter's ByteSequencesReader to
+ BytesRefIterator (Mike McCandless)
+
Optimizations
* LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index 562e5cb..7885daa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -888,17 +888,20 @@ public class Dictionary {
boolean success2 = false;
try (ByteSequencesReader reader = new ByteSequencesReader(tempDir.openChecksumInput(sorted, IOContext.READONCE), sorted)) {
- BytesRefBuilder scratchLine = new BytesRefBuilder();
// TODO: the flags themselves can be double-chars (long) or also numeric
// either way the trick is to encode them as char... but they must be parsed differently
String currentEntry = null;
IntsRefBuilder currentOrds = new IntsRefBuilder();
-
- String line;
- while (reader.read(scratchLine)) {
- line = scratchLine.get().utf8ToString();
+
+ while (true) {
+ BytesRef scratch = reader.next();
+ if (scratch == null) {
+ break;
+ }
+
+ String line = scratch.utf8ToString();
String entry;
char wordForm[];
int end;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
index 7549cbc..386cd0c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
@@ -346,7 +346,7 @@ public class OfflineSorter {
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(segmentsToMerge.size()) {
@Override
protected boolean lessThan(FileAndTop a, FileAndTop b) {
- return comparator.compare(a.current.get(), b.current.get()) < 0;
+ return comparator.compare(a.current, b.current) < 0;
}
};
@@ -361,15 +361,14 @@ public class OfflineSorter {
// Open streams and read the top for each file
for (int i = 0; i < segmentsToMerge.size(); i++) {
streams[i] = getReader(dir.openChecksumInput(segmentsToMerge.get(i), IOContext.READONCE), segmentsToMerge.get(i));
- BytesRefBuilder bytes = new BytesRefBuilder();
- boolean result = false;
+ BytesRef item = null;
try {
- result = streams[i].read(bytes);
+ item = streams[i].next();
} catch (Throwable t) {
verifyChecksum(t, streams[i]);
}
- assert result;
- queue.insertWithOverflow(new FileAndTop(i, bytes));
+ assert item != null;
+ queue.insertWithOverflow(new FileAndTop(i, item));
}
// Unix utility sort() uses ordered array of files to pick the next line from, updating
@@ -378,15 +377,14 @@ public class OfflineSorter {
// so it shouldn't make much of a difference (didn't check).
FileAndTop top;
while ((top = queue.top()) != null) {
- writer.write(top.current.bytes(), 0, top.current.length());
- boolean result = false;
+ writer.write(top.current);
try {
- result = streams[top.fd].read(top.current);
+ top.current = streams[top.fd].next();
} catch (Throwable t) {
verifyChecksum(t, streams[top.fd]);
}
- if (result) {
+ if (top.current != null) {
queue.updateTop();
} else {
queue.pop();
@@ -416,18 +414,17 @@ public class OfflineSorter {
/** Read in a single partition of data */
int readPartition(ByteSequencesReader reader) throws IOException {
long start = System.currentTimeMillis();
- final BytesRefBuilder scratch = new BytesRefBuilder();
while (true) {
- boolean result = false;
+ BytesRef item = null;
try {
- result = reader.read(scratch);
+ item = reader.next();
} catch (Throwable t) {
verifyChecksum(t, reader);
}
- if (result == false) {
+ if (item == null) {
break;
}
- buffer.append(scratch.get());
+ buffer.append(item);
// Account for the created objects.
// (buffer slots do not account to buffer size.)
if (bufferBytesUsed.get() > ramBufferSize.bytes) {
@@ -440,9 +437,9 @@ public class OfflineSorter {
static class FileAndTop {
final int fd;
- final BytesRefBuilder current;
+ BytesRef current;
- FileAndTop(int fd, BytesRefBuilder firstLine) {
+ FileAndTop(int fd, BytesRef firstLine) {
this.fd = fd;
this.current = firstLine;
}
@@ -518,10 +515,11 @@ public class OfflineSorter {
* Utility class to read length-prefixed byte[] entries from an input.
* Complementary to {@link ByteSequencesWriter}.
*/
- public static class ByteSequencesReader implements Closeable {
+ public static class ByteSequencesReader implements BytesRefIterator, Closeable {
protected final String name;
protected final ChecksumIndexInput in;
protected final long end;
+ private final BytesRefBuilder ref = new BytesRefBuilder();
/** Constructs a ByteSequencesReader from the provided IndexInput */
public ByteSequencesReader(ChecksumIndexInput in, String name) {
@@ -538,16 +536,16 @@ public class OfflineSorter {
* the header of the next sequence. Returns <code>true</code> otherwise.
* @throws EOFException if the file ends before the full sequence is read.
*/
- public boolean read(BytesRefBuilder ref) throws IOException {
+ public BytesRef next() throws IOException {
if (in.getFilePointer() >= end) {
- return false;
+ return null;
}
short length = in.readShort();
ref.grow(length);
ref.setLength(length);
in.readBytes(ref.bytes(), 0, length);
- return true;
+ return ref.get();
}
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 5002e50..e075ced 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -33,7 +33,6 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntroSorter;
@@ -736,6 +735,8 @@ public class BKDWriter implements Closeable {
// TODO: this is sort of sneaky way to get the final OfflinePointWriter from OfflineSorter:
IndexOutput[] lastWriter = new IndexOutput[1];
+ final BytesRef scratch = new BytesRef(new byte[bytesPerDoc]);
+
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, OfflineSorter.BufferSize.megabytes(Math.max(1, (long) maxMBSortInHeap)), OfflineSorter.MAX_TEMPFILES) {
/** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
@@ -756,14 +757,12 @@ public class BKDWriter implements Closeable {
protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
return new ByteSequencesReader(in, name) {
@Override
- public boolean read(BytesRefBuilder ref) throws IOException {
+ public BytesRef next() throws IOException {
if (in.getFilePointer() >= end) {
- return false;
+ return null;
}
- ref.grow(bytesPerDoc);
- in.readBytes(ref.bytes(), 0, bytesPerDoc);
- ref.setLength(bytesPerDoc);
- return true;
+ in.readBytes(scratch.bytes, 0, bytesPerDoc);
+ return scratch;
}
};
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
index 38fbbab..977df37 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
@@ -29,7 +29,6 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
@@ -53,7 +52,6 @@ public class SortedInputIterator implements InputIterator {
private boolean done = false;
private long weight;
- private final BytesRefBuilder scratch = new BytesRefBuilder();
private BytesRef payload = new BytesRef();
private Set<BytesRef> contexts = null;
@@ -86,8 +84,8 @@ public class SortedInputIterator implements InputIterator {
}
try {
ByteArrayDataInput input = new ByteArrayDataInput();
- if (reader.read(scratch)) {
- final BytesRef bytes = scratch.get();
+ BytesRef bytes = reader.next();
+ if (bytes != null) {
weight = decode(bytes, input);
if (hasPayloads) {
payload = decodePayload(bytes, input);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
index d7f17a0..19982a5 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
@@ -509,8 +509,12 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
Set<BytesRef> seenSurfaceForms = new HashSet<>();
int dedup = 0;
- while (reader.read(scratch)) {
- input.reset(scratch.bytes(), 0, scratch.length());
+ while (true) {
+ BytesRef bytes = reader.next();
+ if (bytes == null) {
+ break;
+ }
+ input.reset(bytes.bytes, bytes.offset, bytes.length);
short analyzedLength = input.readShort();
analyzed.grow(analyzedLength+2);
input.readBytes(analyzed.bytes(), 0, analyzedLength);
@@ -518,13 +522,13 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
long cost = input.readInt();
- surface.bytes = scratch.bytes();
+ surface.bytes = bytes.bytes;
if (hasPayloads) {
surface.length = input.readShort();
surface.offset = input.getPosition();
} else {
surface.offset = input.getPosition();
- surface.length = scratch.length() - surface.offset;
+ surface.length = bytes.length - surface.offset;
}
if (previousAnalyzed == null) {
@@ -566,11 +570,11 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
} else {
int payloadOffset = input.getPosition() + surface.length;
- int payloadLength = scratch.length() - payloadOffset;
+ int payloadLength = bytes.length - payloadOffset;
BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
br.bytes[surface.length] = PAYLOAD_SEP;
- System.arraycopy(scratch.bytes(), payloadOffset, br.bytes, surface.length+1, payloadLength);
+ System.arraycopy(bytes.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
br.length = br.bytes.length;
builder.add(scratchInts.get(), outputs.newPair(cost, br));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
index 9852eec..fb876d2 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
@@ -24,7 +24,6 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
@@ -105,9 +104,10 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
/**
* Iterate over byte refs in a file.
*/
+ // TODO: this class is a bit silly ... sole purpose is to "remove" Closeable from what #iterator returns:
class ByteSequenceIterator implements BytesRefIterator {
private final OfflineSorter.ByteSequencesReader reader;
- private BytesRefBuilder scratch = new BytesRefBuilder();
+ private BytesRef scratch;
public ByteSequenceIterator(OfflineSorter.ByteSequencesReader reader) {
this.reader = reader;
@@ -115,20 +115,14 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
@Override
public BytesRef next() throws IOException {
- if (scratch == null) {
- return null;
- }
boolean success = false;
try {
- if (reader.read(scratch) == false) {
- IOUtils.close(reader);
- scratch = null;
- }
- success = true;
+ scratch = reader.next();
if (scratch == null) {
- return null;
+ reader.close();
}
- return scratch.get();
+ success = true;
+ return scratch;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(reader);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
index df6b1c5..7db97a8 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
@@ -39,7 +39,6 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
@@ -215,10 +214,13 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
int previousBucket = 0;
int previousScore = 0;
ByteArrayDataInput input = new ByteArrayDataInput();
- BytesRefBuilder tmp1 = new BytesRefBuilder();
BytesRef tmp2 = new BytesRef();
- while (reader.read(tmp1)) {
- input.reset(tmp1.bytes());
+ while (true) {
+ BytesRef scratch = reader.next();
+ if (scratch == null) {
+ break;
+ }
+ input.reset(scratch.bytes, scratch.offset, scratch.length);
int currentScore = input.readInt();
int bucket;
@@ -231,9 +233,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
previousBucket = bucket;
// Only append the input, discard the weight.
- tmp2.bytes = tmp1.bytes();
- tmp2.offset = input.getPosition();
- tmp2.length = tmp1.length() - input.getPosition();
+ tmp2.bytes = scratch.bytes;
+ tmp2.offset = scratch.offset + input.getPosition();
+ tmp2.length = scratch.length - input.getPosition();
builder.add(tmp2, bucket);
line++;
@@ -293,7 +295,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
@Override
public synchronized boolean store(DataOutput output) throws IOException {
output.writeVLong(count);
- if (this.normalCompletion == null || normalCompletion.getFST() == null) {
+ if (normalCompletion == null || normalCompletion.getFST() == null) {
return false;
}
normalCompletion.getFST().save(output);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78d5cfef/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
index b64d283..b26a2ad 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
@@ -57,12 +57,13 @@ public class BytesRefSortersTest extends LuceneTestCase {
sorter.add(new BytesRef(new byte [1]));
});
- BytesRef spare1;
- BytesRef spare2;
- while ((spare1 = i1.next()) != null && (spare2 = i2.next()) != null) {
+ while (true) {
+ BytesRef spare1 = i1.next();
+ BytesRef spare2 = i2.next();
assertEquals(spare1, spare2);
+ if (spare1 == null) {
+ break;
+ }
}
- assertNull(i1.next());
- assertNull(i2.next());
}
}