You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/11 12:47:41 UTC
lucene-solr git commit: fix int overflow bug in BKDWriter that prevented it from indexing > 2.1B points; try to improve runtime of Test2BPoints
Repository: lucene-solr
Updated Branches:
refs/heads/master fafbb2b6c -> 1e05d3be7
fix int overflow bug in BKDWriter that prevented it from indexing > 2.1B points; try to improve runtime of Test2BPoints
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1e05d3be
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1e05d3be
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1e05d3be
Branch: refs/heads/master
Commit: 1e05d3be76e0dcd7d0e1a2bb2bb89eb4019e33cc
Parents: fafbb2b
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Mar 11 06:48:30 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Mar 11 06:48:30 2016 -0500
----------------------------------------------------------------------
.../org/apache/lucene/util/bkd/BKDWriter.java | 4 +-
.../org/apache/lucene/index/Test2BPoints.java | 62 +++++++++++++++-----
2 files changed, 50 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1e05d3be/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index aa7e5dc..f5a2d81 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -1082,7 +1082,7 @@ public class BKDWriter implements Closeable {
// Second pass: write the full values:
byte[] lastPackedValue = new byte[bytesPerDim];
- for (int i=0;i<source.count;i++) {
+ for (int i=0;i<count;i++) {
// TODO: we could do bulk copying here, avoiding the intermediate copy:
heapSource.readPackedValue(Math.toIntExact(source.start + i), scratchPackedValue);
assert numDims != 1 || valueInOrder(i, lastPackedValue, scratchPackedValue);
@@ -1143,7 +1143,7 @@ public class BKDWriter implements Closeable {
// Partition this source according to how the splitDim split the values:
int nextRightCount = 0;
- for (int i=0;i<source.count;i++) {
+ for (long i=0;i<source.count;i++) {
boolean result = reader.next();
assert result;
byte[] packedValue = reader.packedValue();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1e05d3be/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
index bfe387e..43207b8 100644
--- a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
@@ -16,8 +16,16 @@
*/
package org.apache.lucene.index;
+import java.io.IOException;
+
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PointsReader;
+import org.apache.lucene.codecs.PointsWriter;
+import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
+import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.IndexSearcher;
@@ -33,10 +41,10 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
// e.g. run like this: ant test -Dtestcase=Test2BPoints -Dtests.nightly=true -Dtests.verbose=true -Dtests.monster=true
//
-// or: python -u /l/util/src/python/repeatLuceneTest.py -once -nolog -tmpDir /b/tmp -logDir /l/logs Test2BPoints.test1D -verbose
+// or: python -u /l/util/src/python/repeatLuceneTest.py -heap 6g -once -nolog -tmpDir /b/tmp -logDir /l/logs Test2BPoints.test2D -verbose
@SuppressCodecs({ "SimpleText", "Memory", "Direct", "Compressing" })
-@TimeoutSuite(millis = 16 * TimeUnits.HOUR)
+@TimeoutSuite(millis = 365 * 24 * TimeUnits.HOUR) // hopefully ~1 year is long enough ;)
@Monster("takes at least 4 hours and consumes many GB of temp disk space")
public class Test2BPoints extends LuceneTestCase {
public void test1D() throws Exception {
@@ -44,12 +52,14 @@ public class Test2BPoints extends LuceneTestCase {
System.out.println("DIR: " + ((FSDirectory) dir).getDirectory());
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
- .setCodec(Codec.forName("Lucene60"))
- .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
- .setRAMBufferSizeMB(64.0)
- .setMergeScheduler(new ConcurrentMergeScheduler())
- .setMergePolicy(newLogMergePolicy(false, 10))
- .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+ .setCodec(getCodec())
+ .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+ .setRAMBufferSizeMB(256.0)
+ .setMergeScheduler(new ConcurrentMergeScheduler())
+ .setMergePolicy(newLogMergePolicy(false, 10))
+ .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+
+ ((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(6, 3);
IndexWriter w = new IndexWriter(dir, iwc);
@@ -88,13 +98,15 @@ public class Test2BPoints extends LuceneTestCase {
Directory dir = FSDirectory.open(createTempDir("2BPoints2D"));
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
- .setCodec(Codec.forName("Lucene60"))
- .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
- .setRAMBufferSizeMB(64.0)
- .setMergeScheduler(new ConcurrentMergeScheduler())
- .setMergePolicy(newLogMergePolicy(false, 10))
- .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+ .setCodec(getCodec())
+ .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+ .setRAMBufferSizeMB(256.0)
+ .setMergeScheduler(new ConcurrentMergeScheduler())
+ .setMergePolicy(newLogMergePolicy(false, 10))
+ .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+ ((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(6, 3);
+
IndexWriter w = new IndexWriter(dir, iwc);
MergePolicy mp = w.getConfig().getMergePolicy();
@@ -127,4 +139,26 @@ public class Test2BPoints extends LuceneTestCase {
TestUtil.checkIndex(dir);
dir.close();
}
+
+ private static Codec getCodec() {
+
+ return new FilterCodec("Lucene60", Codec.forName("Lucene60")) {
+ @Override
+ public PointsFormat pointsFormat() {
+ return new PointsFormat() {
+ @Override
+ public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
+ int maxPointsInLeafNode = 1024;
+ double maxMBSortInHeap = 256.0;
+ return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
+ }
+
+ @Override
+ public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
+ return new Lucene60PointsReader(readState);
+ }
+ };
+ }
+ };
+ }
}