You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/11/23 20:10:24 UTC
svn commit: r1715920 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/lucene54/
lucene/core/src/java/org/apache/lucene/util/packed/
lucene/core/src/test/org/apache/lucene/codecs/lucene54/ lucene...
Author: jpountz
Date: Mon Nov 23 19:10:23 2015
New Revision: 1715920
URL: http://svn.apache.org/viewvc?rev=1715920&view=rev
Log:
LUCENE-6906: Fix Lucene54DocValuesFormat on large empty segments.
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicWriter.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java
lucene/dev/branches/branch_5x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java?rev=1715920&r1=1715919&r2=1715920&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java Mon Nov 23 19:10:23 2015
@@ -319,7 +319,7 @@ final class Lucene54DocValuesProducer ex
// sparse bits need a bit more metadata
entry.numDocsWithValue = meta.readVLong();
final int blockShift = meta.readVInt();
- entry.monotonicMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithValue + 1, blockShift);
+ entry.monotonicMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithValue, blockShift);
ramBytesUsed.addAndGet(entry.monotonicMeta.ramBytesUsed());
directAddressesMeta.put(info.name, entry.monotonicMeta);
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicWriter.java?rev=1715920&r1=1715919&r2=1715920&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicWriter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicWriter.java Mon Nov 23 19:10:23 2015
@@ -123,7 +123,9 @@ public final class DirectMonotonicWriter
if (finished) {
throw new IllegalStateException("#finish has been called already");
}
- flush();
+ if (bufferSize > 0) {
+ flush();
+ }
finished = true;
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java?rev=1715920&r1=1715919&r2=1715920&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java Mon Nov 23 19:10:23 2015
@@ -17,6 +17,7 @@ package org.apache.lucene.codecs.lucene5
* limitations under the License.
*/
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -24,6 +25,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
@@ -137,7 +139,7 @@ public class TestLucene54DocValuesFormat
@Slow
public void testSparseDocValuesVsStoredFields() throws Exception {
- int numIterations = atLeast(2);
+ int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSparseDocValuesVsStoredFields();
}
@@ -157,7 +159,7 @@ public class TestLucene54DocValuesFormat
// sparse compression is only enabled if less than 1% of docs have a value
final int avgGap = 100;
- final int numDocs = atLeast(100);
+ final int numDocs = atLeast(200);
for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
writer.addDocument(new Document());
}
@@ -184,7 +186,7 @@ public class TestLucene54DocValuesFormat
writer.addDocument(doc);
// add a gap
- for (int j = random().nextInt(avgGap * 2); j >= 0; --j) {
+ for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
writer.addDocument(new Document());
}
}
@@ -501,4 +503,5 @@ public class TestLucene54DocValuesFormat
}
}
}
+
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java?rev=1715920&r1=1715919&r2=1715920&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java Mon Nov 23 19:10:23 2015
@@ -32,6 +32,28 @@ import org.apache.lucene.util.TestUtil;
public class TestDirectMonotonic extends LuceneTestCase {
+ public void testEmpty() throws IOException {
+ Directory dir = newDirectory();
+ final int blockShift = TestUtil.nextInt(random(), DirectMonotonicWriter.MIN_BLOCK_SHIFT, DirectMonotonicWriter.MAX_BLOCK_SHIFT);
+
+ final long dataLength;
+ try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
+ IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
+ DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, 0, blockShift);
+ w.finish();
+ dataLength = dataOut.getFilePointer();
+ }
+
+ try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
+ IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
+ DirectMonotonicReader.Meta meta = DirectMonotonicReader.loadMeta(metaIn, 0, blockShift);
+ DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
+ // no exception
+ }
+
+ dir.close();
+ }
+
public void testSimple() throws IOException {
Directory dir = newDirectory();
final int blockShift = 2;
@@ -100,38 +122,52 @@ public class TestDirectMonotonic extends
}
public void testRandom() throws IOException {
- Directory dir = newDirectory();
- final int blockShift = TestUtil.nextInt(random(), DirectMonotonicWriter.MIN_BLOCK_SHIFT, DirectMonotonicWriter.MAX_BLOCK_SHIFT);
- final int numValues = TestUtil.nextInt(random(), 1, 1 << 20);
- List<Long> actualValues = new ArrayList<>();
- long previous = random().nextLong();
- actualValues.add(previous);
- for (int i = 1; i < numValues; ++i) {
- previous += random().nextInt(1 << random().nextInt(20));
- actualValues.add(previous);
- }
-
- final long dataLength;
- try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
- IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
- DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
- for (long v : actualValues) {
- w.add(v);
+ final int iters = atLeast(3);
+ for (int iter = 0; iter < iters; ++iter) {
+ Directory dir = newDirectory();
+ final int blockShift = TestUtil.nextInt(random(), DirectMonotonicWriter.MIN_BLOCK_SHIFT, DirectMonotonicWriter.MAX_BLOCK_SHIFT);
+ final int maxNumValues = 1 << 20;
+ final int numValues;
+ if (random().nextBoolean()) {
+ // random number
+ numValues = TestUtil.nextInt(random(), 1, maxNumValues);
+ } else {
+ // multiple of the block size
+ final int numBlocks = TestUtil.nextInt(random(), 0, maxNumValues >>> blockShift);
+ numValues = TestUtil.nextInt(random(), 0, numBlocks) << blockShift;
+ }
+ List<Long> actualValues = new ArrayList<>();
+ long previous = random().nextLong();
+ if (numValues > 0) {
+ actualValues.add(previous);
+ }
+ for (int i = 1; i < numValues; ++i) {
+ previous += random().nextInt(1 << random().nextInt(20));
+ actualValues.add(previous);
+ }
+
+ final long dataLength;
+ try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
+ IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
+ DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
+ for (long v : actualValues) {
+ w.add(v);
+ }
+ w.finish();
+ dataLength = dataOut.getFilePointer();
+ }
+
+ try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
+ IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
+ DirectMonotonicReader.Meta meta = DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
+ LongValues values = DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
+ for (int i = 0; i < numValues; ++i) {
+ assertEquals(actualValues.get(i).longValue(), values.get(i));
+ }
}
- w.finish();
- dataLength = dataOut.getFilePointer();
+
+ dir.close();
}
-
- try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
- IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
- DirectMonotonicReader.Meta meta = DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
- LongValues values = DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
- for (int i = 0; i < numValues; ++i) {
- assertEquals(actualValues.get(i).longValue(), values.get(i));
- }
- }
-
- dir.close();
}
}
Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1715920&r1=1715919&r2=1715920&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Mon Nov 23 19:10:23 2015
@@ -3149,6 +3149,172 @@ public abstract class BaseDocValuesForma
assertEquals(term2.get(), enum2.term());
}
+ // same as testSortedMergeAwayAllValues but with more than 1024 docs so that sparse encoding is enabled
+ public void testSortedMergeAwayAllValuesLargeSegment() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+ iwriter.addDocument(doc);
+ final int numEmptyDocs = atLeast(1024);
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ iwriter.addDocument(new Document());
+ }
+ iwriter.commit();
+ iwriter.deleteDocuments(new Term("id", "1"));
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ assertEquals(-1, dv.getOrd(i));
+ }
+
+ ireader.close();
+ directory.close();
+ }
+
+ // same as testSortedSetMergeAwayAllValues but with more than 1024 docs so that sparse encoding is enabled
+ public void testSortedSetMergeAwayAllValuesLargeSegment() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+ iwriter.addDocument(doc);
+ final int numEmptyDocs = atLeast(1024);
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ iwriter.addDocument(new Document());
+ }
+ iwriter.commit();
+ iwriter.deleteDocuments(new Term("id", "1"));
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ dv.setDocument(i);
+ assertEquals(-1L, dv.nextOrd());
+ }
+
+ ireader.close();
+ directory.close();
+ }
+
+ // same as testNumericMergeAwayAllValues but with more than 1024 docs so that sparse encoding is enabled
+ public void testNumericMergeAwayAllValuesLargeSegment() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new NumericDocValuesField("field", 42L));
+ iwriter.addDocument(doc);
+ final int numEmptyDocs = atLeast(1024);
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ iwriter.addDocument(new Document());
+ }
+ iwriter.commit();
+ iwriter.deleteDocuments(new Term("id", "1"));
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ NumericDocValues dv = getOnlySegmentReader(ireader).getNumericDocValues("field");
+ Bits docsWithField = getOnlySegmentReader(ireader).getDocsWithField("field");
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ assertEquals(0, dv.get(i));
+ assertFalse(docsWithField.get(i));
+ }
+
+ ireader.close();
+ directory.close();
+ }
+
+ // same as testSortedNumericMergeAwayAllValues but with more than 1024 docs so that sparse encoding is enabled
+ public void testSortedNumericMergeAwayAllValuesLargeSegment() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new SortedNumericDocValuesField("field", 42L));
+ iwriter.addDocument(doc);
+ final int numEmptyDocs = atLeast(1024);
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ iwriter.addDocument(new Document());
+ }
+ iwriter.commit();
+ iwriter.deleteDocuments(new Term("id", "1"));
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ SortedNumericDocValues dv = getOnlySegmentReader(ireader).getSortedNumericDocValues("field");
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ dv.setDocument(i);
+ assertEquals(0, dv.count());
+ }
+
+ ireader.close();
+ directory.close();
+ }
+
+ // same as testBinaryMergeAwayAllValues but with more than 1024 docs so that sparse encoding is enabled
+ public void testBinaryMergeAwayAllValuesLargeSegment() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new BinaryDocValuesField("field", new BytesRef("hello")));
+ iwriter.addDocument(doc);
+ final int numEmptyDocs = atLeast(1024);
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ iwriter.addDocument(new Document());
+ }
+ iwriter.commit();
+ iwriter.deleteDocuments(new Term("id", "1"));
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ BinaryDocValues dv = getOnlySegmentReader(ireader).getBinaryDocValues("field");
+ Bits docsWithField = getOnlySegmentReader(ireader).getDocsWithField("field");
+ for (int i = 0; i < numEmptyDocs; ++i) {
+ assertEquals(new BytesRef(), dv.get(i));
+ assertFalse(docsWithField.get(i));
+ }
+
+ ireader.close();
+ directory.close();
+ }
+
protected boolean codecAcceptsHugeBinaryValues(String field) {
return true;
}