You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/10/20 19:31:17 UTC
[08/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7489: Better sparsity support for Lucene70DocValuesFormat.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
index a4b9049..4e9f0e0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
@@ -44,12 +44,12 @@ public abstract class EmptyDocValuesProducer extends DocValuesProducer {
}
@Override
- public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
+ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
throw new UnsupportedOperationException();
}
@Override
- public SortedSetDocValues getSortedSet(FieldInfo field) {
+ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
throw new UnsupportedOperationException();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
index dae1179..0c6c809 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
@@ -17,6 +17,8 @@
package org.apache.lucene.index;
+import java.io.IOException;
+
import org.apache.lucene.util.BytesRef;
/**
@@ -103,7 +105,7 @@ public abstract class LegacySortedSetDocValues {
* Returns a {@link TermsEnum} over the values.
* The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
*/
- public TermsEnum termsEnum() {
+ public TermsEnum termsEnum() throws IOException {
throw new UnsupportedOperationException();
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
index 225b6a6..cc7360e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
@@ -95,7 +95,7 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues {
}
@Override
- public TermsEnum termsEnum() {
+ public TermsEnum termsEnum() throws IOException {
return in.termsEnum();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
index ee70a64..7ff084f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
@@ -104,7 +104,7 @@ public abstract class SortedDocValues extends BinaryDocValues {
* Returns a {@link TermsEnum} over the values.
* The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
*/
- public TermsEnum termsEnum() {
+ public TermsEnum termsEnum() throws IOException {
return new SortedDocValuesTermsEnum(this);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index e53a0e7..439843b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -98,7 +98,7 @@ public abstract class SortedSetDocValues extends DocIdSetIterator {
* Returns a {@link TermsEnum} over the values.
* The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
*/
- public TermsEnum termsEnum() {
+ public TermsEnum termsEnum() throws IOException {
return new SortedSetDocValuesTermsEnum(this);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index 0bf7a8e..46afe0d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -81,7 +81,7 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@Override
- public TermsEnum iterator() {
+ public TermsEnum iterator() throws IOException {
return fcsi.termsEnum();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
index bdefdf3..676efcd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
@@ -46,7 +46,6 @@ public final class DirectMonotonicReader {
public static class Meta implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Meta.class);
- final long numValues;
final int blockShift;
final int numBlocks;
final long[] mins;
@@ -55,7 +54,6 @@ public final class DirectMonotonicReader {
final long[] offsets;
Meta(long numValues, int blockShift) {
- this.numValues = numValues;
this.blockShift = blockShift;
long numBlocks = numValues >>> blockShift;
if ((numBlocks << blockShift) < numValues) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
index fae82e0..5ad701e 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
@@ -25,14 +25,13 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
+import java.util.function.Supplier;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.lucene70.Lucene70DocValuesProducer.SparseNumericDocValues;
-import org.apache.lucene.codecs.lucene70.Lucene70DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper;
import org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
@@ -62,7 +61,6 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
@@ -70,7 +68,6 @@ import org.apache.lucene.store.RAMInputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.TestUtil;
/**
@@ -123,7 +120,7 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
public void testTermsEnumFixedWidth() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
- doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), 10, 10);
+ doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), () -> TestUtil.randomSimpleString(random(), 10, 10));
}
}
@@ -131,7 +128,7 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
public void testTermsEnumVariableWidth() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
- doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), 1, 500);
+ doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), () -> TestUtil.randomSimpleString(random(), 1, 500));
}
}
@@ -139,7 +136,21 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
public void testTermsEnumRandomMany() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
- doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 8121), 1, 500);
+ doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 8121), () -> TestUtil.randomSimpleString(random(), 1, 500));
+ }
+ }
+
+ public void testTermsEnumLongSharedPrefixes() throws Exception {
+ int numIterations = atLeast(1);
+ for (int i = 0; i < numIterations; i++) {
+ doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), () -> {
+ char[] chars = new char[random().nextInt(500)];
+ Arrays.fill(chars, 'a');
+ if (chars.length > 0) {
+ chars[random().nextInt(chars.length)] = 'b';
+ }
+ return new String(chars);
+ });
}
}
@@ -269,7 +280,7 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
// TODO: try to refactor this and some termsenum tests into the base class.
// to do this we need to fix the test class to get a DVF not a Codec so we can setup
// the postings format correctly.
- private void doTestTermsEnumRandom(int numDocs, int minLength, int maxLength) throws Exception {
+ private void doTestTermsEnumRandom(int numDocs, Supplier<String> valuesProducer) throws Exception {
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMergeScheduler(new SerialMergeScheduler());
@@ -294,12 +305,11 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
doc.add(idField);
- final int length = TestUtil.nextInt(random(), minLength, maxLength);
int numValues = random().nextInt(17);
// create a random list of strings
List<String> values = new ArrayList<>();
for (int v = 0; v < numValues; v++) {
- values.add(TestUtil.randomSimpleString(random(), minLength, length));
+ values.add(valuesProducer.get());
}
// add in any order to the indexed field
@@ -429,92 +439,6 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
}
}
- public void testSparseLongValues() throws IOException {
- final int iters = atLeast(5);
- for (int iter = 0; iter < iters; ++iter) {
- final int numDocs = TestUtil.nextInt(random(), 0, 100);
- final int[] docIds = new int[numDocs];
- final long[] values = new long[numDocs];
- final int maxDoc;
- if (numDocs == 0) {
- maxDoc = 1 + random().nextInt(10);
- } else {
- docIds[0] = random().nextInt(10);
- for (int i = 1; i < docIds.length; ++i) {
- docIds[i] = docIds[i - 1] + 1 + random().nextInt(100);
- }
- maxDoc = docIds[numDocs - 1] + 1 + random().nextInt(10);
- }
- for (int i = 0; i < values.length; ++i) {
- values[i] = random().nextLong();
- }
- final long missingValue = random().nextLong();
- final LongValues docIdsValues = new LongValues() {
- @Override
- public long get(long index) {
- return docIds[Math.toIntExact(index)];
- }
- };
- final LongValues valuesValues = new LongValues() {
- @Override
- public long get(long index) {
- return values[Math.toIntExact(index)];
- }
- };
- final SparseNumericDocValues sparseValues = new SparseNumericDocValues(numDocs, docIdsValues, valuesValues);
-
- // sequential access
- assertEquals(-1, sparseValues.docID());
- for (int i = 0; i < docIds.length; ++i) {
- assertEquals(docIds[i], sparseValues.nextDoc());
- }
- assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.nextDoc());
-
- // advance
- for (int i = 0; i < 2000; ++i) {
- final int target = TestUtil.nextInt(random(), 0, maxDoc);
- int index = Arrays.binarySearch(docIds, target);
- if (index < 0) {
- index = -1 - index;
- }
- sparseValues.reset();
- if (index > 0) {
- assertEquals(docIds[index - 1], sparseValues.advance(Math.toIntExact(docIds[index - 1])));
- }
- if (index == docIds.length) {
- assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.advance(target));
- } else {
- assertEquals(docIds[index], sparseValues.advance(target));
- }
- }
-
- final SparseNumericDocValuesRandomAccessWrapper raWrapper = new SparseNumericDocValuesRandomAccessWrapper(sparseValues, missingValue);
-
- // random-access
- for (int i = 0; i < 2000; ++i) {
- final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1);
- final int idx = Arrays.binarySearch(docIds, docId);
- final long value = raWrapper.get(docId);
- if (idx >= 0) {
- assertEquals(values[idx], value);
- } else {
- assertEquals(missingValue, value);
- }
- }
-
- // sequential access
- for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
- final int idx = Arrays.binarySearch(docIds, docId);
- final long value = raWrapper.get(docId);
- if (idx >= 0) {
- assertEquals(values[idx], value);
- } else {
- assertEquals(missingValue, value);
- }
- }
- }
- }
-
@Slow
public void testSortedSetAroundBlockSize() throws IOException {
final int frontier = 1 << Lucene70DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;