You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/10/20 19:31:17 UTC

[08/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7489: Better sparsity support for Lucene70DocValuesFormat.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
index a4b9049..4e9f0e0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/EmptyDocValuesProducer.java
@@ -44,12 +44,12 @@ public abstract class EmptyDocValuesProducer extends DocValuesProducer {
   }
 
   @Override
-  public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
+  public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
     throw new UnsupportedOperationException();
   }
   
   @Override
-  public SortedSetDocValues getSortedSet(FieldInfo field) {
+  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
     throw new UnsupportedOperationException();
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
index dae1179..0c6c809 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LegacySortedSetDocValues.java
@@ -17,6 +17,8 @@
 package org.apache.lucene.index;
 
 
+import java.io.IOException;
+
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -103,7 +105,7 @@ public abstract class LegacySortedSetDocValues {
    * Returns a {@link TermsEnum} over the values.
    * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
    */
-  public TermsEnum termsEnum() {
+  public TermsEnum termsEnum() throws IOException {
     throw new UnsupportedOperationException();
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
index 225b6a6..cc7360e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
@@ -95,7 +95,7 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues {
   }
 
   @Override
-  public TermsEnum termsEnum() {
+  public TermsEnum termsEnum() throws IOException {
     return in.termsEnum();
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
index ee70a64..7ff084f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
@@ -104,7 +104,7 @@ public abstract class SortedDocValues extends BinaryDocValues {
    * Returns a {@link TermsEnum} over the values.
    * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
    */
-  public TermsEnum termsEnum() {
+  public TermsEnum termsEnum() throws IOException {
     return new SortedDocValuesTermsEnum(this);
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index e53a0e7..439843b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -98,7 +98,7 @@ public abstract class SortedSetDocValues extends DocIdSetIterator {
    * Returns a {@link TermsEnum} over the values.
    * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
    */
-  public TermsEnum termsEnum() {
+  public TermsEnum termsEnum() throws IOException {
     return new SortedSetDocValuesTermsEnum(this);
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index 0bf7a8e..46afe0d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -81,7 +81,7 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
           TermsEnum termsEnum = query.getTermsEnum(new Terms() {
             
             @Override
-            public TermsEnum iterator() {
+            public TermsEnum iterator() throws IOException {
               return fcsi.termsEnum();
             }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
index bdefdf3..676efcd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java
@@ -46,7 +46,6 @@ public final class DirectMonotonicReader {
   public static class Meta implements Accountable {
     private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Meta.class);
 
-    final long numValues;
     final int blockShift;
     final int numBlocks;
     final long[] mins;
@@ -55,7 +54,6 @@ public final class DirectMonotonicReader {
     final long[] offsets;
 
     Meta(long numValues, int blockShift) {
-      this.numValues = numValues;
       this.blockShift = blockShift;
       long numBlocks = numValues >>> blockShift;
       if ((numBlocks << blockShift) < numValues) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/927fd51d/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
index fae82e0..5ad701e 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
@@ -25,14 +25,13 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.function.Supplier;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.lucene70.Lucene70DocValuesProducer.SparseNumericDocValues;
-import org.apache.lucene.codecs.lucene70.Lucene70DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper;
 import org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
@@ -62,7 +61,6 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMFile;
@@ -70,7 +68,6 @@ import org.apache.lucene.store.RAMInputStream;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.TestUtil;
 
 /**
@@ -123,7 +120,7 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
   public void testTermsEnumFixedWidth() throws Exception {
     int numIterations = atLeast(1);
     for (int i = 0; i < numIterations; i++) {
-      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), 10, 10);
+      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), () -> TestUtil.randomSimpleString(random(), 10, 10));
     }
   }
   
@@ -131,7 +128,7 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
   public void testTermsEnumVariableWidth() throws Exception {
     int numIterations = atLeast(1);
     for (int i = 0; i < numIterations; i++) {
-      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), 1, 500);
+      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), () -> TestUtil.randomSimpleString(random(), 1, 500));
     }
   }
   
@@ -139,7 +136,21 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
   public void testTermsEnumRandomMany() throws Exception {
     int numIterations = atLeast(1);
     for (int i = 0; i < numIterations; i++) {
-      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 8121), 1, 500);
+      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 8121), () -> TestUtil.randomSimpleString(random(), 1, 500));
+    }
+  }
+
+  public void testTermsEnumLongSharedPrefixes() throws Exception {
+    int numIterations = atLeast(1);
+    for (int i = 0; i < numIterations; i++) {
+      doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), () -> {
+        char[] chars = new char[random().nextInt(500)];
+        Arrays.fill(chars, 'a');
+        if (chars.length > 0) {
+          chars[random().nextInt(chars.length)] = 'b';
+        }
+        return new String(chars);
+      });
     }
   }
 
@@ -269,7 +280,7 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
   // TODO: try to refactor this and some termsenum tests into the base class.
   // to do this we need to fix the test class to get a DVF not a Codec so we can setup
   // the postings format correctly.
-  private void doTestTermsEnumRandom(int numDocs, int minLength, int maxLength) throws Exception {
+  private void doTestTermsEnumRandom(int numDocs, Supplier<String> valuesProducer) throws Exception {
     Directory dir = newFSDirectory(createTempDir());
     IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
     conf.setMergeScheduler(new SerialMergeScheduler());
@@ -294,12 +305,11 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
       Document doc = new Document();
       Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
       doc.add(idField);
-      final int length = TestUtil.nextInt(random(), minLength, maxLength);
       int numValues = random().nextInt(17);
       // create a random list of strings
       List<String> values = new ArrayList<>();
       for (int v = 0; v < numValues; v++) {
-        values.add(TestUtil.randomSimpleString(random(), minLength, length));
+        values.add(valuesProducer.get());
       }
       
       // add in any order to the indexed field
@@ -429,92 +439,6 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
     }
   }
 
-  public void testSparseLongValues() throws IOException {
-    final int iters = atLeast(5);
-    for (int iter = 0; iter < iters; ++iter) {
-      final int numDocs = TestUtil.nextInt(random(), 0, 100);
-      final int[] docIds = new int[numDocs];
-      final long[] values = new long[numDocs];
-      final int maxDoc;
-      if (numDocs == 0) {
-        maxDoc = 1 + random().nextInt(10);
-      } else {
-        docIds[0] = random().nextInt(10);
-        for (int i = 1; i < docIds.length; ++i) {
-          docIds[i] = docIds[i - 1] + 1 + random().nextInt(100);
-        }
-        maxDoc = docIds[numDocs - 1] + 1 + random().nextInt(10);
-      }
-      for (int i = 0; i < values.length; ++i) {
-        values[i] = random().nextLong();
-      }
-      final long missingValue = random().nextLong();
-      final LongValues docIdsValues = new LongValues() {
-        @Override
-        public long get(long index) {
-          return docIds[Math.toIntExact(index)];
-        }
-      };
-      final LongValues valuesValues = new LongValues() {
-        @Override
-        public long get(long index) {
-          return values[Math.toIntExact(index)];
-        }
-      };
-      final SparseNumericDocValues sparseValues = new SparseNumericDocValues(numDocs, docIdsValues, valuesValues);
-
-      // sequential access
-      assertEquals(-1, sparseValues.docID());
-      for (int i = 0; i < docIds.length; ++i) {
-        assertEquals(docIds[i], sparseValues.nextDoc());
-      }
-      assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.nextDoc());
-
-      // advance
-      for (int i = 0; i < 2000; ++i) {
-        final int target = TestUtil.nextInt(random(), 0, maxDoc);
-        int index = Arrays.binarySearch(docIds, target);
-        if (index < 0) {
-          index = -1 - index;
-        }
-        sparseValues.reset();
-        if (index > 0) {
-          assertEquals(docIds[index - 1], sparseValues.advance(Math.toIntExact(docIds[index - 1])));
-        }
-        if (index == docIds.length) {
-          assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.advance(target));
-        } else {
-          assertEquals(docIds[index], sparseValues.advance(target));
-        }
-      }
-
-      final SparseNumericDocValuesRandomAccessWrapper raWrapper = new SparseNumericDocValuesRandomAccessWrapper(sparseValues, missingValue);
-
-      // random-access
-      for (int i = 0; i < 2000; ++i) {
-        final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1);
-        final int idx = Arrays.binarySearch(docIds, docId);
-        final long value = raWrapper.get(docId);
-        if (idx >= 0) {
-          assertEquals(values[idx], value);
-        } else {
-          assertEquals(missingValue, value);
-        }
-      }
-
-      // sequential access
-      for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
-        final int idx = Arrays.binarySearch(docIds, docId);
-        final long value = raWrapper.get(docId);
-        if (idx >= 0) {
-          assertEquals(values[idx], value);
-        } else {
-          assertEquals(missingValue, value);
-        }
-      }
-    }
-  }
-
   @Slow
   public void testSortedSetAroundBlockSize() throws IOException {
     final int frontier = 1 << Lucene70DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;