You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2017/04/20 21:46:18 UTC

lucene-solr:branch_6_5: LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues fields.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6_5 c357151db -> 7477dcd11


LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues fields.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7477dcd1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7477dcd1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7477dcd1

Branch: refs/heads/branch_6_5
Commit: 7477dcd111b6950d4105623ad2cfe60faea463da
Parents: c357151
Author: Jim Ferenczi <ji...@apache.org>
Authored: Thu Apr 20 23:29:35 2017 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Thu Apr 20 23:46:00 2017 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +
 .../lucene/index/BinaryDocValuesWriter.java     |  6 +-
 .../lucene/index/NumericDocValuesWriter.java    | 14 ++--
 .../apache/lucene/index/TestIndexSorting.java   | 76 ++++++++++++++++++++
 4 files changed, 88 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7477dcd1/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 98f2dfa..710d744 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -20,6 +20,9 @@ Bug Fixes
   ArrayIndexOutOfBoundsException when byte blocks larger than 32 KB
   were added (Mike McCandless)
 
+* LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues field.
+  (Przemyslaw Szeremiota via Jim Ferenczi)
+
 Other
 
 * LUCENE-7763: Remove outdated comment in IndexWriterConfig.setIndexSort javadocs.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7477dcd1/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index 0a59278..aa21d32 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -227,15 +227,15 @@ class BinaryDocValuesWriter extends DocValuesWriter {
         throw new NoSuchElementException();
       }
       final BytesRef v;
-      if (upto < size) {
-        int oldID = sortMap.newToOld(upto);
+      int oldID = sortMap.newToOld(upto);
+      if (oldID < values.size()) {
         int length = (int) values.get(oldID);
         long pos = starts[oldID];
         bytesIterator.setPosition(pos);
         value.grow(length);
         value.setLength(length);
         bytesIterator.readBytes(value.bytes(), 0, value.length());
-        if (docsWithField.get(upto)) {
+        if (docsWithField.get(oldID)) {
           v = value.get();
         } else {
           v = null;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7477dcd1/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index a307afd..15105a5e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -108,7 +108,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
 
   @Override
   Sorter.DocComparator getDocComparator(int numDoc, SortField sortField) throws IOException {
-    return getDocComparator(sortField, sortField.getType(), (docID) -> docsWithField.get(docID), (docID) -> finalValues.get(docID));
+    return getDocComparator(sortField, sortField.getType(),
+        (docID) -> docID < docsWithField.length() ? docsWithField.get(docID) : false,
+        (docID) -> finalValues.get(docID));
   }
 
   static Sorter.DocComparator getDocComparator(SortField sortField, SortField.Type sortType, IntPredicate docsWithField, IntToLongFunction docValueFunction) {
@@ -252,13 +254,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
         throw new NoSuchElementException();
       }
       Long value;
-      if (upto < size) {
-        int old = sortMap.newToOld(upto);
-        if (docsWithField.get(old)) {
-          value = values.get(old);
-        } else {
-          value = null;
-        }
+      int old = sortMap.newToOld(upto);
+      if (old < size && docsWithField.get(old)) {
+        value = values.get(old);
       } else {
         value = null;
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7477dcd1/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 1287dd2..897ab6d 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -2332,4 +2332,80 @@ public class TestIndexSorting extends LuceneTestCase {
     }
     IOUtils.close(r, w, dir);
   }
+
+  public void testIndexSortWithSparseField() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
+    Sort indexSort = new Sort(sortField);
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    for (int i = 0; i < 128; i++) {
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField("dense_int", i));
+      if (i < 64) {
+        doc.add(new NumericDocValuesField("sparse_int", i));
+        doc.add(new BinaryDocValuesField("sparse_binary", new BytesRef(Integer.toString(i))));
+      }
+      w.addDocument(doc);
+    }
+    w.commit();
+    w.forceMerge(1);
+    DirectoryReader r = DirectoryReader.open(w);
+    assertEquals(1, r.leaves().size());
+    LeafReader leafReader = r.leaves().get(0).reader();
+    NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
+    NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
+    BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
+
+    Bits docsWithField = r.leaves().get(0).reader().getDocsWithField("sparse_int");
+    Bits docsWithBinaryField = r.leaves().get(0).reader().getDocsWithField("sparse_binary");
+    for(int docID = 0; docID < 128; docID++) {
+      assertEquals(127-docID, denseValues.get(docID));
+      if (docID >= 64) {
+        assertTrue(docsWithField.get(docID));
+        assertTrue(docsWithBinaryField.get(docID));
+        assertEquals(127-docID, sparseValues.get(docID));
+        assertEquals(new BytesRef(Integer.toString(127-docID)), sparseBinaryValues.get(docID));
+      } else {
+        assertFalse(docsWithField.get(docID));
+        assertFalse(docsWithBinaryField.get(docID));
+      }
+    }
+    IOUtils.close(r, w, dir);
+  }
+
+  public void testIndexSortOnSparseField() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    SortField sortField = new SortField("sparse", SortField.Type.INT, false);
+    sortField.setMissingValue(Integer.MIN_VALUE);
+    Sort indexSort = new Sort(sortField);
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    for (int i = 0; i < 128; i++) {
+      Document doc = new Document();
+      if (i < 64) {
+        doc.add(new NumericDocValuesField("sparse", i));
+      }
+      w.addDocument(doc);
+    }
+    w.commit();
+    w.forceMerge(1);
+    DirectoryReader r = DirectoryReader.open(w);
+    assertEquals(1, r.leaves().size());
+    LeafReader leafReader = r.leaves().get(0).reader();
+    NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse");
+    Bits docsWithField = r.leaves().get(0).reader().getDocsWithField("sparse");
+    for(int docID = 0; docID < 128; docID++) {
+      if (docID >= 64) {
+        assertTrue(docsWithField.get(docID));
+        assertEquals(docID-64, sparseValues.get(docID));
+      } else {
+        assertFalse(docsWithField.get(docID));
+      }
+    }
+    IOUtils.close(r, w, dir);
+  }
+
 }