You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2017/04/20 21:29:51 UTC
lucene-solr:branch_6x: LUCENE-7791: Fixed index sorting to work with
sparse numeric and binary docvalues fields.
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 65088a7a5 -> 0902c9440
LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues fields.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0902c944
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0902c944
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0902c944
Branch: refs/heads/branch_6x
Commit: 0902c9440ef10b02e909a6c58411356fea97bb5f
Parents: 65088a7
Author: Jim Ferenczi <ji...@apache.org>
Authored: Thu Apr 20 23:29:35 2017 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Thu Apr 20 23:29:35 2017 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../lucene/index/BinaryDocValuesWriter.java | 6 +-
.../lucene/index/NumericDocValuesWriter.java | 14 ++--
.../apache/lucene/index/TestIndexSorting.java | 76 ++++++++++++++++++++
4 files changed, 88 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0902c944/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index eb24726..7c38828 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -51,6 +51,9 @@ Bug Fixes
* LUCENE-7769: The UnifiedHighligter wasn't highlighting portions of the query
wrapped in BoostQuery or SpanBoostQuery. (David Smiley, Dmitry Malinin)
+* LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues field.
+ (Przemyslaw Szeremiota via Jim Ferenczi)
+
Other
* LUCENE-7763: Remove outdated comment in IndexWriterConfig.setIndexSort javadocs.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0902c944/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index 0a59278..aa21d32 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -227,15 +227,15 @@ class BinaryDocValuesWriter extends DocValuesWriter {
throw new NoSuchElementException();
}
final BytesRef v;
- if (upto < size) {
- int oldID = sortMap.newToOld(upto);
+ int oldID = sortMap.newToOld(upto);
+ if (oldID < values.size()) {
int length = (int) values.get(oldID);
long pos = starts[oldID];
bytesIterator.setPosition(pos);
value.grow(length);
value.setLength(length);
bytesIterator.readBytes(value.bytes(), 0, value.length());
- if (docsWithField.get(upto)) {
+ if (docsWithField.get(oldID)) {
v = value.get();
} else {
v = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0902c944/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index a307afd..15105a5e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -108,7 +108,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
@Override
Sorter.DocComparator getDocComparator(int numDoc, SortField sortField) throws IOException {
- return getDocComparator(sortField, sortField.getType(), (docID) -> docsWithField.get(docID), (docID) -> finalValues.get(docID));
+ return getDocComparator(sortField, sortField.getType(),
+ (docID) -> docID < docsWithField.length() ? docsWithField.get(docID) : false,
+ (docID) -> finalValues.get(docID));
}
static Sorter.DocComparator getDocComparator(SortField sortField, SortField.Type sortType, IntPredicate docsWithField, IntToLongFunction docValueFunction) {
@@ -252,13 +254,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
throw new NoSuchElementException();
}
Long value;
- if (upto < size) {
- int old = sortMap.newToOld(upto);
- if (docsWithField.get(old)) {
- value = values.get(old);
- } else {
- value = null;
- }
+ int old = sortMap.newToOld(upto);
+ if (old < size && docsWithField.get(old)) {
+ value = values.get(old);
} else {
value = null;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0902c944/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 1287dd2..897ab6d 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -2332,4 +2332,80 @@ public class TestIndexSorting extends LuceneTestCase {
}
IOUtils.close(r, w, dir);
}
+
+ public void testIndexSortWithSparseField() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ for (int i = 0; i < 128; i++) {
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("dense_int", i));
+ if (i < 64) {
+ doc.add(new NumericDocValuesField("sparse_int", i));
+ doc.add(new BinaryDocValuesField("sparse_binary", new BytesRef(Integer.toString(i))));
+ }
+ w.addDocument(doc);
+ }
+ w.commit();
+ w.forceMerge(1);
+ DirectoryReader r = DirectoryReader.open(w);
+ assertEquals(1, r.leaves().size());
+ LeafReader leafReader = r.leaves().get(0).reader();
+ NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
+ NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
+ BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
+
+ Bits docsWithField = r.leaves().get(0).reader().getDocsWithField("sparse_int");
+ Bits docsWithBinaryField = r.leaves().get(0).reader().getDocsWithField("sparse_binary");
+ for(int docID = 0; docID < 128; docID++) {
+ assertEquals(127-docID, denseValues.get(docID));
+ if (docID >= 64) {
+ assertTrue(docsWithField.get(docID));
+ assertTrue(docsWithBinaryField.get(docID));
+ assertEquals(127-docID, sparseValues.get(docID));
+ assertEquals(new BytesRef(Integer.toString(127-docID)), sparseBinaryValues.get(docID));
+ } else {
+ assertFalse(docsWithField.get(docID));
+ assertFalse(docsWithBinaryField.get(docID));
+ }
+ }
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testIndexSortOnSparseField() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("sparse", SortField.Type.INT, false);
+ sortField.setMissingValue(Integer.MIN_VALUE);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ for (int i = 0; i < 128; i++) {
+ Document doc = new Document();
+ if (i < 64) {
+ doc.add(new NumericDocValuesField("sparse", i));
+ }
+ w.addDocument(doc);
+ }
+ w.commit();
+ w.forceMerge(1);
+ DirectoryReader r = DirectoryReader.open(w);
+ assertEquals(1, r.leaves().size());
+ LeafReader leafReader = r.leaves().get(0).reader();
+ NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse");
+ Bits docsWithField = r.leaves().get(0).reader().getDocsWithField("sparse");
+ for(int docID = 0; docID < 128; docID++) {
+ if (docID >= 64) {
+ assertTrue(docsWithField.get(docID));
+ assertEquals(docID-64, sparseValues.get(docID));
+ } else {
+ assertFalse(docsWithField.get(docID));
+ }
+ }
+ IOUtils.close(r, w, dir);
+ }
+
}