You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2017/04/17 08:29:44 UTC
lucene-solr:master: SOLR-10047: Mismatched Docvalues segments cause
exception in Sorting/Faceting. Solr now uninverts per segment to avoid such
exceptions
Repository: lucene-solr
Updated Branches:
refs/heads/master 4df4c52c0 -> 4da901a07
SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions
Squashed commit of the following:
commit c38f4cabc2828ee83b53b931dd829e29a3e1701c
Author: Keith Laban <ke...@gmail.com>
Date: Tue Apr 11 17:17:05 2017 -0400
reverted tests to using old wrap interface
commit 806f33e092491cc6a2ee292d2934c76171e40dc7
Author: Keith Laban <ke...@gmail.com>
Date: Tue Apr 11 17:13:34 2017 -0400
updated UninvertingReader.wrap / tests
commit b10bcab338b362b909491fea1cf13de66f5f17c0
Author: Keith Laban <kl...@bloomberg.net>
Date: Wed Apr 5 14:57:28 2017 -0400
SOLR-10047 - Updated javadoc/renamed class/added getReaderCacheHelper
commit 90ecf5a4ae4feaf3efc42a1ed8643ad21e1c73ce
Author: Keith Laban <kl...@bloomberg.net>
Date: Wed Jan 18 16:39:51 2017 -0500
SOLR-10047 - SolrIndexSearcher, UninvertingReader, uninvert docvalues per segment
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4da901a0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4da901a0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4da901a0
Branch: refs/heads/master
Commit: 4da901a0728239ac4e87b662533f966158991948
Parents: 4df4c52
Author: Shalin Shekhar Mangar <sh...@apache.org>
Authored: Mon Apr 17 13:59:26 2017 +0530
Committer: Shalin Shekhar Mangar <sh...@apache.org>
Committed: Mon Apr 17 13:59:26 2017 +0530
----------------------------------------------------------------------
solr/CHANGES.txt | 3 +
.../apache/solr/search/SolrIndexSearcher.java | 17 ++++-
.../solr/uninverting/UninvertingReader.java | 24 +++++--
.../org/apache/solr/schema/DocValuesTest.java | 72 ++++++++++++++++++++
4 files changed, 108 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7d0933d..3e4a922 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -202,6 +202,9 @@ Bug Fixes
* SOLR-10473: Correct LBHttpSolrClient's confusing SolrServerException message when timeAllowed is exceeded.
(Christine Poerschke)
+* SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment
+ to avoid such exceptions. (Keith Laban via shalin)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index f1ad36a..9d63aad 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -155,11 +155,26 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
assert reader != null;
return ExitableDirectoryReader.wrap(
- UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)),
+ wrapUninvertingReaderPerSegment(core, reader),
SolrQueryTimeoutImpl.getInstance());
}
/**
+ * If docvalues are enabled or disabled after data has already been indexed for a field, such that
+ * only some segments have docvalues, uninverting on the top level reader will cause
+ * IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
+ * the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field
+ * into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
+ *
+ * Therefore, this class provides a uninverting map per segment such that for any field,
+ * DocValues are used from segments if they exist and uninversion of the field is performed on the rest
+ * of the segments.
+ */
+ private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
+ return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
+ }
+
+ /**
* Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
* into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
* for dealing with options specified in the QueryCommand.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
index 0ba0b81..695ad0e 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@@ -19,6 +19,7 @@ package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
+import java.util.function.Function;
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
@@ -202,30 +203,39 @@ public class UninvertingReader extends FilterLeafReader {
}
/**
+ *
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
* and so on.
+ *
+ * @param in input directory reader
+ * @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
+ * @return a wrapped directory reader
*/
+ public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
+ return new UninvertingDirectoryReader(in, perSegmentMapper);
+ }
+
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
- return new UninvertingDirectoryReader(in, mapping);
+ return UninvertingReader.wrap(in, (r) -> mapping);
}
static class UninvertingDirectoryReader extends FilterDirectoryReader {
- final Map<String,Type> mapping;
+ final Function<LeafReader, Map<String,Type>> mapper;
- public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
+ public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
- return new UninvertingReader(reader, mapping);
+ return new UninvertingReader(reader, mapper.apply(reader));
}
});
- this.mapping = mapping;
+ this.mapper = mapper;
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
- return new UninvertingDirectoryReader(in, mapping);
+ return new UninvertingDirectoryReader(in, mapper);
}
// NOTE: delegating the cache helpers is wrong since this wrapper alters the
@@ -244,7 +254,7 @@ public class UninvertingReader extends FilterLeafReader {
/**
* Create a new UninvertingReader with the specified mapping
* <p>
- * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
+ * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
* instead.
*
* @lucene.internal
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java b/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
index cf43a68..2d8afee 100644
--- a/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
@@ -25,10 +25,14 @@ import java.util.List;
import java.util.function.Function;
import java.util.function.Supplier;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues;
@@ -151,6 +155,74 @@ public class DocValuesTest extends SolrTestCaseJ4 {
}
}
+
+ public void testHalfAndHalfDocValues() throws Exception {
+ // Insert two docs without docvalues
+ String fieldname = "string_add_dv_later";
+ assertU(adoc("id", "3", fieldname, "c"));
+ assertU(commit());
+ assertU(adoc("id", "1", fieldname, "a"));
+ assertU(commit());
+
+
+ try (SolrCore core = h.getCoreInc()) {
+ assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());
+ // Add docvalues to the field type
+ IndexSchema schema = core.getLatestSchema();
+ SchemaField oldField = schema.getField(fieldname);
+ int newProperties = oldField.getProperties() | SchemaField.DOC_VALUES;
+
+ SchemaField sf = new SchemaField( fieldname, oldField.getType(), newProperties, null);
+ schema.getFields().put( fieldname, sf );
+
+ // Insert a new doc with docvalues
+ assertU(adoc("id", "2", fieldname, "b"));
+ assertU(commit());
+
+
+ // Check there are a mix of segments with and without docvalues
+ final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
+ final SolrIndexSearcher searcher = searcherRef.get();
+ try {
+ final DirectoryReader topReader = searcher.getRawReader();
+
+ //Assert no merges
+
+ assertEquals(3, topReader.numDocs());
+ assertEquals(3, topReader.leaves().size());
+
+ final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+ //The global field type should have docValues because a document with dvs was added
+ assertEquals(DocValuesType.SORTED, infos.fieldInfo(fieldname).getDocValuesType());
+
+ for(LeafReaderContext ctx: topReader.leaves()) {
+ LeafReader r = ctx.reader();
+ //Make sure there were no merges
+ assertEquals(1, r.numDocs());
+ Document doc = r.document(0);
+ String id = doc.getField("id").stringValue();
+
+ if(id.equals("1") || id.equals("3")) {
+ assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
+ } else {
+ assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
+ }
+
+ }
+ } finally {
+ searcherRef.decref();
+ }
+ }
+
+ // Assert sort order is correct
+ assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"),
+ "//*[@numFound='3']",
+ "//result/doc[1]/int[@name='id'][.=1]",
+ "//result/doc[2]/int[@name='id'][.=2]",
+ "//result/doc[3]/int[@name='id'][.=3]"
+ );
+ }
+
private void tstToObj(SchemaField sf, Object o) {
List<IndexableField> fields = sf.createFields(o);
for (IndexableField field : fields) {