You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2017/04/17 08:29:44 UTC

lucene-solr:master: SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions

Repository: lucene-solr
Updated Branches:
  refs/heads/master 4df4c52c0 -> 4da901a07


SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions

Squashed commit of the following:

commit c38f4cabc2828ee83b53b931dd829e29a3e1701c
Author: Keith Laban <ke...@gmail.com>
Date:   Tue Apr 11 17:17:05 2017 -0400

    reverted tests to using old wrap interface

commit 806f33e092491cc6a2ee292d2934c76171e40dc7
Author: Keith Laban <ke...@gmail.com>
Date:   Tue Apr 11 17:13:34 2017 -0400

    updated UninvertingReader.wrap / tests

commit b10bcab338b362b909491fea1cf13de66f5f17c0
Author: Keith Laban <kl...@bloomberg.net>
Date:   Wed Apr 5 14:57:28 2017 -0400

    SOLR-10047 - Updated javadoc/renamed class/added getReaderCacheHelper

commit 90ecf5a4ae4feaf3efc42a1ed8643ad21e1c73ce
Author: Keith Laban <kl...@bloomberg.net>
Date:   Wed Jan 18 16:39:51 2017 -0500

    SOLR-10047 - SolrIndexSearcher, UninvertingReader, uninvert docvalues per segment


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4da901a0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4da901a0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4da901a0

Branch: refs/heads/master
Commit: 4da901a0728239ac4e87b662533f966158991948
Parents: 4df4c52
Author: Shalin Shekhar Mangar <sh...@apache.org>
Authored: Mon Apr 17 13:59:26 2017 +0530
Committer: Shalin Shekhar Mangar <sh...@apache.org>
Committed: Mon Apr 17 13:59:26 2017 +0530

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +
 .../apache/solr/search/SolrIndexSearcher.java   | 17 ++++-
 .../solr/uninverting/UninvertingReader.java     | 24 +++++--
 .../org/apache/solr/schema/DocValuesTest.java   | 72 ++++++++++++++++++++
 4 files changed, 108 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7d0933d..3e4a922 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -202,6 +202,9 @@ Bug Fixes
 * SOLR-10473: Correct LBHttpSolrClient's confusing SolrServerException message when timeAllowed is exceeded.
   (Christine Poerschke)
 
+* SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment
+  to avoid such exceptions. (Keith Laban via shalin)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index f1ad36a..9d63aad 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -155,11 +155,26 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
     assert reader != null;
     return ExitableDirectoryReader.wrap(
-        UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)),
+        wrapUninvertingReaderPerSegment(core, reader),
         SolrQueryTimeoutImpl.getInstance());
   }
 
   /**
+   * If docvalues are enabled or disabled after data has already been indexed for a field, such that
+   * only some segments have docvalues, uninverting on the top level reader will cause 
+   * IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
+   * the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field 
+   * into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
+   * 
+   * Therefore, this class provides a uninverting map per segment such that for any field, 
+   * DocValues are used from segments if they exist and uninversion of the field is performed on the rest
+   * of the segments.
+   */
+   private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
+     return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
+   }
+
+  /**
    * Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
    * into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
    * for dealing with options specified in the QueryCommand.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
index 0ba0b81..695ad0e 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@@ -19,6 +19,7 @@ package org.apache.solr.uninverting;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Map;
+import java.util.function.Function;
 
 import org.apache.lucene.document.BinaryDocValuesField; // javadocs
 import org.apache.lucene.document.NumericDocValuesField; // javadocs
@@ -202,30 +203,39 @@ public class UninvertingReader extends FilterLeafReader {
   }
   
   /**
+   * 
    * Wraps a provided DirectoryReader. Note that for convenience, the returned reader
    * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
    * and so on. 
+   * 
+   * @param in input directory reader
+   * @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
+   * @return a wrapped directory reader
    */
+  public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
+    return new UninvertingDirectoryReader(in, perSegmentMapper);
+  }
+  
   public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
-    return new UninvertingDirectoryReader(in, mapping);
+    return UninvertingReader.wrap(in, (r) -> mapping);
   }
   
   static class UninvertingDirectoryReader extends FilterDirectoryReader {
-    final Map<String,Type> mapping;
+    final Function<LeafReader, Map<String,Type>> mapper;
     
-    public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
+    public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
       super(in, new FilterDirectoryReader.SubReaderWrapper() {
         @Override
         public LeafReader wrap(LeafReader reader) {
-          return new UninvertingReader(reader, mapping);
+          return new UninvertingReader(reader, mapper.apply(reader));
         }
       });
-      this.mapping = mapping;
+      this.mapper = mapper;
     }
 
     @Override
     protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
-      return new UninvertingDirectoryReader(in, mapping);
+      return new UninvertingDirectoryReader(in, mapper);
     }
 
     // NOTE: delegating the cache helpers is wrong since this wrapper alters the
@@ -244,7 +254,7 @@ public class UninvertingReader extends FilterLeafReader {
   /** 
    * Create a new UninvertingReader with the specified mapping 
    * <p>
-   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
+   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
    * instead.
    *  
    * @lucene.internal

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4da901a0/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java b/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
index cf43a68..2d8afee 100644
--- a/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
@@ -25,10 +25,14 @@ import java.util.List;
 import java.util.function.Function;
 import java.util.function.Supplier;
 
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.queries.function.FunctionValues;
@@ -151,6 +155,74 @@ public class DocValuesTest extends SolrTestCaseJ4 {
     }
   }
 
+
+  public void testHalfAndHalfDocValues() throws Exception {
+    // Insert two docs without docvalues
+    String fieldname = "string_add_dv_later";
+    assertU(adoc("id", "3", fieldname, "c"));
+    assertU(commit());
+    assertU(adoc("id", "1", fieldname, "a"));
+    assertU(commit());
+    
+   
+    try (SolrCore core = h.getCoreInc()) {
+        assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());
+      // Add docvalues to the field type
+      IndexSchema schema = core.getLatestSchema();
+      SchemaField oldField = schema.getField(fieldname);
+      int newProperties = oldField.getProperties() | SchemaField.DOC_VALUES;
+      
+      SchemaField sf = new SchemaField( fieldname, oldField.getType(), newProperties, null);
+      schema.getFields().put( fieldname, sf );
+      
+      // Insert a new doc with docvalues
+      assertU(adoc("id", "2", fieldname, "b"));
+      assertU(commit());
+    
+    
+      // Check there are a mix of segments with and without docvalues
+      final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
+      final SolrIndexSearcher searcher = searcherRef.get();
+      try {
+        final DirectoryReader topReader = searcher.getRawReader();
+
+        //Assert no merges
+        
+        assertEquals(3, topReader.numDocs());
+        assertEquals(3, topReader.leaves().size());
+        
+        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+        //The global field type should have docValues because a document with dvs was added
+        assertEquals(DocValuesType.SORTED, infos.fieldInfo(fieldname).getDocValuesType());
+        
+        for(LeafReaderContext ctx: topReader.leaves()) {
+          LeafReader r = ctx.reader();
+          //Make sure there were no merges
+          assertEquals(1, r.numDocs());
+          Document doc = r.document(0);
+          String id = doc.getField("id").stringValue();
+          
+          if(id.equals("1") || id.equals("3")) {
+            assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
+          } else {
+            assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
+          }
+          
+        }
+      } finally {
+        searcherRef.decref();
+      }
+    }
+    
+    // Assert sort order is correct
+    assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"),
+        "//*[@numFound='3']",
+        "//result/doc[1]/int[@name='id'][.=1]",
+        "//result/doc[2]/int[@name='id'][.=2]",
+        "//result/doc[3]/int[@name='id'][.=3]"
+    );
+  }
+
   private void tstToObj(SchemaField sf, Object o) {
     List<IndexableField> fields = sf.createFields(o);
     for (IndexableField field : fields) {