You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/03/23 11:49:51 UTC
[04/46] lucene-solr:jira/solr-9959: SOLR-10046: Add
UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)
SOLR-10046: Add UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9d56f136
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9d56f136
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9d56f136
Branch: refs/heads/jira/solr-9959
Commit: 9d56f136505098ea5538c5d6eaaf60536848feb9
Parents: 65c695b
Author: Christine Poerschke <cp...@apache.org>
Authored: Wed Mar 15 10:31:10 2017 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Wed Mar 15 12:15:17 2017 +0000
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../UninvertDocValuesMergePolicyFactory.java | 218 +++++++++++++++++
.../solr/collection1/conf/schema-docValues.xml | 1 +
...nfig-uninvertdocvaluesmergepolicyfactory.xml | 38 +++
.../index/UninvertDocValuesMergePolicyTest.java | 243 +++++++++++++++++++
5 files changed, 502 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d56f136/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0f1f488..6829cd1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -185,6 +185,8 @@ New Features
* SOLR-10224: Add disk total and disk free metrics. (ab)
+* SOLR-10046: Add UninvertDocValuesMergePolicyFactory class. (Keith Laban, Christine Poerschke)
+
Bug Fixes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d56f136/solr/core/src/java/org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java b/solr/core/src/java/org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java
new file mode 100644
index 0000000..b6bfbed
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.index;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FilterCodecReader;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.OneMergeWrappingMergePolicy;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.uninverting.UninvertingReader;
+
+/**
+ * A merge policy factory whose merge policy can detect schema changes and write docvalues into merging segments
+ * when a field has docvalues enabled, using UninvertingReader.
+ *
+ * This merge policy will delegate to the wrapped merge policy for selecting merge segments.
+ *
+ */
+public class UninvertDocValuesMergePolicyFactory extends WrapperMergePolicyFactory {
+
+ final private boolean skipIntegrityCheck;
+
+ /**
+ * Whether or not the wrapped docValues producer should skip its integrity check:
+ * when <code>true</code>, the producer's <code>checkIntegrity()</code> does nothing.
+ */
+ public boolean getSkipIntegrityCheck() {
+ return skipIntegrityCheck;
+ }
+
+ /**
+  * Creates the factory, consuming the optional <code>skipIntegrityCheck</code> argument
+  * (defaults to <code>false</code> when absent).
+  *
+  * @param resourceLoader passed through to the wrapper factory superclass
+  * @param args factory arguments; <code>skipIntegrityCheck</code> is removed from them here
+  * @param schema passed through to the wrapper factory superclass
+  * @throws IllegalArgumentException if any argument is still left over after
+  *         the superclass and this constructor have taken theirs
+  */
+ public UninvertDocValuesMergePolicyFactory(SolrResourceLoader resourceLoader, MergePolicyFactoryArgs args, IndexSchema schema) {
+   super(resourceLoader, args, schema);
+   // Go through toString() rather than a hard (Boolean) cast so that a value configured as
+   // text ("true"/"false") is accepted instead of failing with a ClassCastException.
+   final Object sic = args.remove("skipIntegrityCheck");
+   this.skipIntegrityCheck = (sic != null) && Boolean.parseBoolean(sic.toString());
+   if (!args.keys().isEmpty()) {
+     throw new IllegalArgumentException("Arguments were "+args+" but "+getClass().getSimpleName()+" takes no arguments other than skipIntegrityCheck.");
+   }
+ }
+
+ /**
+  * Builds the merge policy: <code>wrappedMP</code> wrapped in a OneMergeWrappingMergePolicy whose
+  * wrapping function re-creates each merge as an UninvertDocValuesOneMerge over the same segments.
+  */
+ @Override
+ protected MergePolicy getMergePolicyInstance(MergePolicy wrappedMP) {
+   return new OneMergeWrappingMergePolicy(
+       wrappedMP,
+       oneMerge -> new UninvertDocValuesOneMerge(oneMerge.segments));
+ }
+
+ private UninvertingReader.Type getUninversionType(FieldInfo fi) {
+ SchemaField sf = schema.getFieldOrNull(fi.name);
+
+ if (null != sf &&
+ sf.hasDocValues() &&
+ fi.getDocValuesType() == DocValuesType.NONE &&
+ fi.getIndexOptions() != IndexOptions.NONE) {
+ return sf.getType().getUninversionType(sf);
+ } else {
+ return null;
+ }
+ }
+
+ /**
+  * A OneMerge over the given segments whose readers are wrapped for uninversion when needed.
+  */
+ private class UninvertDocValuesOneMerge extends MergePolicy.OneMerge {
+
+   public UninvertDocValuesOneMerge(List<SegmentCommitInfo> segments) {
+     super(segments);
+   }
+
+   /**
+    * Wraps the reader in an UninvertingFilterCodecReader if at least one of its fields has no
+    * docvalues although the schema says it should; otherwise returns the reader unchanged.
+    */
+   @Override
+   public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+     final Map<String,UninvertingReader.Type> typesByField = new HashMap<>();
+     for (FieldInfo fieldInfo : reader.getFieldInfos()) {
+       final UninvertingReader.Type uninversionType = getUninversionType(fieldInfo);
+       if (uninversionType != null) {
+         typesByField.put(fieldInfo.name, uninversionType);
+       }
+     }
+
+     if (typesByField.isEmpty()) {
+       return reader; // nothing to uninvert, merge the plain reader
+     }
+     return new UninvertingFilterCodecReader(reader, typesByField);
+   }
+
+ }
+
+
+ /**
+ * A FilterCodecReader that serves docvalues through an UninvertingReader wrapped around the same
+ * underlying reader, using the supplied field-to-uninversion-type map.
+ *
+ * The docvalues producer returned by getDocValuesReader() forwards every lookup to the
+ * UninvertingReader by field name; its checkIntegrity() is skipped when skipIntegrityCheck is set,
+ * its close() does nothing and its ramBytesUsed() always reports 0. getFieldInfos() also comes from
+ * the UninvertingReader, while both cache helpers come from the wrapped reader.
+ *
+ * This is going to blow up FieldCache; look into an alternative implementation that uninverts without
+ * FieldCache.
+ *
+ * NOTE(review): doClose() closes the UninvertingReader and then calls super.doClose(); since the
+ * UninvertingReader was built on the same wrapped reader, confirm that closing both is safe.
+ */
+ private class UninvertingFilterCodecReader extends FilterCodecReader {
+
+ private final UninvertingReader uninvertingReader;
+ private final DocValuesProducer docValuesProducer;
+
+ public UninvertingFilterCodecReader(CodecReader in, Map<String,UninvertingReader.Type> uninversionMap) {
+ super(in);
+
+ this.uninvertingReader = new UninvertingReader(in, uninversionMap);
+ this.docValuesProducer = new DocValuesProducer() {
+
+ @Override
+ public NumericDocValues getNumeric(FieldInfo field) throws IOException {
+ return uninvertingReader.getNumericDocValues(field.name);
+ }
+
+ @Override
+ public BinaryDocValues getBinary(FieldInfo field) throws IOException {
+ return uninvertingReader.getBinaryDocValues(field.name);
+ }
+
+ @Override
+ public SortedDocValues getSorted(FieldInfo field) throws IOException {
+ return uninvertingReader.getSortedDocValues(field.name);
+ }
+
+ @Override
+ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
+ return uninvertingReader.getSortedNumericDocValues(field.name);
+ }
+
+ @Override
+ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+ return uninvertingReader.getSortedSetDocValues(field.name);
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ if (!skipIntegrityCheck) {
+ uninvertingReader.checkIntegrity();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
+ };
+ }
+
+ @Override
+ protected void doClose() throws IOException {
+ docValuesProducer.close();
+ uninvertingReader.close();
+ super.doClose();
+ }
+
+ @Override
+ public DocValuesProducer getDocValuesReader() {
+ return docValuesProducer;
+ }
+
+ @Override
+ public FieldInfos getFieldInfos() {
+ return uninvertingReader.getFieldInfos();
+ }
+
+ @Override
+ public CacheHelper getCoreCacheHelper() {
+ return in.getCoreCacheHelper();
+ }
+
+ @Override
+ public CacheHelper getReaderCacheHelper() {
+ return in.getReaderCacheHelper();
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d56f136/solr/core/src/test-files/solr/collection1/conf/schema-docValues.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-docValues.xml b/solr/core/src/test-files/solr/collection1/conf/schema-docValues.xml
index c7b7de8..9e4286d 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-docValues.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-docValues.xml
@@ -62,6 +62,7 @@
<field name="datedv" type="date" indexed="false" stored="false" docValues="true" default="1995-12-31T23:59:59.999Z"/>
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true" default="solr" />
+ <field name="string_add_dv_later" type="string" indexed="true" stored="true" docValues="false"/>
<field name="booldv" type="boolean" indexed="false" stored="false" docValues="true" default="true" />
<field name="floatdvs" type="float" indexed="false" stored="false" docValues="true" default="1"/>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d56f136/solr/core/src/test-files/solr/collection1/conf/solrconfig-uninvertdocvaluesmergepolicyfactory.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-uninvertdocvaluesmergepolicyfactory.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-uninvertdocvaluesmergepolicyfactory.xml
new file mode 100644
index 0000000..613357b
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-uninvertdocvaluesmergepolicyfactory.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+ <indexConfig>
+ <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+ <mergePolicyFactory class="org.apache.solr.index.UninvertDocValuesMergePolicyFactory">
+ <str name="wrapped.prefix">inner</str>
+ <str name="inner.class">org.apache.solr.index.DefaultMergePolicyFactory</str>
+ <bool name="skipIntegrityCheck">${solr.tests.skipIntegrityCheck:false}</bool>
+ </mergePolicyFactory>
+
+ <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
+ </indexConfig>
+
+ <requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
+
+</config>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d56f136/solr/core/src/test/org/apache/solr/index/UninvertDocValuesMergePolicyTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/index/UninvertDocValuesMergePolicyTest.java b/solr/core/src/test/org/apache/solr/index/UninvertDocValuesMergePolicyTest.java
new file mode 100644
index 0000000..17e6b3e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/index/UninvertDocValuesMergePolicyTest.java
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.index;
+
+import java.util.Random;
+import java.util.function.IntUnaryOperator;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.TestHarness;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+
+public class UninvertDocValuesMergePolicyTest extends SolrTestCaseJ4 {
+
+ private static String SOLR_TESTS_SKIP_INTEGRITY_CHECK = "solr.tests.skipIntegrityCheck";
+ private static String ID_FIELD = "id";
+ private static String TEST_FIELD = "string_add_dv_later";
+
+ /**
+  * Randomly picks the value of the solr.tests.skipIntegrityCheck system property, which the test
+  * solrconfig substitutes into the factory's skipIntegrityCheck argument.
+  */
+ @BeforeClass
+ public static void beforeTests() throws Exception {
+   final boolean skipIntegrityCheck = random().nextBoolean();
+   System.setProperty(SOLR_TESTS_SKIP_INTEGRITY_CHECK, Boolean.toString(skipIntegrityCheck));
+ }
+
+ @AfterClass
+ public static void afterTests() {
+ System.clearProperty(SOLR_TESTS_SKIP_INTEGRITY_CHECK); // undo the property set in beforeTests()
+ }
+
+ @After
+ public void after() throws Exception {
+ deleteCore(); // discard the core that before() initialised for this test
+ }
+
+ @Before
+ public void before() throws Exception {
+ initCore("solrconfig-uninvertdocvaluesmergepolicyfactory.xml", "schema-docValues.xml"); // config that uses the factory under test + schema declaring string_add_dv_later
+ }
+
+ /**
+  * Indexes 100 documents while the test field has no docvalues, enables docvalues on the field in
+  * the schema, indexes 20 more documents (ids 90-109, so 10 of them overwrite existing ids),
+  * merges down to a single segment and then checks that every document in the merged segment
+  * has a docvalue equal to its stored value.
+  */
+ public void testIndexAndAddDocValues() throws Exception {
+   Random rand = random();
+
+   for(int i=0; i < 100; i++) {
+     assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
+
+     // Commit at random points so the documents are spread over several segments
+     if(rand.nextBoolean()) {
+       assertU(commit());
+     }
+   }
+
+   assertU(commit());
+
+   // Assert everything has been indexed and there are no docvalues
+   withNewRawReader(h, topReader -> {
+     assertEquals(100, topReader.numDocs());
+
+     final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+
+     // The global field type should not have docValues yet
+     assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
+   });
+
+
+   addDocValuesTo(h, TEST_FIELD);
+
+
+   // Add some more documents with doc values turned on including updating some
+   for(int i=90; i < 110; i++) {
+     assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
+
+     if(rand.nextBoolean()) {
+       assertU(commit());
+     }
+   }
+
+   assertU(commit());
+
+   withNewRawReader(h, topReader -> {
+     assertEquals(110, topReader.numDocs());
+
+     final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+     // The global field type should have docValues because a document with dvs was added
+     assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
+   });
+
+   int optimizeSegments = 1;
+   assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
+
+
+   // Assert all docs have the right docvalues
+   withNewRawReader(h, topReader -> {
+     // Assert merged into one segment
+     assertEquals(110, topReader.numDocs());
+     assertEquals(optimizeSegments, topReader.leaves().size());
+
+
+     final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+     // The global field type should have docValues because a document with dvs was added
+     assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
+
+
+     // Check that all segments have the right docvalues type with the correct value
+     // Also check that other fields (e.g. the id field) didn't mistakenly get docvalues added
+     for (LeafReaderContext ctx : topReader.leaves()) {
+       LeafReader r = ctx.reader();
+
+       // Field infos are per segment, not per document, so check them once per leaf
+       assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
+       assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
+
+       SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
+       for(int i = 0; i < r.numDocs(); ++i) {
+         Document doc = r.document(i);
+         String v = doc.getField(TEST_FIELD).stringValue();
+         String id = doc.getField(ID_FIELD).stringValue();
+         assertEquals(v, id);
+
+         // Every document must have a value: position on doc i explicitly (as the non-indexed
+         // test does) instead of calling nextDoc() and ignoring where the iterator landed
+         assertTrue(docvalues.advanceExact(i));
+         assertEquals(v, docvalues.binaryValue().utf8ToString());
+       }
+     }
+   });
+ }
+
+
+ // When a non-indexed field gets merged, it exhibits the old behavior: the field is merged and the docvalues
+ // headers are updated, but documents added before docvalues were enabled get no docvalues for this field
+ public void testNonIndexedFieldDoesNonFail() throws Exception {
+ // Remove Indexed from fieldType
+ removeIndexFrom(h, TEST_FIELD);
+
+ assertU(adoc(ID_FIELD, String.valueOf(1), TEST_FIELD, String.valueOf(1)));
+ assertU(commit());
+
+ addDocValuesTo(h, TEST_FIELD);
+
+ assertU(adoc(ID_FIELD, String.valueOf(2), TEST_FIELD, String.valueOf(2)));
+ assertU(commit());
+
+ assertU(optimize("maxSegments", "1"));
+
+ withNewRawReader(h, topReader -> {
+ // Assert merged into one segment
+ assertEquals(2, topReader.numDocs());
+ assertEquals(1, topReader.leaves().size());
+
+
+ final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+ // The global field type should have docValues because a document with dvs was added
+ assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
+
+ for (LeafReaderContext ctx : topReader.leaves()) {
+ LeafReader r = ctx.reader();
+ SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
+ for(int i = 0; i < r.numDocs(); ++i) {
+ Document doc = r.document(i);
+ String v = doc.getField(TEST_FIELD).stringValue();
+ String id = doc.getField(ID_FIELD).stringValue();
+ assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
+ assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
+
+
+ if(id.equals("2")) { // added after docvalues were enabled on the field
+ assertTrue(docvalues.advanceExact(i));
+ assertEquals(v, docvalues.binaryValue().utf8ToString());
+ } else { // added before docvalues were enabled; the field was not indexed either
+ assertFalse(docvalues.advanceExact(i));
+ }
+
+ }
+ }
+ });
+ }
+
+
+ private static void addDocValuesTo(TestHarness h, String fieldName) {
+ implUpdateSchemaField(h, fieldName, (p) -> (p | 0x00008000)); // OR in the FieldProperties.DOC_VALUES bit
+ }
+
+ /**
+  * Clears the INDEXED property bit of the named schema field.
+  */
+ private static void removeIndexFrom(TestHarness h, String fieldName) {
+   // AND-NOT clears the bit whatever its current state; XOR would toggle it and thereby
+   // switch indexing ON for a field that was not indexed to begin with.
+   implUpdateSchemaField(h, fieldName, (p) -> (p & ~0x00000001)); // FieldProperties.INDEXED
+ }
+
+ private static void implUpdateSchemaField(TestHarness h, String fieldName, IntUnaryOperator propertiesModifier) {
+ try (SolrCore core = h.getCoreInc()) {
+
+ // Replace the field in the latest schema with a copy whose properties were passed through propertiesModifier
+ IndexSchema schema = core.getLatestSchema();
+ SchemaField oldSchemaField = schema.getField(fieldName);
+ SchemaField newSchemaField = new SchemaField(
+ fieldName,
+ oldSchemaField.getType(),
+ propertiesModifier.applyAsInt(oldSchemaField.getProperties()),
+ oldSchemaField.getDefaultValue());
+ schema.getFields().put(fieldName, newSchemaField); // swaps the entry in the map returned by getFields()
+ }
+ }
+
+ /**
+  * A callback that receives a DirectoryReader and, unlike java.util.function.Consumer,
+  * is allowed to throw checked exceptions.
+  */
+ @FunctionalInterface
+ private interface DirectoryReaderConsumer {
+   void accept(DirectoryReader reader) throws Exception;
+ }
+
+ /**
+  * Opens a new searcher on the harness' core, hands its raw reader to the consumer and releases
+  * the searcher reference afterwards.
+  *
+  * @param h the test harness providing the core
+  * @param consumer the checks to run against the raw reader
+  * @throws AssertionError if the consumer throws an exception; the exception is kept as the cause
+  */
+ private static void withNewRawReader(TestHarness h, DirectoryReaderConsumer consumer) {
+   try (SolrCore core = h.getCoreInc()) {
+     final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
+     try {
+       consumer.accept(searcherRef.get().getRawReader());
+     } catch (Exception e) {
+       // Same message as fail(e.toString()) produced, but keep the original exception as the
+       // cause so its stack trace shows up in the test report.
+       throw new AssertionError(e.toString(), e);
+     } finally {
+       searcherRef.decref();
+     }
+   }
+ }
+}