You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/10/04 15:53:13 UTC
lucene-solr:master: LUCENE-7456: PerFieldPostings/DocValuesFormat was
failing to delegate the merge method
Repository: lucene-solr
Updated Branches:
refs/heads/master 6739e075b -> 796ed508f
LUCENE-7456: PerFieldPostings/DocValuesFormat was failing to delegate the merge method
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/796ed508
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/796ed508
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/796ed508
Branch: refs/heads/master
Commit: 796ed508f39683c626d4870a7ab583a222b2c64c
Parents: 6739e07
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Oct 4 11:31:33 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Oct 4 11:34:54 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../perfield/PerFieldDocValuesFormat.java | 28 ++
.../codecs/perfield/PerFieldMergeState.java | 274 +++++++++++++++++++
.../codecs/perfield/PerFieldPostingsFormat.java | 85 ++++--
.../perfield/TestPerFieldDocValuesFormat.java | 122 +++++++++
.../perfield/TestPerFieldPostingsFormat2.java | 110 ++++++++
6 files changed, 595 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/796ed508/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f752217..32f6b51 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -49,6 +49,9 @@ Bug Fixes
* LUCENE-7472: MultiFieldQueryParser.getFieldQuery() drops queries that are
neither BooleanQuery nor TermQuery. (Steve Rowe)
+* LUCENE-7456: PerFieldPostings/DocValues was failing to delegate the
+ merge method (Julien MASSENET via Mike McCandless)
+
Improvements
* LUCENE-7439: FuzzyQuery now matches all terms within the specified
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/796ed508/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
index 6772cad..eef232c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.perfield;
import java.io.Closeable;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
@@ -32,6 +33,7 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -127,6 +129,32 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
getInstance(field).addSortedSetField(field, valuesProducer);
}
+    /**
+     * Merges doc values by delegating to the per-field {@link DocValuesConsumer} instances.
+     * <p>
+     * Fields are grouped by the consumer that {@code getInstance} returns for them; each consumer is
+     * then given a view of {@code mergeState} restricted to its own fields. The shared
+     * {@code mergeState} is restored afterwards, even when one of the delegated merges throws.
+     *
+     * @param mergeState the state of the merge; it is temporarily updated in place while delegating
+     * @throws IOException if resolving a consumer or one of the delegated merges fails
+     */
+    @Override
+    public void merge(MergeState mergeState) throws IOException {
+      // Identity semantics: two distinct consumer instances must never be grouped together
+      Map<DocValuesConsumer, Collection<String>> consumersToField = new IdentityHashMap<>();
+
+      // Group each consumer by the fields it handles
+      for (FieldInfo fi : mergeState.mergeFieldInfos) {
+        if (fi.getDocValuesType() == DocValuesType.NONE) {
+          // mergeFieldInfos may also contain fields without doc values: resolving a consumer
+          // for them would instantiate a format that has nothing to merge
+          continue;
+        }
+        DocValuesConsumer consumer = getInstance(fi);
+        Collection<String> fieldsForConsumer = consumersToField.get(consumer);
+        if (fieldsForConsumer == null) {
+          fieldsForConsumer = new ArrayList<>();
+          consumersToField.put(consumer, fieldsForConsumer);
+        }
+        fieldsForConsumer.add(fi.name);
+      }
+
+      // Delegate the merge to the appropriate consumer
+      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+      try {
+        for (Map.Entry<DocValuesConsumer, Collection<String>> e : consumersToField.entrySet()) {
+          e.getKey().merge(pfMergeState.apply(e.getValue()));
+        }
+      } finally {
+        // Always hand the caller back the unrestricted merge state
+        pfMergeState.reset();
+      }
+    }
+
private DocValuesConsumer getInstance(FieldInfo field) throws IOException {
DocValuesFormat format = null;
if (field.getDocValuesGen() != -1) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/796ed508/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java
new file mode 100644
index 0000000..991eedf
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.perfield;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.Terms;
+
+/**
+ * Utility class to update the {@link MergeState} instance to be restricted to a set of fields.
+ * <p>
+ * Warning: the input {@linkplain MergeState} instance will be updated when calling {@link #apply(Collection)}.
+ * <p>
+ * {@link #apply(Collection)} should be called within a {@code try {...} finally {...}} block to make sure that
+ * the mergeState instance is restored to its original state:
+ * <pre>
+ * PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+ * try {
+ *   doSomething(pfMergeState.apply(fields));
+ *   ...
+ * } finally {
+ *   pfMergeState.reset();
+ * }
+ * </pre>
+ */
+final class PerFieldMergeState {
+  private final MergeState in;
+  private final FieldInfos orgMergeFieldInfos;
+  private final FieldInfos[] orgFieldInfos;
+  private final FieldsProducer[] orgFieldsProducers;
+
+  /**
+   * Remembers the current {@code mergeFieldInfos}, {@code fieldInfos} and {@code fieldsProducers} of the given
+   * merge state so that {@link #reset()} can restore them.
+   *
+   * @param in The merge state that {@link #apply(Collection)} will update in place.
+   */
+  PerFieldMergeState(MergeState in) {
+    this.in = in;
+    this.orgMergeFieldInfos = in.mergeFieldInfos;
+    // Snapshot the arrays: apply() overwrites the elements of the arrays held by 'in'
+    this.orgFieldInfos = in.fieldInfos.clone();
+    this.orgFieldsProducers = in.fieldsProducers.clone();
+  }
+
+  /**
+   * Update the input {@link MergeState} instance to restrict the fields to the given ones.
+   * <p>
+   * The restriction is always computed from the original state, so successive calls do not stack.
+   *
+   * @param fields The fields to keep in the updated instance.
+   * @return The updated instance.
+   */
+  MergeState apply(Collection<String> fields) {
+    in.mergeFieldInfos = new FilterFieldInfos(orgMergeFieldInfos, fields);
+    for (int i = 0; i < orgFieldInfos.length; i++) {
+      in.fieldInfos[i] = new FilterFieldInfos(orgFieldInfos[i], fields);
+    }
+    for (int i = 0; i < orgFieldsProducers.length; i++) {
+      in.fieldsProducers[i] = new FilterFieldsProducer(orgFieldsProducers[i], fields);
+    }
+    return in;
+  }
+
+  /**
+   * Resets the input {@link MergeState} instance to its original state.
+   *
+   * @return The reset instance.
+   */
+  MergeState reset() {
+    in.mergeFieldInfos = orgMergeFieldInfos;
+    System.arraycopy(orgFieldInfos, 0, in.fieldInfos, 0, in.fieldInfos.length);
+    System.arraycopy(orgFieldsProducers, 0, in.fieldsProducers, 0, in.fieldsProducers.length);
+    return in;
+  }
+
+  /**
+   * View over a {@link FieldInfos} that only iterates, counts and gives access to the fields whose name is in
+   * the filter. The {@code has*()} flags are recomputed from the retained fields only.
+   */
+  private static class FilterFieldInfos extends FieldInfos {
+    private final Set<String> filteredNames;
+    private final List<FieldInfo> filtered;
+
+    // Copy of the private fields from FieldInfos
+    // Renamed so as to be less confusing about which fields we're referring to
+    private final boolean filteredHasVectors;
+    private final boolean filteredHasProx;
+    private final boolean filteredHasPayloads;
+    private final boolean filteredHasOffsets;
+    private final boolean filteredHasFreq;
+    private final boolean filteredHasNorms;
+    private final boolean filteredHasDocValues;
+    private final boolean filteredHasPointValues;
+
+    /**
+     * @param src The field infos to filter.
+     * @param filterFields The names of the fields to retain.
+     */
+    FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
+      // Copy all the input FieldInfo objects since the field numbering must be kept consistent
+      super(toArray(src));
+
+      boolean hasVectors = false;
+      boolean hasProx = false;
+      boolean hasPayloads = false;
+      boolean hasOffsets = false;
+      boolean hasFreq = false;
+      boolean hasNorms = false;
+      boolean hasDocValues = false;
+      boolean hasPointValues = false;
+
+      this.filteredNames = new HashSet<>(filterFields);
+      this.filtered = new ArrayList<>(filterFields.size());
+      for (FieldInfo fi : src) {
+        // Look names up in the hash set rather than in the caller's collection, which may be a list
+        if (filteredNames.contains(fi.name)) {
+          this.filtered.add(fi);
+          hasVectors |= fi.hasVectors();
+          hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+          hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
+          hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+          hasNorms |= fi.hasNorms();
+          hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
+          hasPayloads |= fi.hasPayloads();
+          hasPointValues |= (fi.getPointDimensionCount() != 0);
+        }
+      }
+
+      this.filteredHasVectors = hasVectors;
+      this.filteredHasProx = hasProx;
+      this.filteredHasPayloads = hasPayloads;
+      this.filteredHasOffsets = hasOffsets;
+      this.filteredHasFreq = hasFreq;
+      this.filteredHasNorms = hasNorms;
+      this.filteredHasDocValues = hasDocValues;
+      this.filteredHasPointValues = hasPointValues;
+    }
+
+    /** Copies the elements iterated by {@code src} into a new array, in iteration order. */
+    private static FieldInfo[] toArray(FieldInfos src) {
+      FieldInfo[] res = new FieldInfo[src.size()];
+      int i = 0;
+      for (FieldInfo fi : src) {
+        res[i++] = fi;
+      }
+      return res;
+    }
+
+    @Override
+    public Iterator<FieldInfo> iterator() {
+      return filtered.iterator();
+    }
+
+    @Override
+    public boolean hasFreq() {
+      return filteredHasFreq;
+    }
+
+    @Override
+    public boolean hasProx() {
+      return filteredHasProx;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return filteredHasPayloads;
+    }
+
+    @Override
+    public boolean hasOffsets() {
+      return filteredHasOffsets;
+    }
+
+    @Override
+    public boolean hasVectors() {
+      return filteredHasVectors;
+    }
+
+    @Override
+    public boolean hasNorms() {
+      return filteredHasNorms;
+    }
+
+    @Override
+    public boolean hasDocValues() {
+      return filteredHasDocValues;
+    }
+
+    @Override
+    public boolean hasPointValues() {
+      return filteredHasPointValues;
+    }
+
+    @Override
+    public int size() {
+      return filtered.size();
+    }
+
+    /**
+     * @throws IllegalArgumentException if the name is not one of the retained fields.
+     */
+    @Override
+    public FieldInfo fieldInfo(String fieldName) {
+      if (!filteredNames.contains(fieldName)) {
+        // Throw IAE to be consistent with fieldInfo(int) which throws it as well on invalid numbers
+        throw new IllegalArgumentException("The field named '" + fieldName + "' is not accessible in the current " +
+            "merge context, available ones are: " + filteredNames);
+      }
+      return super.fieldInfo(fieldName);
+    }
+
+    /**
+     * @throws IllegalArgumentException if the number designates a field that is not one of the retained fields.
+     */
+    @Override
+    public FieldInfo fieldInfo(int fieldNumber) {
+      FieldInfo res = super.fieldInfo(fieldNumber);
+      if (res == null) {
+        // The parent lookup found no field for this number: report that as is rather than failing
+        // with a NullPointerException on res.name below
+        return null;
+      }
+      if (!filteredNames.contains(res.name)) {
+        throw new IllegalArgumentException("The field named '" + res.name + "' numbered '" + fieldNumber + "' is not " +
+            "accessible in the current merge context, available ones are: " + filteredNames);
+      }
+      return res;
+    }
+  }
+
+  /**
+   * View over a {@link FieldsProducer} that only iterates, counts and gives access to the given field names.
+   * Everything else is forwarded to the wrapped producer.
+   */
+  private static class FilterFieldsProducer extends FieldsProducer {
+    private final FieldsProducer in;
+    // Kept as a list so that iteration follows the order of the collection given to the constructor
+    private final List<String> filtered;
+    // Same names as 'filtered', for constant-time membership checks in terms()
+    private final Set<String> filteredNames;
+
+    /**
+     * @param in The producer to filter.
+     * @param filterFields The names of the fields to retain.
+     */
+    FilterFieldsProducer(FieldsProducer in, Collection<String> filterFields) {
+      this.in = in;
+      this.filtered = new ArrayList<>(filterFields);
+      this.filteredNames = new HashSet<>(filterFields);
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
+    }
+
+    @Override
+    public Iterator<String> iterator() {
+      return filtered.iterator();
+    }
+
+    /**
+     * @throws IllegalArgumentException if the field is not one of the retained fields.
+     */
+    @Override
+    public Terms terms(String field) throws IOException {
+      if (!filteredNames.contains(field)) {
+        throw new IllegalArgumentException("The field named '" + field + "' is not accessible in the current " +
+            "merge context, available ones are: " + filtered);
+      }
+      return in.terms(field);
+    }
+
+    @Override
+    public int size() {
+      return filtered.size();
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      in.checkIntegrity();
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/796ed508/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
index e49d2ff..281b08f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
@@ -39,6 +39,8 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader.FilterFields;
import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
@@ -116,7 +118,61 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
@Override
public void write(Fields fields) throws IOException {
+      // Work out which postings format, and which group of fields, each field belongs to
+      final Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(fields);
+
+      // Hand each format a view restricted to the fields of its group
+      boolean allWritten = false;
+      try {
+        for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
+          final PostingsFormat postingsFormat = entry.getKey();
+          final FieldsGroup group = entry.getValue();
+
+          // Same content as 'fields', but iteration only yields this group's field names
+          final Fields groupView = new FilterFields(fields) {
+            @Override
+            public Iterator<String> iterator() {
+              return group.fields.iterator();
+            }
+          };
+
+          final FieldsConsumer groupConsumer = postingsFormat.fieldsConsumer(group.state);
+          // Registered before writing so that it gets closed if the write below fails
+          toClose.add(groupConsumer);
+          groupConsumer.write(groupView);
+        }
+        allWritten = true;
+      } finally {
+        if (allWritten == false) {
+          IOUtils.closeWhileHandlingException(toClose);
+        }
+      }
+    }
+
+    /**
+     * Merges postings by delegating to the consumer of each per-field postings format.
+     * <p>
+     * Each consumer receives a view of {@code mergeState} restricted to the fields of its group.
+     * The shared {@code mergeState} is restored before this method returns or throws.
+     *
+     * @param mergeState the state of the merge; it is temporarily updated in place while delegating
+     * @throws IOException if creating a consumer or one of the delegated merges fails
+     */
+    @Override
+    public void merge(MergeState mergeState) throws IOException {
+      // View over the postings of all the segments being merged, used to compute the groups
+      final Fields allSegmentsFields = new MultiFields(mergeState.fieldsProducers, null);
+      final Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(allSegmentsFields);
+
+      // Merge postings
+      final PerFieldMergeState restrictedState = new PerFieldMergeState(mergeState);
+      boolean allMerged = false;
+      try {
+        for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
+          final PostingsFormat postingsFormat = entry.getKey();
+          final FieldsGroup group = entry.getValue();
+
+          final FieldsConsumer groupConsumer = postingsFormat.fieldsConsumer(group.state);
+          // Registered before merging so that it gets closed if the merge below fails
+          toClose.add(groupConsumer);
+          groupConsumer.merge(restrictedState.apply(group.fields));
+        }
+        allMerged = true;
+      } finally {
+        // Restore the merge state first, then release the consumers on failure
+        restrictedState.reset();
+        if (allMerged == false) {
+          IOUtils.closeWhileHandlingException(toClose);
+        }
+      }
+    }
+
+ private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Fields fields) {
// Maps a PostingsFormat instance to the suffix it
// should use
Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
@@ -124,7 +180,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
// Holds last suffix of each PostingFormat name
Map<String,Integer> suffixes = new HashMap<>();
- // First pass: assign field -> PostingsFormat
+ // Assign field -> PostingsFormat
for(String field : fields) {
FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
@@ -177,32 +233,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
", field=" + fieldInfo.name + ", old=" + previousValue + ", new=" + group.suffix);
}
}
-
- // Second pass: write postings
- boolean success = false;
- try {
- for(Map.Entry<PostingsFormat,FieldsGroup> ent : formatToGroups.entrySet()) {
- PostingsFormat format = ent.getKey();
- final FieldsGroup group = ent.getValue();
-
- // Exposes only the fields from this group:
- Fields maskedFields = new FilterFields(fields) {
- @Override
- public Iterator<String> iterator() {
- return group.fields.iterator();
- }
- };
-
- FieldsConsumer consumer = format.fieldsConsumer(group.state);
- toClose.add(consumer);
- consumer.write(maskedFields);
- }
- success = true;
- } finally {
- if (success == false) {
- IOUtils.closeWhileHandlingException(toClose);
- }
- }
+ return formatToGroups;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/796ed508/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
index 5777dad..5b985a4 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
@@ -18,13 +18,19 @@ package org.apache.lucene.codecs.perfield;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
@@ -33,11 +39,15 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.BaseDocValuesFormatTestCase;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomCodec;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -127,4 +137,116 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
ireader.close();
directory.close();
}
+
+  /**
+   * Checks that a forced merge calls {@code merge} once on the consumer of each per-field doc values
+   * format, and that each call only sees the fields mapped to that format.
+   */
+  public void testMergeCalledOnTwoFormats() throws IOException {
+    final MergeRecordingDocValueFormatWrapper dvf1 =
+        new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
+    final MergeRecordingDocValueFormatWrapper dvf2 =
+        new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
+
+    // dv1 and dv2 share one recording format, dv3 gets the other one
+    IndexWriterConfig config = newIndexWriterConfig();
+    config.setCodec(new AssertingCodec() {
+      @Override
+      public DocValuesFormat getDocValuesFormatForField(String field) {
+        switch (field) {
+          case "dv1":
+          case "dv2":
+            return dvf1;
+
+          case "dv3":
+            return dvf2;
+
+          default:
+            return super.getDocValuesFormatForField(field);
+        }
+      }
+    });
+
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, config);
+
+    // Commit after each document so that the forced merge below has something to merge
+    Document firstDoc = new Document();
+    firstDoc.add(new NumericDocValuesField("dv1", 5));
+    firstDoc.add(new NumericDocValuesField("dv2", 42));
+    firstDoc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
+    writer.addDocument(firstDoc);
+    writer.commit();
+
+    Document secondDoc = new Document();
+    secondDoc.add(new NumericDocValuesField("dv1", 8));
+    secondDoc.add(new NumericDocValuesField("dv2", 45));
+    secondDoc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
+    writer.addDocument(secondDoc);
+    writer.commit();
+
+    writer.forceMerge(1, true);
+    writer.close();
+
+    // The order in which dv1 and dv2 were recorded is not part of the check
+    assertEquals(1, dvf1.nbMergeCalls);
+    assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
+    assertEquals(1, dvf2.nbMergeCalls);
+    assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);
+
+    dir.close();
+  }
+
+  /**
+   * Doc values format that forwards everything to a wrapped format, while counting the calls to
+   * {@code merge} made on the consumers it hands out and recording the field names those calls saw.
+   */
+  private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
+    private final DocValuesFormat wrapped;
+    /** Names found in {@code mergeFieldInfos} by the recorded merge calls, in call order. */
+    final List<String> fieldNames = new ArrayList<>();
+    /** Number of merge calls recorded so far. */
+    volatile int nbMergeCalls = 0;
+
+    MergeRecordingDocValueFormatWrapper(DocValuesFormat wrapped) {
+      // Registered under the name of the wrapped format
+      super(wrapped.getName());
+      this.wrapped = wrapped;
+    }
+
+    @Override
+    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+      final DocValuesConsumer target = wrapped.fieldsConsumer(state);
+      // Pure pass-through, except for merge() which records before forwarding
+      return new DocValuesConsumer() {
+        @Override
+        public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+          target.addNumericField(field, values);
+        }
+
+        @Override
+        public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+          target.addBinaryField(field, values);
+        }
+
+        @Override
+        public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+          target.addSortedField(field, values, docToOrd);
+        }
+
+        @Override
+        public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
+          target.addSortedNumericField(field, docToValueCount, values);
+        }
+
+        @Override
+        public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+          target.addSortedSetField(field, values, docToOrdCount, ords);
+        }
+
+        @Override
+        public void merge(MergeState mergeState) throws IOException {
+          nbMergeCalls++;
+          for (FieldInfo mergedField : mergeState.mergeFieldInfos) {
+            fieldNames.add(mergedField.name);
+          }
+          target.merge(mergeState);
+        }
+
+        @Override
+        public void close() throws IOException {
+          target.close();
+        }
+      };
+    }
+
+    @Override
+    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+      return wrapped.fieldsProducer(state);
+    }
+  }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/796ed508/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
index 58c37fc..959a2b7 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
@@ -18,9 +18,16 @@ package org.apache.lucene.codecs.perfield;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -29,14 +36,21 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
@@ -322,4 +336,100 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
iw.close();
dir.close(); // checkindex
}
+
+  /**
+   * Checks that a forced merge calls {@code merge} once on the consumer of each per-field postings
+   * format, and that each call only sees the fields mapped to that format.
+   */
+  @SuppressWarnings("deprecation")
+  public void testMergeCalledOnTwoFormats() throws IOException {
+    final MergeRecordingPostingsFormatWrapper pf1 =
+        new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
+    final MergeRecordingPostingsFormatWrapper pf2 =
+        new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
+
+    // f1 and f2 share one recording format, f3 and f4 get the other one
+    IndexWriterConfig config = newIndexWriterConfig();
+    config.setCodec(new AssertingCodec() {
+      @Override
+      public PostingsFormat getPostingsFormatForField(String field) {
+        switch (field) {
+          case "f1":
+          case "f2":
+            return pf1;
+
+          case "f3":
+          case "f4":
+            return pf2;
+
+          default:
+            return super.getPostingsFormatForField(field);
+        }
+      }
+    });
+
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, config);
+
+    // Commit after each document so that the forced merge below has something to merge
+    Document firstDoc = new Document();
+    firstDoc.add(new StringField("f1", "val1", Field.Store.NO));
+    firstDoc.add(new StringField("f2", "val2", Field.Store.YES));
+    firstDoc.add(new IntPoint("f3", 3)); // Points are not indexed as postings and should not appear in the merge fields
+    firstDoc.add(new StringField("f4", "val4", Field.Store.NO));
+    writer.addDocument(firstDoc);
+    writer.commit();
+
+    Document secondDoc = new Document();
+    secondDoc.add(new StringField("f1", "val5", Field.Store.NO));
+    secondDoc.add(new StringField("f2", "val6", Field.Store.YES));
+    secondDoc.add(new IntPoint("f3", 7));
+    secondDoc.add(new StringField("f4", "val8", Field.Store.NO));
+    writer.addDocument(secondDoc);
+    writer.commit();
+
+    writer.forceMerge(1, true);
+    writer.close();
+
+    // f3 is expected to be absent from what pf2 saw; the order of f1 and f2 is not checked
+    assertEquals(1, pf1.nbMergeCalls);
+    assertEquals(new HashSet<>(Arrays.asList("f1", "f2")), new HashSet<>(pf1.fieldNames));
+    assertEquals(1, pf2.nbMergeCalls);
+    assertEquals(Collections.singletonList("f4"), pf2.fieldNames);
+
+    dir.close();
+  }
+
+  /**
+   * Postings format that forwards everything to a wrapped format, while counting the calls to
+   * {@code merge} made on the consumers it hands out and recording the field names those calls saw.
+   */
+  private static final class MergeRecordingPostingsFormatWrapper extends PostingsFormat {
+    private final PostingsFormat wrapped;
+    /** Names found in {@code mergeFieldInfos} by the recorded merge calls, in call order. */
+    final List<String> fieldNames = new ArrayList<>();
+    /** Number of merge calls recorded so far. */
+    int nbMergeCalls = 0;
+
+    MergeRecordingPostingsFormatWrapper(PostingsFormat wrapped) {
+      // Registered under the name of the wrapped format
+      super(wrapped.getName());
+      this.wrapped = wrapped;
+    }
+
+    @Override
+    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+      final FieldsConsumer target = wrapped.fieldsConsumer(state);
+      // Pure pass-through, except for merge() which records before forwarding
+      return new FieldsConsumer() {
+        @Override
+        public void write(Fields fields) throws IOException {
+          target.write(fields);
+        }
+
+        @Override
+        public void merge(MergeState mergeState) throws IOException {
+          nbMergeCalls++;
+          for (FieldInfo mergedField : mergeState.mergeFieldInfos) {
+            fieldNames.add(mergedField.name);
+          }
+          target.merge(mergeState);
+        }
+
+        @Override
+        public void close() throws IOException {
+          target.close();
+        }
+      };
+    }
+
+    @Override
+    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+      return wrapped.fieldsProducer(state);
+    }
+  }
}