You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/11/19 21:14:30 UTC
[lucene] branch branch_9x updated: LUCENE-10062: Switch to numeric doc values for encoding taxonomy ordinals
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 0ba3107 LUCENE-10062: Switch to numeric doc values for encoding taxonomy ordinals
0ba3107 is described below
commit 0ba310782fb5fdc5900417dedbffed5c6c45e59b
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Fri Nov 19 13:11:42 2021 -0800
LUCENE-10062: Switch to numeric doc values for encoding taxonomy ordinals
---
lucene/CHANGES.txt | 3 +
lucene/MIGRATE.md | 14 ++
.../java/org/apache/lucene/facet/FacetUtils.java | 50 ++++
.../java/org/apache/lucene/facet/FacetsConfig.java | 32 ++-
.../taxonomy/BackCompatSortedNumericDocValues.java | 148 ++++++++++++
.../facet/taxonomy/CachedOrdinalsReader.java | 4 +
.../facet/taxonomy/DocValuesOrdinalsReader.java | 73 +++---
.../facet/taxonomy/FastTaxonomyFacetCounts.java | 48 +---
.../facet/taxonomy/OrdinalMappingLeafReader.java | 105 +++++++-
.../lucene/facet/taxonomy/OrdinalsReader.java | 8 +-
.../lucene/facet/taxonomy/TaxonomyFacetCounts.java | 5 +-
.../lucene/facet/taxonomy/TaxonomyFacetLabels.java | 160 ++++++++++---
.../taxonomy/TaxonomyFacetSumValueSource.java | 86 +++++--
.../lucene/facet/taxonomy/TaxonomyWriter.java | 14 ++
.../directory/DirectoryTaxonomyWriter.java | 7 +-
.../lucene/facet/TestMultipleIndexFields.java | 2 +-
.../TestBackCompatSortedNumericDocValues.java | 140 +++++++++++
.../taxonomy/TestTaxonomyFacetSumValueSource.java | 12 +-
.../directory/TestBackwardsCompatibility.java | 264 ++++++++++++++++++---
.../facet/taxonomy/directory/index.8.11.0-cfs.zip | Bin 0 -> 3351 bytes
.../taxonomy/directory/taxonomy.8.10.0-cfs.zip | Bin 3092 -> 0 bytes
.../taxonomy/directory/taxonomy.8.11.0-cfs.zip | Bin 0 -> 3197 bytes
22 files changed, 989 insertions(+), 186 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1e9b1fb..c34794b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -367,6 +367,9 @@ Improvements
See release notes. https://github.com/locationtech/spatial4j/releases/tag/spatial4j-0.8
(David Smiley)
+* LUCENE-10062: Switch taxonomy faceting to use numeric doc values for storing ordinals instead of binary doc values
+ with its own custom encoding. (Greg Miller)
+
Bug fixes
---------------------
diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md
index eb5436d..b309d8a 100644
--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@@ -450,3 +450,17 @@ structure. Use a standard BoostQuery here instead.
Rather than using `setSort()` to change sort values, you should instead create
a new Sort instance with the new values.
+
+## Taxonomy-based faceting uses more modern encodings (LUCENE-9450, LUCENE-10062, LUCENE-10122)
+
+The side-car taxonomy index now uses doc values for ord-to-path lookup (LUCENE-9450) and parent
+lookup (LUCENE-10122) instead of stored fields and positions (respectively). Document ordinals
+are now encoded with `SortedNumericDocValues` instead of using a custom (v-int) binary format.
+Performance gains have been observed with these encoding changes, but to benefit from them, users
+must create a new index using 9.x (it is not sufficient to reindex documents against an existing
+8.x index). In order to remain backwards-compatible with 8.x indexes, the older format is retained
+until a full rebuild is done.
+
+Additionally, `OrdinalsReader` (and sub-classes) have been marked `@Deprecated` as custom binary
+encodings will not be supported for Document ordinals in 9.x onwards (`SortedNumericDocValues` are
+used out-of-the-box instead).
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetUtils.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetUtils.java
index 01ec9e4..76d4802 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/FacetUtils.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetUtils.java
@@ -18,8 +18,15 @@
package org.apache.lucene.facet;
import java.io.IOException;
+import java.util.function.BiConsumer;
+import org.apache.lucene.facet.taxonomy.BackCompatSortedNumericDocValues;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
/**
* Utility class with a single method for getting a DocIdSetIterator that skips deleted docs
@@ -81,4 +88,47 @@ public final class FacetUtils {
}
};
}
+
+ /**
+ * Loads ordinal values as {@link SortedNumericDocValues}. If the index still uses the older
+ * binary format, it will wrap that with the SNDV API. Newer format indexes will just load the
+ * SNDV directly.
+ *
+ * <p>This is really only needed/useful to maintain back-compat with the binary format. Once
+ * back-compat is no longer needed, the SNDV field should just be loaded directly.
+ *
+ * @deprecated Please do not rely on this method. It is added as a temporary measure for providing
+ * index backwards-compatibility with Lucene 8 and earlier indexes, and will be removed in
+ * Lucene 10.
+ */
+ @Deprecated
+ public static SortedNumericDocValues loadOrdinalValues(LeafReader reader, String fieldName)
+ throws IOException {
+ return loadOrdinalValues(reader, fieldName, null);
+ }
+
+ /**
+ * Loads ordinal values as {@link SortedNumericDocValues}. If the index still uses the older
+ * binary format, it will wrap that with the SNDV API. Newer format indexes will just load the
+ * SNDV directly. The provided {@code binaryValueDecoder} allows custom decoding logic for older
+ * binary format fields to be provided.
+ *
+ * <p>This is really only needed/useful to maintain back-compat with the binary format. Once
+ * back-compat is no longer needed, the SNDV field should just be loaded directly.
+ *
+ * @deprecated Please do not rely on this method. It is added as a temporary measure for providing
+ * index backwards-compatibility with Lucene 8 and earlier indexes, and will be removed in
+ * Lucene 10.
+ */
+ @Deprecated
+ public static SortedNumericDocValues loadOrdinalValues(
+ LeafReader reader, String fieldName, BiConsumer<BytesRef, IntsRef> binaryValueDecoder)
+ throws IOException {
+ if (reader.getMetaData().getCreatedVersionMajor() <= 8) {
+ BinaryDocValues oldStyleDocValues = reader.getBinaryDocValues(fieldName);
+ return BackCompatSortedNumericDocValues.wrap(oldStyleDocValues, binaryValueDecoder);
+ } else {
+ return reader.getSortedNumericDocValues(fieldName);
+ }
+ }
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
index 7558f61..53c7050 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
@@ -28,6 +28,7 @@ import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
@@ -409,9 +410,26 @@ public class FacetsConfig {
indexDrillDownTerms(doc, indexFieldName, dimConfig, facetLabel);
}
- // Facet counts:
- // DocValues are considered stored fields:
- doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get())));
+ // Store the taxonomy ordinals associated with each doc. Prefer to use SortedNumericDocValues
+ // but "fall back" to a custom binary format to maintain backwards compatibility with Lucene 8
+ // indexes.
+ IntsRef ords = ordinals.get();
+ if (taxoWriter.useNumericDocValuesForOrdinals()) {
+ // Dedupe and encode the ordinals. It's not important that we sort here
+ // (SortedNumericDocValuesField will handle this internally), but we
+ // sort to identify dups (since SNDVF doesn't dedupe):
+ Arrays.sort(ords.ints, ords.offset, ords.offset + ords.length);
+ int prev = -1;
+ for (int i = 0; i < ords.length; i++) {
+ int ord = ords.ints[ords.offset + i];
+ if (ord > prev) {
+ doc.add(new SortedNumericDocValuesField(indexFieldName, ord));
+ prev = ord;
+ }
+ }
+ } else {
+ doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ords)));
+ }
}
}
@@ -507,7 +525,13 @@ public class FacetsConfig {
}
}
- /** Encodes ordinals into a BytesRef; expert: subclass can override this to change encoding. */
+ /**
+ * Encodes ordinals into a BytesRef; expert: subclass can override this to change encoding.
+ *
+ * @deprecated Starting in Lucene 9, we moved to a more straight-forward numeric doc values
+ * encoding and no longer support custom binary encodings.
+ */
+ @Deprecated
protected BytesRef dedupAndEncode(IntsRef ordinals) {
Arrays.sort(ordinals.ints, ordinals.offset, ordinals.length);
byte[] bytes = new byte[5 * ordinals.length];
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/BackCompatSortedNumericDocValues.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/BackCompatSortedNumericDocValues.java
new file mode 100644
index 0000000..0a48f13
--- /dev/null
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/BackCompatSortedNumericDocValues.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.facet.taxonomy;
+
+import java.io.IOException;
+import java.util.function.BiConsumer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+
+/**
+ * Wraps a {@link BinaryDocValues} instance, providing a {@link SortedNumericDocValues} interface
+ * for the purpose of being backwards-compatible. (see: LUCENE-10062)
+ *
+ * @deprecated Only here for back-compat support. Should be removed with Lucene 10.
+ */
+@Deprecated
+public class BackCompatSortedNumericDocValues extends SortedNumericDocValues {
+ private final BinaryDocValues binaryDocValues;
+ private final BiConsumer<BytesRef, IntsRef> binaryValueDecoder;
+ private final IntsRef scratch = new IntsRef();
+ private int curr;
+
+ /**
+ * Wrap the provided binary encoded doc values. Decodes the binary values with the provided {@code
+ * binaryValueDecoder}, allowing the default decoding behavior to be overridden. If a null doc
+ * values instance is provided, the returned instance will also be null. If a null value decoder
+ * is specified, the default encoding will be assumed.
+ */
+ public static SortedNumericDocValues wrap(
+ BinaryDocValues binaryDocValues, BiConsumer<BytesRef, IntsRef> binaryValueDecoder) {
+ if (binaryDocValues == null) {
+ return null;
+ }
+
+ return new BackCompatSortedNumericDocValues(binaryDocValues, binaryValueDecoder);
+ }
+
+ /** see the static {@code wrap} methods */
+ private BackCompatSortedNumericDocValues(
+ BinaryDocValues binaryDocValues, BiConsumer<BytesRef, IntsRef> binaryValueDecoder) {
+ assert binaryDocValues != null;
+ this.binaryDocValues = binaryDocValues;
+
+ if (binaryValueDecoder != null) {
+ this.binaryValueDecoder = binaryValueDecoder;
+ } else {
+ this.binaryValueDecoder = BackCompatSortedNumericDocValues::loadValues;
+ }
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ boolean result = binaryDocValues.advanceExact(target);
+ if (result) {
+ reloadValues();
+ }
+ return result;
+ }
+
+ @Override
+ public long nextValue() throws IOException {
+ curr++;
+ assert curr < scratch.length;
+ return scratch.ints[scratch.offset + curr];
+ }
+
+ @Override
+ public int docValueCount() {
+ return scratch.length;
+ }
+
+ @Override
+ public int docID() {
+ return binaryDocValues.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(binaryDocValues.docID() + 1);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int doc = binaryDocValues.advance(target);
+ if (doc != NO_MORE_DOCS) {
+ reloadValues();
+ }
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return binaryDocValues.cost();
+ }
+
+ private void reloadValues() throws IOException {
+ curr = -1;
+ binaryValueDecoder.accept(binaryDocValues.binaryValue(), scratch);
+ }
+
+ /** Load ordinals for the currently-positioned doc, assuming the default binary encoding. */
+ static void loadValues(BytesRef buf, IntsRef ordinals) {
+ // grow the buffer up front, even if by a large number of values (buf.length)
+ // that saves the need to check inside the loop for every decoded value if
+ // the buffer needs to grow.
+ if (ordinals.ints.length < buf.length) {
+ ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
+ }
+
+ ordinals.offset = 0;
+ ordinals.length = 0;
+
+ // it is better if the decoding is inlined like so, and not e.g.
+ // in a utility method
+ int upto = buf.offset + buf.length;
+ int value = 0;
+ int offset = buf.offset;
+ int prev = 0;
+ while (offset < upto) {
+ byte b = buf.bytes[offset++];
+ if (b >= 0) {
+ ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
+ value = 0;
+ prev = ordinals.ints[ordinals.length];
+ ordinals.length++;
+ } else {
+ value = (value << 7) | (b & 0x7F);
+ }
+ }
+ }
+}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java
index 0729c90..cbec7c5 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CachedOrdinalsReader.java
@@ -45,7 +45,11 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* <p><b>NOTE:</b> create one instance of this and re-use it for all facet implementations (the
* cache is per-instance, not static).
+ *
+ * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
+ * Lucene 9
*/
+@Deprecated
public class CachedOrdinalsReader extends OrdinalsReader implements Accountable {
private final OrdinalsReader source;
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/DocValuesOrdinalsReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/DocValuesOrdinalsReader.java
index 7099de7..cf1a438 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/DocValuesOrdinalsReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/DocValuesOrdinalsReader.java
@@ -17,15 +17,22 @@
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
+import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
-/** Decodes ordinals previously indexed into a BinaryDocValues field */
+/**
+ * Decodes ordinals previously indexed into a BinaryDocValues field
+ *
+ * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
+ * Lucene 9
+ */
+@Deprecated
public class DocValuesOrdinalsReader extends OrdinalsReader {
private final String field;
@@ -41,12 +48,12 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
@Override
public OrdinalsSegmentReader getReader(LeafReaderContext context) throws IOException {
- BinaryDocValues values0 = context.reader().getBinaryDocValues(field);
- if (values0 == null) {
- values0 = DocValues.emptyBinary();
+ SortedNumericDocValues dv0 =
+ FacetUtils.loadOrdinalValues(context.reader(), field, this::decode);
+ if (dv0 == null) {
+ dv0 = DocValues.emptySortedNumeric();
}
-
- final BinaryDocValues values = values0;
+ final SortedNumericDocValues dv = dv0;
return new OrdinalsSegmentReader() {
@@ -59,16 +66,21 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
"docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
}
lastDocID = docID;
- if (docID > values.docID()) {
- values.advance(docID);
- }
- final BytesRef bytes;
- if (values.docID() == docID) {
- bytes = values.binaryValue();
- } else {
- bytes = new BytesRef(BytesRef.EMPTY_BYTES);
+
+ ordinals.offset = 0;
+ ordinals.length = 0;
+
+ if (dv.advanceExact(docID)) {
+ int count = dv.docValueCount();
+ if (ordinals.ints.length < count) {
+ ordinals.ints = ArrayUtil.grow(ordinals.ints, count);
+ }
+
+ for (int i = 0; i < count; i++) {
+ ordinals.ints[ordinals.length] = (int) dv.nextValue();
+ ordinals.length++;
+ }
}
- decode(bytes, ordinals);
}
};
}
@@ -91,33 +103,6 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
* @param ordinals buffer for decoded ordinals
*/
public void decode(BytesRef buf, IntsRef ordinals) {
-
- // grow the buffer up front, even if by a large number of values (buf.length)
- // that saves the need to check inside the loop for every decoded value if
- // the buffer needs to grow.
- if (ordinals.ints.length < buf.length) {
- ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
- }
-
- ordinals.offset = 0;
- ordinals.length = 0;
-
- // it is better if the decoding is inlined like so, and not e.g.
- // in a utility method
- int upto = buf.offset + buf.length;
- int value = 0;
- int offset = buf.offset;
- int prev = 0;
- while (offset < upto) {
- byte b = buf.bytes[offset++];
- if (b >= 0) {
- ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
- value = 0;
- prev = ordinals.ints[ordinals.length];
- ordinals.length++;
- } else {
- value = (value << 7) | (b & 0x7F);
- }
- }
+ BackCompatSortedNumericDocValues.loadValues(buf, ordinals);
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
index 4b6332b..668b773 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
@@ -19,17 +19,17 @@ package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
+import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
/**
* Computes facets counts, assuming the default encoding into DocValues was used.
@@ -70,8 +70,9 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
- BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
- if (dv == null) { // this reader does not have DocValues for the requested category list
+ SortedNumericDocValues dv =
+ FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
+ if (dv == null) {
continue;
}
@@ -79,21 +80,8 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
- final BytesRef bytesRef = dv.binaryValue();
- byte[] bytes = bytesRef.bytes;
- int end = bytesRef.offset + bytesRef.length;
- int ord = 0;
- int offset = bytesRef.offset;
- int prev = 0;
- while (offset < end) {
- byte b = bytes[offset++];
- if (b >= 0) {
- prev = ord = ((ord << 7) | b) + prev;
- increment(ord);
- ord = 0;
- } else {
- ord = (ord << 7) | (b & 0x7F);
- }
+ for (int i = 0; i < dv.docValueCount(); i++) {
+ increment((int) dv.nextValue());
}
}
}
@@ -103,8 +91,8 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private final void countAll(IndexReader reader) throws IOException {
for (LeafReaderContext context : reader.leaves()) {
- BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
- if (dv == null) { // this reader does not have DocValues for the requested category list
+ SortedNumericDocValues dv = FacetUtils.loadOrdinalValues(context.reader(), indexFieldName);
+ if (dv == null) {
continue;
}
@@ -114,21 +102,9 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
if (liveDocs != null && liveDocs.get(doc) == false) {
continue;
}
- final BytesRef bytesRef = dv.binaryValue();
- byte[] bytes = bytesRef.bytes;
- int end = bytesRef.offset + bytesRef.length;
- int ord = 0;
- int offset = bytesRef.offset;
- int prev = 0;
- while (offset < end) {
- byte b = bytes[offset++];
- if (b >= 0) {
- prev = ord = ((ord << 7) | b) + prev;
- increment(ord);
- ord = 0;
- } else {
- ord = (ord << 7) | (b & 0x7F);
- }
+
+ for (int i = 0; i < dv.docValueCount(); i++) {
+ increment((int) dv.nextValue());
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalMappingLeafReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalMappingLeafReader.java
index 731d4a9..0b6912e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalMappingLeafReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalMappingLeafReader.java
@@ -16,7 +16,9 @@
*/
package org.apache.lucene.facet.taxonomy;
+import com.carrotsearch.hppc.IntArrayList;
import java.io.IOException;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.facet.FacetsConfig;
@@ -26,7 +28,10 @@ import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.Ordina
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterBinaryDocValues;
import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.FilterSortedNumericDocValues;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@@ -107,6 +112,66 @@ public class OrdinalMappingLeafReader extends FilterLeafReader {
}
}
+ private class OrdinalMappingSortedNumericDocValues extends FilterSortedNumericDocValues {
+ private final IntArrayList currentValues;
+ private int currIndex;
+
+ OrdinalMappingSortedNumericDocValues(SortedNumericDocValues in) {
+ super(in);
+ currentValues = new IntArrayList(32);
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ boolean result = in.advanceExact(target);
+ if (result) {
+ reloadValues();
+ }
+ return result;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int result = in.advance(target);
+ if (result != DocIdSetIterator.NO_MORE_DOCS) {
+ reloadValues();
+ }
+ return result;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ int result = in.nextDoc();
+ if (result != DocIdSetIterator.NO_MORE_DOCS) {
+ reloadValues();
+ }
+ return result;
+ }
+
+ @Override
+ public int docValueCount() {
+ return currentValues.elementsCount;
+ }
+
+ private void reloadValues() throws IOException {
+ currIndex = 0;
+ currentValues.clear();
+ for (int i = 0; i < in.docValueCount(); i++) {
+ int originalOrd = Math.toIntExact(in.nextValue());
+ currentValues.add(ordinalMap[originalOrd]);
+ }
+ Arrays.sort(currentValues.buffer, 0, currentValues.elementsCount);
+ }
+
+ @Override
+ public long nextValue() {
+ assert currIndex < currentValues.size();
+ int actual = currentValues.get(currIndex);
+ currIndex++;
+ return actual;
+ }
+ }
+
private final int[] ordinalMap;
private final InnerFacetsConfig facetsConfig;
private final Set<String> facetFields;
@@ -125,31 +190,59 @@ public class OrdinalMappingLeafReader extends FilterLeafReader {
}
// always add the default indexFieldName. This is because FacetsConfig does
// not explicitly record dimensions that were indexed under the default
- // DimConfig, unless they have a custome DimConfig.
+ // DimConfig, unless they have a custom DimConfig.
facetFields.add(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName);
}
/**
* Expert: encodes category ordinals into a BytesRef. Override in case you use custom encoding,
* other than the default done by FacetsConfig.
+ *
+ * @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
+ * starting in Lucene 9
*/
+ @Deprecated
protected BytesRef encode(IntsRef ordinals) {
return facetsConfig.dedupAndEncode(ordinals);
}
- /** Expert: override in case you used custom encoding for the categories under this field. */
+ /**
+ * Expert: override in case you used custom encoding for the categories under this field.
+ *
+ * @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
+ * starting in Lucene 9
+ */
+ @Deprecated
protected OrdinalsReader getOrdinalsReader(String field) {
return new DocValuesOrdinalsReader(field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
- if (facetFields.contains(field)) {
+ BinaryDocValues original = in.getBinaryDocValues(field);
+ if (original != null && facetFields.contains(field)) {
+ // The requested field is a facet ordinals field _and_ it's non-null, so move forward with
+ // mapping:
final OrdinalsReader ordsReader = getOrdinalsReader(field);
- return new OrdinalMappingBinaryDocValues(
- ordsReader.getReader(in.getContext()), in.getBinaryDocValues(field));
+ return new OrdinalMappingBinaryDocValues(ordsReader.getReader(in.getContext()), original);
+ } else {
+ // The requested field either isn't present (null) or isn't a facet ordinals field. Either
+ // way, just return the original:
+ return original;
+ }
+ }
+
+ @Override
+ public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
+ SortedNumericDocValues original = in.getSortedNumericDocValues(field);
+ if (original != null && facetFields.contains(field)) {
+ // The requested field is a facet ordinals field _and_ it's non-null, so move forward with
+ // mapping:
+ return new OrdinalMappingSortedNumericDocValues(original);
} else {
- return in.getBinaryDocValues(field);
+ // The requested field either isn't present (null) or isn't a facet ordinals field. Either
+ // way, just return the original:
+ return original;
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalsReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalsReader.java
index 5251109..5f93f73 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalsReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/OrdinalsReader.java
@@ -20,7 +20,13 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.IntsRef;
-/** Provides per-document ordinals. */
+/**
+ * Provides per-document ordinals.
+ *
+ * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
+ * Lucene 9
+ */
+@Deprecated
public abstract class OrdinalsReader {
/** Returns ordinals for documents in one segment. */
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java
index 1b14b07..601870e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java
@@ -29,8 +29,11 @@ import org.apache.lucene.util.IntsRef;
* Reads from any {@link OrdinalsReader}; use {@link FastTaxonomyFacetCounts} if you are using the
* default encoding from {@link BinaryDocValues}.
*
- * @lucene.experimental
+ * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
+ * Lucene 9. Please switch to {@link FastTaxonomyFacetCounts} or implement your own {@link
+ * org.apache.lucene.facet.Facets} implementation if you have custom needs.
*/
+@Deprecated
public class TaxonomyFacetCounts extends IntTaxonomyFacets {
private final OrdinalsReader ordinalsReader;
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java
index f0c66ba..b375754 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java
@@ -20,7 +20,10 @@ import static org.apache.lucene.facet.taxonomy.TaxonomyReader.INVALID_ORDINAL;
import static org.apache.lucene.facet.taxonomy.TaxonomyReader.ROOT_ORDINAL;
import java.io.IOException;
+import org.apache.lucene.facet.FacetUtils;
+import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.IntsRef;
/**
@@ -34,19 +37,16 @@ public class TaxonomyFacetLabels {
/** {@code TaxonomyReader} provided to the constructor */
private final TaxonomyReader taxoReader;
- /**
- * {@code OrdinalsReader} to decode ordinals previously indexed into the {@code BinaryDocValues}
- * facet field
- */
- private final OrdinalsReader ordsReader;
+ /** field storing the taxonomy ordinals */
+ private final String indexFieldName;
/**
* Sole constructor. Do not close the provided {@link TaxonomyReader} while still using this
* instance!
*/
- public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) throws IOException {
+ public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) {
this.taxoReader = taxoReader;
- this.ordsReader = new DocValuesOrdinalsReader(indexFieldName);
+ this.indexFieldName = indexFieldName;
}
/**
@@ -62,7 +62,13 @@ public class TaxonomyFacetLabels {
* @throws IOException when a low-level IO issue occurs
*/
public FacetLabelReader getFacetLabelReader(LeafReaderContext readerContext) throws IOException {
- return new FacetLabelReader(ordsReader, readerContext);
+ SortedNumericDocValues ordinalValues =
+ FacetUtils.loadOrdinalValues(readerContext.reader(), indexFieldName);
+ if (ordinalValues == null) {
+ ordinalValues = DocValues.emptySortedNumeric();
+ }
+
+ return new FacetLabelReader(ordinalValues);
}
/**
@@ -71,18 +77,50 @@ public class TaxonomyFacetLabels {
* @lucene.experimental
*/
public class FacetLabelReader {
+ /** By default, we store taxonomy ordinals in SortedNumericDocValues field */
+ private final SortedNumericDocValues ordinalValues;
+
+ /**
+ * Users can provide their own custom OrdinalsReader for cases where the default encoding isn't
+ * used. This capability is deprecated and will be removed in Lucene 10.
+ */
private final OrdinalsReader.OrdinalsSegmentReader ordinalsSegmentReader;
- private final IntsRef decodedOrds = new IntsRef();
+
+ private final IntsRef decodedOrds;
+
private int currentDocId = -1;
- private int currentPos = -1;
+ private boolean currentDocHasValues;
+ private int currentPos;
+ private int currentDocOrdinalCount;
// Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
private int[] parents;
- /** Sole constructor. */
+ /**
+ * Construct from a specified {@link SortedNumericDocValues} field; useful for reading the
+ * default encoding.
+ */
+ public FacetLabelReader(SortedNumericDocValues ordinalValues) {
+ this.ordinalValues = ordinalValues;
+ ordinalsSegmentReader = null;
+ decodedOrds = null;
+ }
+
+ /**
+ * Construct using a custom {@link OrdinalsReader}; useful if using a custom binary format.
+ *
+ * <p>Note: If using the default encoding, you can use {@link
+ * #FacetLabelReader(SortedNumericDocValues)} directly
+ *
+ * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting
+ * with Lucene 9
+ */
+ @Deprecated
public FacetLabelReader(OrdinalsReader ordsReader, LeafReaderContext readerContext)
throws IOException {
ordinalsSegmentReader = ordsReader.getReader(readerContext);
+ decodedOrds = new IntsRef();
+ ordinalValues = null;
}
/**
@@ -108,20 +146,45 @@ public class TaxonomyFacetLabels {
throw new IllegalArgumentException(
"docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
}
- ordinalsSegmentReader.get(docId, decodedOrds);
+
currentDocId = docId;
- currentPos = decodedOrds.offset;
+
+ if (ordinalsSegmentReader != null) {
+ ordinalsSegmentReader.get(docId, decodedOrds);
+ currentPos = decodedOrds.offset;
+ } else {
+ currentDocHasValues = ordinalValues.advanceExact(docId);
+ if (currentDocHasValues) {
+ currentDocOrdinalCount = ordinalValues.docValueCount();
+ currentPos = 0;
+ }
+ }
}
- int endPos = decodedOrds.offset + decodedOrds.length;
- assert currentPos <= endPos;
+ int ord;
+ if (ordinalsSegmentReader != null) {
+ int endPos = decodedOrds.offset + decodedOrds.length;
+ assert currentPos <= endPos;
+
+ if (currentPos == endPos) {
+ return null;
+ }
+
+ ord = decodedOrds.ints[currentPos++];
+ } else {
+ if (currentDocHasValues == false) {
+ return null;
+ }
+
+ assert currentPos <= currentDocOrdinalCount;
+ if (currentPos == currentDocOrdinalCount) {
+ return null;
+ }
- if (currentPos == endPos) {
- // no more FacetLabels
- return null;
+ ord = (int) ordinalValues.nextValue();
+ currentPos++;
}
- int ord = decodedOrds.ints[currentPos++];
return taxoReader.getPath(ord);
}
@@ -168,24 +231,61 @@ public class TaxonomyFacetLabels {
throw new IllegalArgumentException(
"docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
}
- ordinalsSegmentReader.get(docId, decodedOrds);
- currentPos = decodedOrds.offset;
currentDocId = docId;
- }
- if (parents == null) {
- parents = taxoReader.getParallelTaxonomyArrays().parents();
+ if (ordinalsSegmentReader != null) {
+ ordinalsSegmentReader.get(docId, decodedOrds);
+ currentPos = decodedOrds.offset;
+ } else {
+ currentDocHasValues = ordinalValues.advanceExact(docId);
+ if (currentDocHasValues) {
+ currentDocOrdinalCount = ordinalValues.docValueCount();
+ currentPos = 0;
+ }
+ }
}
- int endPos = decodedOrds.offset + decodedOrds.length;
- assert currentPos <= endPos;
+ if (ordinalsSegmentReader != null) {
+ int endPos = decodedOrds.offset + decodedOrds.length;
+ assert currentPos <= endPos;
- for (; currentPos < endPos; ) {
- int ord = decodedOrds.ints[currentPos++];
- if (isDescendant(ord, parentOrd) == true) {
- return taxoReader.getPath(ord);
+ if (currentPos == endPos) {
+ return null;
}
+
+ if (parents == null) {
+ parents = taxoReader.getParallelTaxonomyArrays().parents();
+ }
+
+ do {
+ int ord = decodedOrds.ints[currentPos++];
+ if (isDescendant(ord, parentOrd) == true) {
+ return taxoReader.getPath(ord);
+ }
+ } while (currentPos < endPos);
+ } else {
+ if (currentDocHasValues == false) {
+ return null;
+ }
+
+ assert currentPos <= currentDocOrdinalCount;
+ if (currentPos == currentDocOrdinalCount) {
+ return null;
+ }
+
+ if (parents == null) {
+ parents = taxoReader.getParallelTaxonomyArrays().parents();
+ }
+
+ do {
+ int ord = (int) ordinalValues.nextValue();
+ currentPos++;
+ if (isDescendant(ord, parentOrd) == true) {
+ return taxoReader.getPath(ord);
+ }
+ } while (currentPos < currentDocOrdinalCount);
}
+
return null;
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java
index 1197da5..5ce3752 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java
@@ -18,9 +18,12 @@ package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.List;
+import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
@@ -36,8 +39,7 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
/**
* Aggregates double facet values from the provided {@link DoubleValuesSource}, pulling ordinals
- * using {@link DocValuesOrdinalsReader} against the default indexed facet field {@link
- * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
+ * from the default indexed facet field {@link FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
*/
public TaxonomyFacetSumValueSource(
TaxonomyReader taxoReader,
@@ -45,18 +47,33 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
FacetsCollector fc,
DoubleValuesSource valueSource)
throws IOException {
- this(
- new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME),
- taxoReader,
- config,
- fc,
- valueSource);
+ this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc, valueSource);
+ }
+
+ /**
+ * Aggregates double facet values from the provided {@link DoubleValuesSource}, pulling ordinals
+ * from the specified indexed facet field.
+ */
+ public TaxonomyFacetSumValueSource(
+ String indexField,
+ TaxonomyReader taxoReader,
+ FacetsConfig config,
+ FacetsCollector fc,
+ DoubleValuesSource valueSource)
+ throws IOException {
+ super(indexField, taxoReader, config);
+ ordinalsReader = null;
+ sumValues(fc.getMatchingDocs(), fc.getKeepScores(), valueSource);
}
/**
* Aggregates float facet values from the provided {@link DoubleValuesSource}, and pulls ordinals
* from the provided {@link OrdinalsReader}.
+ *
+ * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
+ * Lucene 9
*/
+ @Deprecated
public TaxonomyFacetSumValueSource(
OrdinalsReader ordinalsReader,
TaxonomyReader taxoReader,
@@ -91,20 +108,47 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
List<MatchingDocs> matchingDocs, boolean keepScores, DoubleValuesSource valueSource)
throws IOException {
- IntsRef scratch = new IntsRef();
- for (MatchingDocs hits : matchingDocs) {
- OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
- DoubleValues scores = keepScores ? scores(hits) : null;
- DoubleValues functionValues = valueSource.getValues(hits.context, scores);
- DocIdSetIterator docs = hits.bits.iterator();
+ if (ordinalsReader != null) {
+ // If the user provided a custom ordinals reader, use it to retrieve the document ordinals:
+ IntsRef scratch = new IntsRef();
+ for (MatchingDocs hits : matchingDocs) {
+ OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
+ DoubleValues scores = keepScores ? scores(hits) : null;
+ DoubleValues functionValues = valueSource.getValues(hits.context, scores);
+ DocIdSetIterator docs = hits.bits.iterator();
+
+ int doc;
+ while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ords.get(doc, scratch);
+ if (functionValues.advanceExact(doc)) {
+ float value = (float) functionValues.doubleValue();
+ for (int i = 0; i < scratch.length; i++) {
+ values[scratch.ints[i]] += value;
+ }
+ }
+ }
+ }
+ } else {
+ // If no custom ordinals reader is provided, expect the default encoding:
+ for (MatchingDocs hits : matchingDocs) {
+ SortedNumericDocValues ordinalValues =
+ FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
+ if (ordinalValues == null) {
+ continue;
+ }
+
+ DoubleValues scores = keepScores ? scores(hits) : null;
+ DoubleValues functionValues = valueSource.getValues(hits.context, scores);
+ DocIdSetIterator it =
+ ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), ordinalValues));
- int doc;
- while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- ords.get(doc, scratch);
- if (functionValues.advanceExact(doc)) {
- float value = (float) functionValues.doubleValue();
- for (int i = 0; i < scratch.length; i++) {
- values[scratch.ints[i]] += value;
+ for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+ if (functionValues.advanceExact(doc)) {
+ float value = (float) functionValues.doubleValue();
+ int ordinalCount = ordinalValues.docValueCount();
+ for (int i = 0; i < ordinalCount; i++) {
+ values[(int) ordinalValues.nextValue()] += value;
+ }
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java
index fd3107f..f1df8b9 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java
@@ -97,4 +97,18 @@ public interface TaxonomyWriter extends Closeable, TwoPhaseCommit {
/** Returns the commit user data iterable that was set on {@link #setLiveCommitData(Iterable)}. */
public Iterable<Map.Entry<String, String>> getLiveCommitData();
+
+ /**
+ * Determine whether or not to store taxonomy ordinals for each document using the older binary
+ * format or the newer SortedNumericDocValues format (based on the version used to create the
+ * index).
+ *
+ * @deprecated Please don't rely on this method as it will be removed in Lucene 10. It's being
+ * introduced to support backwards-compatibility with Lucene 8 and earlier index formats
+ * temporarily.
+ */
+ @Deprecated
+ default boolean useNumericDocValuesForOrdinals() {
+ return false;
+ }
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
index f1e2ad9..35f17f1 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@@ -162,7 +162,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
openMode = config.getOpenMode();
if (DirectoryReader.indexExists(directory) == false) {
indexEpoch = 1;
- // no commit exists so we can safely use the new BinaryDocValues field
+ // no commit exists so we can safely use the newer formats:
useOlderFormat = false;
} else {
String epochStr = null;
@@ -1005,4 +1005,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
public final long getTaxonomyEpoch() {
return indexEpoch;
}
+
+ @Override
+ public boolean useNumericDocValuesForOrdinals() {
+ return useOlderFormat == false;
+ }
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestMultipleIndexFields.java b/lucene/facet/src/test/org/apache/lucene/facet/TestMultipleIndexFields.java
index a65b136..759b536 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestMultipleIndexFields.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestMultipleIndexFields.java
@@ -190,7 +190,7 @@ public class TestMultipleIndexFields extends FacetTestCase {
private void assertOrdinalsExist(String field, IndexReader ir) throws IOException {
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
- if (r.getBinaryDocValues(field) != null) {
+ if (r.getSortedNumericDocValues(field) != null) {
return; // not all segments must have this DocValues
}
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestBackCompatSortedNumericDocValues.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestBackCompatSortedNumericDocValues.java
new file mode 100644
index 0000000..c83e479
--- /dev/null
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestBackCompatSortedNumericDocValues.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.facet.taxonomy;
+
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestBackCompatSortedNumericDocValues extends LuceneTestCase {
+
+ private static class FacetsConfigWrapper extends FacetsConfig {
+ public BytesRef encodeValues(IntsRef values) {
+ return dedupAndEncode(values);
+ }
+ }
+
+ public void testRandom() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+ // sorta big scratch so we don't have to think about reallocating:
+ IntsRef scratch = new IntsRef(100);
+
+ // used to access default binary encoding easily:
+ FacetsConfigWrapper facetsConfig = new FacetsConfigWrapper();
+
+ // keep track of the values we expect to see for each doc:
+ Map<String, List<Integer>> expectedValues = new HashMap<>();
+
+ int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; i++) {
+ int numValues = RandomNumbers.randomIntBetween(random(), 1, 50);
+ scratch.length = 0;
+ scratch.offset = 0;
+ Set<Integer> values = new HashSet<>();
+ for (int j = 0; j < numValues; j++) {
+ int value = random().nextInt(Integer.MAX_VALUE);
+ values.add(value);
+ // we might have dups in here, which is fine (encoding takes care of deduping and sorting):
+ scratch.ints[j] = value;
+ scratch.length++;
+ }
+ // we expect to get sorted and deduped values back out:
+ expectedValues.put(String.valueOf(i), values.stream().sorted().collect(Collectors.toList()));
+
+ Document doc = new Document();
+ doc.add(new StoredField("id", String.valueOf(i)));
+ doc.add(new BinaryDocValuesField("bdv", facetsConfig.encodeValues(scratch)));
+ writer.addDocument(doc);
+ }
+
+ writer.forceMerge(1);
+ writer.commit();
+
+ IndexReader reader = writer.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ writer.close();
+
+ assert reader.leaves().size() == 1;
+ BinaryDocValues binaryDocValues = reader.leaves().get(0).reader().getBinaryDocValues("bdv");
+ assertNotNull(binaryDocValues);
+ SortedNumericDocValues docValues = BackCompatSortedNumericDocValues.wrap(binaryDocValues, null);
+
+ TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), numDocs, Sort.INDEXORDER);
+
+ for (ScoreDoc scoreDoc : docs.scoreDocs) {
+ String id = reader.document(scoreDoc.doc).get("id");
+ int docId = scoreDoc.doc;
+
+ int doc;
+ if (random().nextBoolean()) {
+ doc = docValues.nextDoc();
+ } else {
+ if (random().nextBoolean()) {
+ doc = docValues.advance(docId);
+ } else {
+ assertTrue(docValues.advanceExact(docId));
+ doc = docId;
+ }
+ }
+ assertEquals(docId, doc);
+ assertEquals(docId, docValues.docID());
+
+ List<Integer> expected = expectedValues.get(id);
+ assertEquals(expected.size(), docValues.docValueCount());
+ checkValues(expected, docValues);
+ }
+
+ // Run off the end and make sure that case is handled gracefully:
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.nextDoc());
+
+ IOUtils.close(reader, dir);
+ }
+
+ private void checkValues(List<Integer> expected, SortedNumericDocValues values)
+ throws IOException {
+ for (Integer e : expected) {
+ assertEquals((long) e, values.nextValue());
+ }
+ }
+}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetSumValueSource.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetSumValueSource.java
index 8c3cdaf..d88a6e7 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetSumValueSource.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetSumValueSource.java
@@ -410,9 +410,15 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
FacetsCollector.search(newSearcher(r), new MatchAllDocsQuery(), 10, fc);
Facets facets1 = getTaxonomyFacetCounts(taxoReader, config, fc);
- Facets facets2 =
- new TaxonomyFacetSumValueSource(
- new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
+ Facets facets2;
+ if (random().nextBoolean()) {
+ facets2 =
+ new TaxonomyFacetSumValueSource(
+ new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
+ } else {
+ facets2 =
+ new TaxonomyFacetSumValueSource("$b", taxoReader, config, fc, DoubleValuesSource.SCORES);
+ }
assertEquals(r.maxDoc(), facets1.getTopChildren(10, "a").value.intValue());
assertEquals(r.maxDoc(), facets2.getTopChildren(10, "b").value.doubleValue(), 1E-10);
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
index a702ece..e8f62c7 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
@@ -20,10 +20,32 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.DrillDownQuery;
+import org.apache.lucene.facet.FacetField;
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.Facets;
+import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.facet.taxonomy.DocValuesOrdinalsReader;
import org.apache.lucene.facet.taxonomy.FacetLabel;
+import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyFacetLabels;
+import org.apache.lucene.facet.taxonomy.TaxonomyFacetSumValueSource;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.Ignore;
@@ -49,50 +71,196 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
//
// Then move the zip file to your trunk checkout and use it in your test cases
- public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";
+ private static final String OLD_TAXONOMY_INDEX_NAME = "taxonomy.8.11.0-cfs";
+ private static final String OLD_INDEX_NAME = "index.8.11.0-cfs";
public void testCreateNewTaxonomy() throws IOException {
- createNewTaxonomyIndex(oldTaxonomyIndexName);
+ createNewTaxonomyIndex(OLD_TAXONOMY_INDEX_NAME, OLD_INDEX_NAME);
}
- // Opens up a pre-existing old taxonomy index and adds new BinaryDocValues based fields
- private void createNewTaxonomyIndex(String dirName) throws IOException {
- Path indexDir = createTempDir(oldTaxonomyIndexName);
- TestUtil.unzip(getDataInputStream(dirName + ".zip"), indexDir);
- Directory dir = newFSDirectory(indexDir);
+ /**
+ * This test exercises a bunch of different faceting operations and direct taxonomy index
+ * reading to make sure more modern faceting formats introduced in 9.0 are backwards-compatible
+ * with 8.x indexes. It requires an "older" 8.x index to be in place with assumed docs/categories
+ * already present. It makes sure it can still run a number of different "read" operations against
+ * the old index, then it writes new content, forces a merge and does a bunch more "read"
+ * operations. It may seem a bit chaotic, but it's trying to test a number of different
+ * faceting-related implementations that require specific back-compat support.
+ */
+ private void createNewTaxonomyIndex(String taxoDirName, String indexDirName) throws IOException {
+ Path taxoPath = createTempDir(taxoDirName);
+ Path indexPath = createTempDir(indexDirName);
- DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+ TestUtil.unzip(getDataInputStream(taxoDirName + ".zip"), taxoPath);
+ TestUtil.unzip(getDataInputStream(indexDirName + ".zip"), indexPath);
+
+ Directory taxoDir = newFSDirectory(taxoPath);
+ Directory indexDir = newFSDirectory(indexPath);
+
+ // Open the existing indexes (explicitly open in APPEND mode and fail if they don't exist since
+ // we're trying to test
+ // back-compat with existing indexes):
+ DirectoryTaxonomyWriter taxoWriter =
+ new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.APPEND);
+ IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
+ indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
+ RandomIndexWriter indexWriter =
+ new RandomIndexWriter(random(), indexDir, indexWriterConfig, random().nextBoolean());
+
+ // Use a default FacetsConfig. This assumes that we didn't need to register anything interesting
+ // when creating
+ // the older format index. If that changes, we need a way to ensure we re-use the same facet
+ // configuration used
+ // in creating the old format taxonomy index:
+ FacetsConfig facetsConfig = new FacetsConfig();
+
+ // At this point we should have a taxonomy index and "regular" index containing some taxonomy
+ // categories and
+ // documents with facet ordinals indexed. Confirm that we can facet and search against it as-is
+ // before adding
+ // anything new. Of course these tests are highly dependent on the index we're starting with, so
+ // they will
+ // need to be updated accordingly if the "old" test index changes:
+ TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+ IndexSearcher searcher = newSearcher(indexWriter.getReader());
+ FacetsCollector facetsCollector = new FacetsCollector();
+ searcher.search(new MatchAllDocsQuery(), facetsCollector);
+ // Test a few different facet implementations that we know have back-compat implications:
+ Facets facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector);
+ FacetResult facetResult = facets.getTopChildren(10, "f1");
+ assertEquals(2, facetResult.value);
+ facets =
+ new TaxonomyFacetCounts(
+ new DocValuesOrdinalsReader(), taxoReader, facetsConfig, facetsCollector);
+ facetResult = facets.getTopChildren(10, "f1");
+ assertEquals(2, facetResult.value);
+ facets =
+ new TaxonomyFacetSumValueSource(
+ taxoReader, facetsConfig, facetsCollector, DoubleValuesSource.constant(1d));
+ facetResult = facets.getTopChildren(10, "f1");
+ assertEquals(2.0f, facetResult.value);
+ // Test that we can drill-down as expected (and read facet labels from matching docs):
+ TaxonomyFacetLabels facetLabels =
+ new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
+ assert (searcher.getIndexReader().leaves().size() == 1);
+ TaxonomyFacetLabels.FacetLabelReader labelReader =
+ facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
+ DrillDownQuery query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
+ query.add("f1", "foo");
+ TopDocs docResults = searcher.search(query, 10);
+ assertEquals(1, docResults.totalHits.value);
+ int docId = docResults.scoreDocs[0].doc;
+ Set<FacetLabel> labels = new HashSet<>();
+ for (FacetLabel label = labelReader.nextFacetLabel(docId);
+ label != null;
+ label = labelReader.nextFacetLabel(docId)) {
+ labels.add(label);
+ }
+ assertEquals(2, labels.size());
+ assertTrue(
+ labels.containsAll(List.of(new FacetLabel("f1", "foo"), new FacetLabel("f2", "foo"))));
+ assertEquals(0, docResults.scoreDocs[0].doc);
+ // And make sure we can read directly from the taxonomy like we'd expect:
+ int ord = taxoReader.getOrdinal(new FacetLabel("f1", "foo"));
+ assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
+ assertNotNull(taxoReader.getPath(ord));
+
+ // Now we'll add some new docs and taxonomy categories, force merge (to make sure that goes
+ // well) and then do
+ // some more searches, etc.:
+ Document doc = new Document();
+ doc.add(new FacetField("f1", "zed"));
+ indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
FacetLabel cp_c = new FacetLabel("c");
- writer.addCategory(cp_c);
- writer.getInternalIndexWriter().forceMerge(1);
- writer.commit();
+ taxoWriter.addCategory(cp_c);
+
+ indexWriter.forceMerge(1);
+ taxoWriter.getInternalIndexWriter().forceMerge(1);
+ indexWriter.commit();
+ taxoWriter.commit();
- TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+ IOUtils.close(taxoReader, searcher.getIndexReader());
+ taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+ searcher = newSearcher(indexWriter.getReader());
+ IOUtils.close(indexWriter, taxoWriter);
- int ord1 = reader.getOrdinal(new FacetLabel("a"));
- assert ord1 != TaxonomyReader.INVALID_ORDINAL;
- // Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
- assertNotNull(reader.getPath(ord1));
+ // Re-test a number of different use-cases, which should now "see" the newly added content:
+ facetsCollector = new FacetsCollector();
+ searcher.search(new MatchAllDocsQuery(), facetsCollector);
+ facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector);
+ facetResult = facets.getTopChildren(10, "f1");
+ assertEquals(3, facetResult.value);
+ facets =
+ new TaxonomyFacetCounts(
+ new DocValuesOrdinalsReader(), taxoReader, facetsConfig, facetsCollector);
+ facetResult = facets.getTopChildren(10, "f1");
+ assertEquals(3, facetResult.value);
+ facets =
+ new TaxonomyFacetSumValueSource(
+ taxoReader, facetsConfig, facetsCollector, DoubleValuesSource.constant(1d));
+ facetResult = facets.getTopChildren(10, "f1");
+ assertEquals(3.0f, facetResult.value);
+ // Test that we can drill-down as expected, and access facet labels:
+ facetLabels = new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
+ assert (searcher.getIndexReader().leaves().size() == 1);
+ labelReader = facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
+ query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
+ query.add("f1", "foo");
+ docResults = searcher.search(query, 10);
+ assertEquals(1, docResults.totalHits.value);
+ docId = docResults.scoreDocs[0].doc;
+ labels = new HashSet<>();
+ for (FacetLabel label = labelReader.nextFacetLabel(docId);
+ label != null;
+ label = labelReader.nextFacetLabel(docId)) {
+ labels.add(label);
+ }
+ assertEquals(2, labels.size());
+ assertTrue(
+ labels.containsAll(List.of(new FacetLabel("f1", "foo"), new FacetLabel("f2", "foo"))));
+ labelReader = facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
+ query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
+ query.add("f1", "zed");
+ docResults = searcher.search(query, 10);
+ assertEquals(1, docResults.totalHits.value);
+ docId = docResults.scoreDocs[0].doc;
+ labels = new HashSet<>();
+ for (FacetLabel label = labelReader.nextFacetLabel(docId);
+ label != null;
+ label = labelReader.nextFacetLabel(docId)) {
+ labels.add(label);
+ }
+ assertEquals(1, labels.size());
+ assertTrue(labels.contains(new FacetLabel("f1", "zed")));
+ // And make sure we can read directly from the taxonomy like we'd expect:
+ ord = taxoReader.getOrdinal(new FacetLabel("f1", "foo"));
+ assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
+ assertNotNull(taxoReader.getPath(ord));
+ ord = taxoReader.getOrdinal(new FacetLabel("f1", "zed"));
+ assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
+ assertNotNull(taxoReader.getPath(ord));
+ // And check a few more direct reads from the taxonomy:
+ ord = taxoReader.getOrdinal(new FacetLabel("a"));
+ assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
+ assertNotNull(taxoReader.getPath(ord));
- int ord2 = reader.getOrdinal(new FacetLabel("b"));
- assert ord2 != TaxonomyReader.INVALID_ORDINAL;
+ ord = taxoReader.getOrdinal(new FacetLabel("b"));
+ assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
// Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
- assertNotNull(reader.getPath(ord2));
+ assertNotNull(taxoReader.getPath(ord));
- int ord3 = reader.getOrdinal(cp_c);
- assert ord3 != TaxonomyReader.INVALID_ORDINAL;
- assertNotNull(reader.getPath(ord3));
+ ord = taxoReader.getOrdinal(cp_c);
+ assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
+ assertNotNull(taxoReader.getPath(ord));
- reader.close();
- writer.close();
- dir.close();
+ IOUtils.close(taxoReader, searcher.getIndexReader(), taxoDir, indexDir);
}
// Opens up a pre-existing index and tries to run getBulkPath on it
public void testGetBulkPathOnOlderCodec() throws Exception {
- Path indexDir = createTempDir(oldTaxonomyIndexName);
- TestUtil.unzip(getDataInputStream(oldTaxonomyIndexName + ".zip"), indexDir);
+ Path indexDir = createTempDir(OLD_TAXONOMY_INDEX_NAME);
+ TestUtil.unzip(getDataInputStream(OLD_TAXONOMY_INDEX_NAME + ".zip"), indexDir);
Directory dir = newFSDirectory(indexDir);
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
@@ -114,21 +282,41 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// Used to create a fresh taxonomy index with StoredFields
@Ignore
public void testCreateOldTaxonomy() throws IOException {
- createOldTaxonomyIndex(oldTaxonomyIndexName);
+ createOldTaxonomyIndex(OLD_TAXONOMY_INDEX_NAME, OLD_INDEX_NAME);
}
- private void createOldTaxonomyIndex(String dirName) throws IOException {
- Path indexDir = getIndexDir().resolve(dirName);
- Files.deleteIfExists(indexDir);
- Directory dir = newFSDirectory(indexDir);
+ private void createOldTaxonomyIndex(String taxoDirName, String indexDirName) throws IOException {
+ Path taxoPath = getIndexDir().resolve(taxoDirName);
+ Path indexPath = getIndexDir().resolve(indexDirName);
+ Files.deleteIfExists(taxoPath);
+ Files.deleteIfExists(indexPath);
- TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+ Directory taxoDir = newFSDirectory(taxoPath);
+ Directory indexDir = newFSDirectory(indexPath);
- writer.addCategory(new FacetLabel("a"));
- writer.addCategory(new FacetLabel("b"));
- writer.commit();
- writer.close();
- dir.close();
+ TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+ FacetsConfig facetsConfig = new FacetsConfig();
+ RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir);
+
+ Document doc = new Document();
+ doc.add(new FacetField("f1", "foo"));
+ doc.add(new FacetField("f2", "foo"));
+ indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new FacetField("f1", "bar"));
+ indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new FacetField("f2", "bar"));
+ indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
+
+ taxoWriter.addCategory(new FacetLabel("a"));
+ taxoWriter.addCategory(new FacetLabel("b"));
+
+ indexWriter.commit();
+ taxoWriter.commit();
+ IOUtils.close(indexWriter, taxoWriter, indexDir, taxoDir);
}
private Path getIndexDir() {
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/index.8.11.0-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/index.8.11.0-cfs.zip
new file mode 100644
index 0000000..50fec1d
Binary files /dev/null and b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/index.8.11.0-cfs.zip differ
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip
deleted file mode 100644
index a412ab2..0000000
Binary files a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip and /dev/null differ
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.11.0-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.11.0-cfs.zip
new file mode 100644
index 0000000..951eaec
Binary files /dev/null and b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.11.0-cfs.zip differ