You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/01/10 17:35:46 UTC
[lucene] branch branch_9x updated: LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets (#509)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new c7650cd LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets (#509)
c7650cd is described below
commit c7650cdec2cb4ed6b31c536a8eb2818c868dc42b
Author: Marc D'mello <ma...@gmail.com>
AuthorDate: Mon Jan 10 08:52:14 2022 -0800
LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets (#509)
---
lucene/CHANGES.txt | 2 +
.../java/org/apache/lucene/facet/FacetsConfig.java | 32 +-
.../ConcurrentSortedSetDocValuesFacetCounts.java | 69 +-
.../DefaultSortedSetDocValuesReaderState.java | 264 ++++++-
.../sortedset/SortedSetDocValuesFacetCounts.java | 74 +-
.../sortedset/SortedSetDocValuesFacetField.java | 32 +-
.../sortedset/SortedSetDocValuesReaderState.java | 133 +++-
.../org/apache/lucene/facet/FacetTestCase.java | 2 +-
.../sortedset/TestSortedSetDocValuesFacets.java | 836 ++++++++++++++++++++-
.../lucene/facet/taxonomy/TestFacetLabel.java | 5 -
10 files changed, 1323 insertions(+), 126 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4123190..9373db11 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -66,6 +66,8 @@ New Features
* LUCENE-10335: Add ModuleResourceLoader as complement to ClasspathResourceLoader.
(Uwe Schindler)
+* LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets. (Marc D'mello)
+
Improvements
---------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
index 53c7050..3d4a57f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
@@ -478,19 +478,35 @@ public class FacetsConfig {
private void processSSDVFacetFields(
Map<String, List<SortedSetDocValuesFacetField>> byField, Document doc) {
+
for (Map.Entry<String, List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) {
String indexFieldName = ent.getKey();
for (SortedSetDocValuesFacetField facetField : ent.getValue()) {
- FacetLabel facetLabel = new FacetLabel(facetField.dim, facetField.label);
- String fullPath = pathToString(facetLabel.components, facetLabel.length);
-
- // For facet counts:
- doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
-
+ FacetLabel facetLabel = new FacetLabel(facetField.dim, facetField.path);
+ DimConfig dimConfig = getDimConfig(facetField.dim);
+ if (dimConfig.hierarchical) {
+ for (int i = 0; i < facetLabel.length; i++) {
+ String fullPath = pathToString(facetLabel.components, i + 1);
+ // For facet counts:
+ doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
+ }
+ } else {
+ if (facetLabel.length != 2) {
+ throw new IllegalArgumentException(
+ "dimension \""
+ + facetField.dim
+ + "\" is not hierarchical yet has "
+ + facetField.path.length
+ + " components");
+ }
+ String fullPath = pathToString(facetLabel.components, facetLabel.length);
+ // For facet counts:
+ doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
+ }
// For drill-down:
- indexDrillDownTerms(doc, indexFieldName, getDimConfig(facetField.dim), facetLabel);
+ indexDrillDownTerms(doc, indexFieldName, dimConfig, facetLabel);
}
}
}
@@ -599,7 +615,7 @@ public class FacetsConfig {
private static final char ESCAPE_CHAR = '\u001E';
/** Turns a dim + path into an encoded string. */
- public static String pathToString(String dim, String[] path) {
+ public static String pathToString(String dim, String... path) {
String[] fullPath = new String[1 + path.length];
fullPath[0] = dim;
System.arraycopy(path, 0, fullPath, 1, path.length);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
index 1ee7863..5035235 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
@@ -22,7 +22,8 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import java.util.Map;
+import java.util.Objects;
+import java.util.PrimitiveIterator;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@@ -64,10 +65,13 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
final ExecutorService exec;
final SortedSetDocValuesReaderState state;
+ final FacetsConfig stateConfig;
final SortedSetDocValues dv;
final String field;
final AtomicIntegerArray counts;
+ private static final String[] emptyPath = new String[0];
+
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
public ConcurrentSortedSetDocValuesFacetCounts(
SortedSetDocValuesReaderState state, ExecutorService exec)
@@ -81,6 +85,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
throws IOException, InterruptedException {
this.state = state;
this.field = state.getField();
+ this.stateConfig = Objects.requireNonNullElse(state.getFacetsConfig(), new FacetsConfig());
this.exec = exec;
dv = state.getDocValues();
counts = new AtomicIntegerArray(state.getSize());
@@ -97,17 +102,32 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
- if (path.length > 0) {
- throw new IllegalArgumentException("path should be 0 length");
- }
- OrdRange ordRange = state.getOrdRange(dim);
- if (ordRange == null) {
- return null; // means dimension was never indexed
+
+ if (stateConfig.getDimConfig(dim).hierarchical) {
+ int pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
+ if (pathOrd < 0) {
+ // path was never indexed
+ return null;
+ }
+ SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
+ return getDim(dim, path, pathOrd, dimTree.iterator(pathOrd), topN);
+ } else {
+ if (path.length > 0) {
+ throw new IllegalArgumentException(
+ "Field is not configured as hierarchical, path should be 0 length");
+ }
+ OrdRange ordRange = state.getOrdRange(dim);
+ if (ordRange == null) {
+ // means dimension was never indexed
+ return null;
+ }
+ return getDim(dim, null, -1, ordRange.iterator(), topN);
}
- return getDim(dim, ordRange, topN);
}
- private FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {
+ private FacetResult getDim(
+ String dim, String[] path, int pathOrd, PrimitiveIterator.OfInt childOrds, int topN)
+ throws IOException {
TopOrdAndIntQueue q = null;
@@ -118,7 +138,8 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
TopOrdAndIntQueue.OrdAndValue reuse = null;
- for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
+ while (childOrds.hasNext()) {
+ int ord = childOrds.next();
if (counts.get(ord) > 0) {
dimCount += counts.get(ord);
childCount++;
@@ -148,12 +169,19 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for (int i = labelValues.length - 1; i >= 0; i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
+ assert ordAndValue != null;
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
- labelValues[i] = new LabelAndValue(parts[1], ordAndValue.value);
+ labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
}
- return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
+ if (pathOrd == -1) {
+ // not hierarchical facet
+ return new FacetResult(dim, emptyPath, dimCount, labelValues, childCount);
+ } else {
+ // hierarchical facet
+ return new FacetResult(dim, path, counts.get(pathOrd), labelValues, childCount);
+ }
}
private class CountOneSegment implements Callable<Void> {
@@ -365,10 +393,19 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
public List<FacetResult> getAllDims(int topN) throws IOException {
List<FacetResult> results = new ArrayList<>();
- for (Map.Entry<String, OrdRange> ent : state.getPrefixToOrdRange().entrySet()) {
- FacetResult fr = getDim(ent.getKey(), ent.getValue(), topN);
- if (fr != null) {
- results.add(fr);
+ for (String dim : state.getDims()) {
+ if (stateConfig.getDimConfig(dim).hierarchical) {
+ SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
+ FacetResult fr = getDim(dim, emptyPath, dimTree.dimStartOrd, dimTree.iterator(), topN);
+ if (fr != null) {
+ results.add(fr);
+ }
+ } else {
+ OrdRange ordRange = state.getOrdRange(dim);
+ FacetResult fr = getDim(dim, emptyPath, -1, ordRange.iterator(), topN);
+ if (fr != null) {
+ results.add(fr);
+ }
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
index 08ad9e9..2642280 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
@@ -17,12 +17,15 @@
package org.apache.lucene.facet.sortedset;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
import java.util.Map;
+import java.util.Stack;
import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@@ -51,20 +54,42 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
private final Map<String, OrdinalMap> cachedOrdMaps = new HashMap<>();
+ private final FacetsConfig config;
+
+ /** Used for hierarchical dims. */
+ private final Map<String, DimTree> prefixToDimTree = new HashMap<>();
+
+ /** Used for flat dims. */
private final Map<String, OrdRange> prefixToOrdRange = new HashMap<>();
/**
- * Creates this, pulling doc values from the default {@link
+ * Creates this with a config, pulling doc values from the default {@link
+ * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
+ */
+ public DefaultSortedSetDocValuesReaderState(IndexReader reader, FacetsConfig config)
+ throws IOException {
+ this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME, config);
+ }
+
+ /**
+ * Creates this without a config, pulling doc values from the default {@link
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
*/
public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException {
- this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
+ this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME, null);
}
- /** Creates this, pulling doc values from the specified field. */
+ /** Creates this without a config, pulling doc values from the specified field. */
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
+ this(reader, field, null);
+ }
+
+ /** Creates this, pulling doc values from the specified field. */
+ public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field, FacetsConfig config)
+ throws IOException {
this.field = field;
this.reader = reader;
+ this.config = config;
// We need this to create thread-safe MultiSortedSetDV
// per collector:
@@ -79,38 +104,139 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
}
valueCount = (int) dv.getValueCount();
- // TODO: we can make this more efficient if eg we can be
- // "involved" when OrdinalMap is being created? Ie see
- // each term/ord it's assigning as it goes...
- String lastDim = null;
- int startOrd = -1;
-
- // TODO: this approach can work for full hierarchy?;
- // TaxoReader can't do this since ords are not in
- // "sorted order" ... but we should generalize this to
- // support arbitrary hierarchy:
- for (int ord = 0; ord < valueCount; ord++) {
- final BytesRef term = dv.lookupOrd(ord);
+ int ord = 0;
+ while (ord != valueCount) {
+ BytesRef term = dv.lookupOrd(ord);
String[] components = FacetsConfig.stringToPath(term.utf8ToString());
- if (components.length != 2) {
- throw new IllegalArgumentException(
- "this class can only handle 2 level hierarchy (dim/value); got: "
- + Arrays.toString(components)
- + " "
- + term.utf8ToString());
+ String dim = components[0];
+ if (config != null && config.getDimConfig(dim).hierarchical) {
+ ord = createOneHierarchicalFacetDimState(dv, ord) + 1;
+ } else {
+ ord = createOneFlatFacetDimState(dv, ord) + 1;
}
- if (!components[0].equals(lastDim)) {
- if (lastDim != null) {
- prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord - 1));
+ }
+ }
+
+ // returns last ord of dimension
+ private int createOneHierarchicalFacetDimState(SortedSetDocValues dv, int dimStartOrd)
+ throws IOException {
+ List<Boolean> hasChildren = new ArrayList<>();
+ List<Integer> siblings = new ArrayList<>();
+
+ // stack of paths with unfulfilled siblings
+ Stack<OrdAndComponent> siblingStack = new Stack<>();
+
+ int dimEndOrd = dimStartOrd;
+
+ BytesRef nextTerm = dv.lookupOrd(dimEndOrd);
+ String[] nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
+ String dim = nextComponents[0];
+
+ while (true) {
+ String[] components = nextComponents;
+
+ int ord = dimEndOrd - dimStartOrd;
+
+ while (siblingStack.empty() == false
+ && siblingStack.peek().component.length >= components.length) {
+ OrdAndComponent possibleSibling = siblingStack.pop();
+ if (possibleSibling.component.length == components.length) {
+ // lengths are equal, all non-siblings of equal length will have already been popped off
+ // so this must be sibling
+ siblings.set(possibleSibling.ord, ord);
}
- startOrd = ord;
- lastDim = components[0];
}
+
+ if (dimEndOrd + 1 == valueCount) {
+ // current ord needs to be added, can't have children or siblings
+ siblings.add(-1);
+ hasChildren.add(false);
+ break;
+ }
+
+ nextTerm = dv.lookupOrd(dimEndOrd + 1);
+ nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
+
+ if (nextComponents[0].equals(components[0]) == false) {
+ // current ord needs to be added, can't have children or siblings
+ siblings.add(-1);
+ hasChildren.add(false);
+ break;
+ }
+
+ if (components.length < nextComponents.length) {
+ // next ord must be a direct child of current ord, this is because we are indexing all
+ // ancestral paths
+ hasChildren.add(ord, true);
+ // we don't know if this ord has a sibling or where its sibling could be yet
+ siblingStack.push(new OrdAndComponent(ord, components));
+ // we still add INVALID_ORDINAL, which will be replaced if a valid sibling is found
+ siblings.add(ord, INVALID_ORDINAL);
+ } else if (components.length == nextComponents.length) {
+ // next ord must be a sibling of current and there are no direct children of current, this
+ // is because we
+ // are indexing all ancestral paths
+ siblings.add(ord, ord + 1);
+ hasChildren.add(ord, false);
+ } else {
+ // components.length > nextComponents.length
+ // next ord is neither sibling nor child
+ siblings.add(ord, INVALID_ORDINAL);
+ hasChildren.add(ord, false);
+ }
+
+ dimEndOrd++;
}
- if (lastDim != null) {
- prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount - 1));
+ prefixToDimTree.put(dim, new DimTree(dimStartOrd, siblings, hasChildren));
+
+ return dimEndOrd;
+ }
+
+ // returns last ord of dimension
+ private int createOneFlatFacetDimState(SortedSetDocValues dv, int dimStartOrd)
+ throws IOException {
+
+ int dimEndOrd = dimStartOrd;
+
+ BytesRef nextTerm = dv.lookupOrd(dimEndOrd);
+ String[] nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
+ if (nextComponents.length != 2) {
+ throw new IllegalArgumentException(
+ "dimension not configured to handle hierarchical field; got: "
+ + Arrays.toString(nextComponents)
+ + " "
+ + nextTerm.utf8ToString());
}
+ String dim = nextComponents[0];
+
+ while (true) {
+ String[] components = nextComponents;
+
+ if (dimEndOrd + 1 == valueCount) {
+ break;
+ }
+
+ nextTerm = dv.lookupOrd(dimEndOrd + 1);
+ nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
+
+ if (nextComponents[0].equals(components[0]) == false) {
+ break;
+ }
+
+ if (nextComponents.length != 2) {
+ throw new IllegalArgumentException(
+ "dimension not configured to handle hierarchical field; got: "
+ + Arrays.toString(nextComponents)
+ + " "
+ + nextTerm.utf8ToString());
+ }
+
+ dimEndOrd++;
+ }
+ prefixToOrdRange.put(dim, new OrdRange(dimStartOrd, dimEndOrd));
+
+ return dimEndOrd;
}
/** Return the memory usage of this object in bytes. Negative values are illegal. */
@@ -194,18 +320,6 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
return new MultiSortedSetDocValues(values, starts, map, cost);
}
- /** Returns mapping from prefix to {@link OrdRange}. */
- @Override
- public Map<String, OrdRange> getPrefixToOrdRange() {
- return prefixToOrdRange;
- }
-
- /** Returns the {@link OrdRange} for this dimension. */
- @Override
- public OrdRange getOrdRange(String dim) {
- return prefixToOrdRange.get(dim);
- }
-
/** Indexed field we are reading. */
@Override
public String getField() {
@@ -222,4 +336,72 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
public int getSize() {
return valueCount;
}
+
+ @Override
+ public FacetsConfig getFacetsConfig() {
+ return config;
+ }
+
+ @Override
+ public Iterable<String> getDims() {
+ return () ->
+ new Iterator<>() {
+
+ final Iterator<String> dimTreeIterator = prefixToDimTree.keySet().iterator();
+ final Iterator<String> ordRangeIterator = prefixToOrdRange.keySet().iterator();
+
+ @Override
+ public boolean hasNext() {
+ return ordRangeIterator.hasNext() || dimTreeIterator.hasNext();
+ }
+
+ @Override
+ public String next() {
+ if (dimTreeIterator.hasNext()) {
+ return dimTreeIterator.next();
+ } else if (ordRangeIterator.hasNext()) {
+ return ordRangeIterator.next();
+ } else {
+ return null;
+ }
+ }
+ };
+ }
+
+ /* Flat facet operations */
+
+ @Override
+ public Map<String, OrdRange> getPrefixToOrdRange() {
+ return prefixToOrdRange;
+ }
+
+ @Override
+ public OrdRange getOrdRange(String dim) {
+ if (config != null && config.getDimConfig(dim).hierarchical) {
+ throw new UnsupportedOperationException(
+ "This operation is only supported for flat dimensions");
+ }
+ return prefixToOrdRange.get(dim);
+ }
+
+ /* Hierarchical facet operations */
+
+ @Override
+ public DimTree getDimTree(String dim) {
+ if (config == null || config.getDimConfig(dim).hierarchical == false) {
+ throw new UnsupportedOperationException(
+ "This opperation is only supported for hierarchical facets");
+ }
+ return prefixToDimTree.get(dim);
+ }
+
+ private static final class OrdAndComponent {
+ int ord;
+ String[] component;
+
+ public OrdAndComponent(int ord, String[] component) {
+ this.ord = ord;
+ this.component = component;
+ }
+ }
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
index 19ff96f..10351fc 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
@@ -22,7 +22,8 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import java.util.Map;
+import java.util.Objects;
+import java.util.PrimitiveIterator;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.Facets;
@@ -31,6 +32,7 @@ import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.TopOrdAndIntQueue;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.DimTree;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
@@ -66,10 +68,13 @@ import org.apache.lucene.util.LongValues;
public class SortedSetDocValuesFacetCounts extends Facets {
final SortedSetDocValuesReaderState state;
+ final FacetsConfig stateConfig;
final SortedSetDocValues dv;
final String field;
final int[] counts;
+ private static final String[] emptyPath = new String[0];
+
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
this(state, null);
@@ -80,8 +85,9 @@ public class SortedSetDocValuesFacetCounts extends Facets {
throws IOException {
this.state = state;
this.field = state.getField();
- dv = state.getDocValues();
- counts = new int[state.getSize()];
+ this.stateConfig = Objects.requireNonNullElse(state.getFacetsConfig(), new FacetsConfig());
+ this.dv = state.getDocValues();
+ this.counts = new int[state.getSize()];
if (hits == null) {
// browse only
countAll();
@@ -95,17 +101,32 @@ public class SortedSetDocValuesFacetCounts extends Facets {
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
- if (path.length > 0) {
- throw new IllegalArgumentException("path should be 0 length");
- }
- OrdRange ordRange = state.getOrdRange(dim);
- if (ordRange == null) {
- return null; // means dimension was never indexed
+
+ if (stateConfig.getDimConfig(dim).hierarchical) {
+ int pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
+ if (pathOrd < 0) {
+ // path was never indexed
+ return null;
+ }
+ DimTree dimTree = state.getDimTree(dim);
+ return getDim(dim, path, pathOrd, dimTree.iterator(pathOrd), topN);
+ } else {
+ if (path.length > 0) {
+ throw new IllegalArgumentException(
+ "Field is not configured as hierarchical, path should be 0 length");
+ }
+ OrdRange ordRange = state.getOrdRange(dim);
+ if (ordRange == null) {
+ // means dimension was never indexed
+ return null;
+ }
+ return getDim(dim, null, -1, ordRange.iterator(), topN);
}
- return getDim(dim, ordRange, topN);
}
- private FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {
+ private FacetResult getDim(
+ String dim, String[] path, int pathOrd, PrimitiveIterator.OfInt childOrds, int topN)
+ throws IOException {
TopOrdAndIntQueue q = null;
@@ -115,7 +136,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
int childCount = 0;
TopOrdAndIntQueue.OrdAndValue reuse = null;
- for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
+ while (childOrds.hasNext()) {
+ int ord = childOrds.next();
if (counts[ord] > 0) {
dimCount += counts[ord];
childCount++;
@@ -145,12 +167,19 @@ public class SortedSetDocValuesFacetCounts extends Facets {
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for (int i = labelValues.length - 1; i >= 0; i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
+ assert ordAndValue != null;
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
- labelValues[i] = new LabelAndValue(parts[1], ordAndValue.value);
+ labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
}
- return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
+ if (pathOrd == -1) {
+ // not hierarchical facet
+ return new FacetResult(dim, emptyPath, dimCount, labelValues, childCount);
+ } else {
+ // hierarchical facet
+ return new FacetResult(dim, path, counts[pathOrd], labelValues, childCount);
+ }
}
private void countOneSegment(
@@ -317,10 +346,19 @@ public class SortedSetDocValuesFacetCounts extends Facets {
public List<FacetResult> getAllDims(int topN) throws IOException {
List<FacetResult> results = new ArrayList<>();
- for (Map.Entry<String, OrdRange> ent : state.getPrefixToOrdRange().entrySet()) {
- FacetResult fr = getDim(ent.getKey(), ent.getValue(), topN);
- if (fr != null) {
- results.add(fr);
+ for (String dim : state.getDims()) {
+ if (stateConfig.getDimConfig(dim).hierarchical) {
+ DimTree dimTree = state.getDimTree(dim);
+ FacetResult fr = getDim(dim, emptyPath, dimTree.dimStartOrd, dimTree.iterator(), topN);
+ if (fr != null) {
+ results.add(fr);
+ }
+ } else {
+ OrdRange ordRange = state.getOrdRange(dim);
+ FacetResult fr = getDim(dim, emptyPath, -1, ordRange.iterator(), topN);
+ if (fr != null) {
+ results.add(fr);
+ }
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetField.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetField.java
index 68fd540..6138c6b 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetField.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetField.java
@@ -19,6 +19,7 @@ package org.apache.lucene.facet.sortedset;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.facet.FacetField;
+import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.IndexOptions;
/**
@@ -40,20 +41,39 @@ public class SortedSetDocValuesFacetField extends Field {
/** Dimension. */
public final String dim;
- /** Label. */
- public final String label;
+ /** Path. */
+ public final String[] path;
+
+ /**
+ * String form of path.
+ *
+ * @deprecated This field will be removed in a future version. {@link
+ * FacetsConfig#pathToString(String[])} can be applied to {@code path} as a replacement if
+ * string path is desired.
+ */
+ @Deprecated public final String label;
/** Sole constructor. */
- public SortedSetDocValuesFacetField(String dim, String label) {
+ public SortedSetDocValuesFacetField(String dim, String... path) {
super("dummy", TYPE);
- FacetField.verifyLabel(label);
+ for (String label : path) {
+ FacetField.verifyLabel(label);
+ }
FacetField.verifyLabel(dim);
+ if (path.length == 0) {
+ throw new IllegalArgumentException("path must have at least one element");
+ }
this.dim = dim;
- this.label = label;
+ this.path = path;
+ this.label = FacetsConfig.pathToString(path);
}
@Override
public String toString() {
- return "SortedSetDocValuesFacetField(dim=" + dim + " label=" + label + ")";
+ return "SortedSetDocValuesFacetField(dim="
+ + dim
+ + " path="
+ + FacetsConfig.pathToString(path)
+ + ")";
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java
index 1c2bc41..0d712d5 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java
@@ -17,10 +17,14 @@
package org.apache.lucene.facet.sortedset;
import java.io.IOException;
+import java.util.List;
import java.util.Map;
+import java.util.PrimitiveIterator;
+import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.FixedBitSet;
/**
* Wraps a {@link IndexReader} and resolves ords using existing {@link SortedSetDocValues} APIs
@@ -36,10 +40,7 @@ import org.apache.lucene.util.Accountable;
*/
public abstract class SortedSetDocValuesReaderState implements Accountable {
- /**
- * Holds start/end range of ords, which maps to one dimension (someday we may generalize it to map
- * to hierarchies within one dimension).
- */
+ /** Holds start/end range of ords, which maps to one dimension. Only used for flat hierarchies. */
public static final class OrdRange {
/** Start of range, inclusive: */
public final int start;
@@ -51,8 +52,111 @@ public abstract class SortedSetDocValuesReaderState implements Accountable {
this.start = start;
this.end = end;
}
+
+ /** Iterates from start to end ord (inclusive) */
+ public PrimitiveIterator.OfInt iterator() {
+ return new PrimitiveIterator.OfInt() {
+ int current = start;
+
+ @Override
+ public int nextInt() {
+ if (current > end) {
+ return INVALID_ORDINAL;
+ }
+ return current++;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return current <= end;
+ }
+ };
+ }
+ }
+
+ /**
+ * Holds children and sibling information for a single dimension. Only used with hierarchical
+ * dimensions.
+ */
+ public static final class DimTree {
+ private final FixedBitSet hasChildren;
+ // TODO: This array can take up a lot of space. Change type based on input size maybe?
+ private final int[] siblings;
+
+ /** The first ord of the dimension */
+ public final int dimStartOrd;
+
+ /** Sibling and children must be of same length */
+ public DimTree(int dimStartOrd, List<Integer> sibling, List<Boolean> hasChildren) {
+ if (sibling.size() != hasChildren.size()) {
+ throw new IllegalArgumentException(
+ "Sibling list and children list must have the same size. Got sibling list size of "
+ + sibling.size()
+ + " and child list size of "
+ + hasChildren.size());
+ }
+ this.hasChildren = new FixedBitSet(hasChildren.size());
+ this.siblings = new int[sibling.size()];
+ for (int i = 0; i < sibling.size(); i++) {
+ if (hasChildren.get(i)) {
+ assert i < sibling.size() - 1;
+ this.hasChildren.set(i);
+ }
+ assert this.siblings[i] < sibling.size();
+ this.siblings[i] = sibling.get(i);
+ }
+ this.dimStartOrd = dimStartOrd;
+ }
+
+ /** Iterates through all first level children of dimension */
+ public PrimitiveIterator.OfInt iterator() {
+ return iterator(dimStartOrd);
+ }
+
+ /** Iterates through all children of given pathOrd */
+ public PrimitiveIterator.OfInt iterator(int pathOrd) {
+ return new PrimitiveIterator.OfInt() {
+
+ boolean atStart = true;
+ int currentOrd = pathOrd - dimStartOrd;
+
+ @Override
+ public int nextInt() {
+ if (atStart) {
+ if (currentOrd < 0 || currentOrd >= hasChildren.length()) {
+ return INVALID_ORDINAL;
+ }
+ atStart = false;
+ if (hasChildren.get(currentOrd)) {
+ currentOrd++;
+ return currentOrd + dimStartOrd;
+ } else {
+ return INVALID_ORDINAL;
+ }
+ } else {
+ currentOrd = siblings[currentOrd];
+ return currentOrd + dimStartOrd;
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (atStart) {
+ if (currentOrd < 0 || currentOrd >= hasChildren.length()) {
+ return false;
+ }
+ return hasChildren.get(currentOrd);
+ } else {
+ return siblings[currentOrd] != INVALID_ORDINAL;
+ }
+ }
+ };
+ }
}
+ /** Invalid ordinal const */
+ public static final int INVALID_ORDINAL = -1;
+
/** Sole constructor. */
protected SortedSetDocValuesReaderState() {}
@@ -62,15 +166,28 @@ public abstract class SortedSetDocValuesReaderState implements Accountable {
/** Indexed field we are reading. */
public abstract String getField();
+ /** Returns top-level index reader. */
+ public abstract IndexReader getReader();
+
+ /** Number of unique labels. */
+ public abstract int getSize();
+
+ /** Returns the associated facet config. */
+ public abstract FacetsConfig getFacetsConfig();
+
+ /* Only used for flat facets (dim/value) */
+
/** Returns the {@link OrdRange} for this dimension. */
public abstract OrdRange getOrdRange(String dim);
/** Returns mapping from prefix to {@link OrdRange}. */
public abstract Map<String, OrdRange> getPrefixToOrdRange();
- /** Returns top-level index reader. */
- public abstract IndexReader getReader();
+ /* Only used for hierarchical facets */
- /** Number of unique labels. */
- public abstract int getSize();
+ /** Returns the {@link DimTree} for this dimension. */
+ public abstract DimTree getDimTree(String dim);
+
+ /** Returns an iterable over all dimensions */
+ public abstract Iterable<String> getDims();
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java
index e945aa8..70b4d2f 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java
@@ -238,7 +238,7 @@ public abstract class FacetTestCase extends LuceneTestCase {
} else if (b.value.doubleValue() > a.value.doubleValue()) {
return 1;
} else {
- return 0;
+ return a.dim.compareTo(b.dim);
}
}
});
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
index d6e9b0b..3f72a76 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
@@ -18,14 +18,19 @@ package org.apache.lucene.facet.sortedset;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
@@ -47,6 +52,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NamedThreadFactory;
public class TestSortedSetDocValuesFacets extends FacetTestCase {
@@ -85,6 +91,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
try {
Facets facets = getAllFacets(searcher, state, exec);
+ // value should ideally be 2 but SSDV facets are bugged here
assertEquals(
"dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n",
facets.getTopChildren(10, "a").toString());
@@ -105,6 +112,84 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testBasicHierarchical() throws Exception {
+ FacetsConfig config = new FacetsConfig();
+ config.setMultiValued("a", true);
+ config.setMultiValued("c", true);
+ config.setHierarchical("c", true);
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo"));
+ doc.add(new SortedSetDocValuesFacetField("a", "bar"));
+ doc.add(new SortedSetDocValuesFacetField("a", "zoo"));
+ doc.add(new SortedSetDocValuesFacetField("b", "baz"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bee"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bif"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bif", "baf"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "biz"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "biz", "bar"));
+ writer.addDocument(config.build(doc));
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo"));
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bif", "baf"));
+ writer.addDocument(config.build(doc));
+
+ // NRT open
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ // Per-top-reader state:
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ Facets facets = getAllFacets(searcher, state, exec);
+
+ // since a is not set to be hierarchical, its value count will be wrong because ancestral
+ // paths are not indexed
+ assertEquals(
+ "dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n",
+ facets.getTopChildren(10, "a").toString());
+ assertEquals(
+ "dim=b path=[] value=1 childCount=1\n baz (1)\n",
+ facets.getTopChildren(10, "b").toString());
+ assertEquals(
+ "dim=c path=[buzz] value=2 childCount=3\n bif (2)\n bee (1)\n biz (1)\n",
+ facets.getTopChildren(10, "c", "buzz").toString());
+ assertEquals(
+ "dim=c path=[buzz, bif] value=2 childCount=1\n baf (2)\n",
+ facets.getTopChildren(10, "c", "buzz", "bif").toString());
+
+ // DrillDown:
+ DrillDownQuery q = new DrillDownQuery(config);
+ q.add("a", "foo");
+ q.add("b", "baz");
+ TopDocs hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("c", "buzz", "bif");
+ hits = searcher.search(q, 2);
+ assertEquals(2, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("c", "buzz", "biz", "bar");
+ hits = searcher.search(q, 2);
+ assertEquals(1, hits.totalHits.value);
+ } finally {
+ if (exec != null) exec.shutdownNow();
+ }
+ }
+ }
+ }
+
// See: LUCENE-10070
public void testCountAll() throws Exception {
try (Directory dir = newDirectory();
@@ -158,6 +243,67 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testHierarchicalCountAll() throws Exception {
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+ FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("b", true);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "0", Field.Store.NO));
+ doc.add(new SortedSetDocValuesFacetField("a", "foo"));
+ doc.add(new SortedSetDocValuesFacetField("b", "buzz", "bee"));
+ writer.addDocument(config.build(doc));
+
+ doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new SortedSetDocValuesFacetField("a", "bar"));
+ doc.add(new SortedSetDocValuesFacetField("b", "buzz", "baz"));
+ writer.addDocument(config.build(doc));
+
+ writer.deleteDocuments(new Term("id", "0"));
+
+ // NRT open
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ // Per-top-reader state:
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+
+ Facets facets = new SortedSetDocValuesFacetCounts(state);
+
+ assertEquals(
+ "dim=a path=[] value=1 childCount=1\n bar (1)\n",
+ facets.getTopChildren(10, "a").toString());
+ assertEquals(
+ "dim=b path=[buzz] value=1 childCount=1\n baz (1)\n",
+ facets.getTopChildren(10, "b", "buzz").toString());
+
+ ExecutorService exec =
+ new ThreadPoolExecutor(
+ 1,
+ TestUtil.nextInt(random(), 2, 6),
+ Long.MAX_VALUE,
+ TimeUnit.MILLISECONDS,
+ new LinkedBlockingQueue<Runnable>(),
+ new NamedThreadFactory("TestIndexSearcher"));
+ try {
+ facets = new ConcurrentSortedSetDocValuesFacetCounts(state, exec);
+
+ assertEquals(
+ "dim=a path=[] value=1 childCount=1\n bar (1)\n",
+ facets.getTopChildren(10, "a").toString());
+ assertEquals(
+ "dim=b path=[buzz] value=1 childCount=1\n baz (1)\n",
+ facets.getTopChildren(10, "b", "buzz").toString());
+ } finally {
+ exec.shutdownNow();
+ }
+ }
+ }
+ }
+
public void testBasicSingleValued() throws Exception {
FacetsConfig config = new FacetsConfig();
config.setMultiValued("a", false);
@@ -210,6 +356,57 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testHierarchicalBasicSingleValues() throws Exception {
+ FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("c", true);
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bar"));
+ writer.addDocument(config.build(doc));
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("c", "buzz", "baz"));
+ writer.addDocument(config.build(doc));
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("c", "baz"));
+ writer.addDocument(config.build(doc));
+
+ // NRT open
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ // Per-top-reader state:
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ Facets facets = getAllFacets(searcher, state, exec);
+
+ assertEquals(
+ "dim=c path=[buzz] value=2 childCount=2\n bar (1)\n baz (1)\n",
+ facets.getTopChildren(10, "c", "buzz").toString());
+
+ DrillDownQuery q = new DrillDownQuery(config);
+ q.add("c", "buzz");
+ TopDocs hits = searcher.search(q, 1);
+ assertEquals(2, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("c", "buzz", "bar");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+ } finally {
+ if (exec != null) exec.shutdownNow();
+ }
+ }
+ }
+ }
+
public void testDrillDownOptions() throws Exception {
FacetsConfig config = new FacetsConfig();
config.setDrillDownTermsIndexing("c", FacetsConfig.DrillDownTermsIndexing.NONE);
@@ -293,6 +490,144 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testHierarchicalDrillDownOptions() throws Exception {
+ FacetsConfig config = new FacetsConfig();
+ config.setDrillDownTermsIndexing("c", FacetsConfig.DrillDownTermsIndexing.NONE);
+ config.setDrillDownTermsIndexing(
+ "d", FacetsConfig.DrillDownTermsIndexing.DIMENSION_AND_FULL_PATH);
+ config.setDrillDownTermsIndexing("e", FacetsConfig.DrillDownTermsIndexing.ALL_PATHS_NO_DIM);
+ config.setDrillDownTermsIndexing("f", FacetsConfig.DrillDownTermsIndexing.FULL_PATH_ONLY);
+ config.setDrillDownTermsIndexing("g", FacetsConfig.DrillDownTermsIndexing.ALL);
+ config.setHierarchical("c", true);
+ config.setHierarchical("d", true);
+ config.setHierarchical("e", true);
+ config.setHierarchical("f", true);
+ config.setHierarchical("g", true);
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("c", "biz", "baz"));
+ doc.add(new SortedSetDocValuesFacetField("d", "biz", "baz"));
+ doc.add(new SortedSetDocValuesFacetField("e", "biz", "baz"));
+ doc.add(new SortedSetDocValuesFacetField("f", "biz", "baz"));
+ doc.add(new SortedSetDocValuesFacetField("g", "biz", "baz"));
+ writer.addDocument(config.build(doc));
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo"));
+ writer.addDocument(config.build(doc));
+
+ // NRT open
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+ // Drill down with different indexing configuration options
+ DrillDownQuery q = new DrillDownQuery(config);
+ q.add("c");
+ TopDocs hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("c", "biz");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("c", "biz", "baz");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("c", "foo");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("d");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("d", "foo");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("d", "biz");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("d", "biz", "baz");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("e");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("e", "foo");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("e", "biz");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("e", "biz", "baz");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("f");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("f", "foo");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("f", "biz");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("f", "biz", "baz");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("g");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("g", "foo");
+ hits = searcher.search(q, 1);
+ assertEquals(0, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("g", "biz");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+
+ q = new DrillDownQuery(config);
+ q.add("g", "biz", "baz");
+ hits = searcher.search(q, 1);
+ assertEquals(1, hits.totalHits.value);
+ }
+ }
+ }
+
// LUCENE-5090
@SuppressWarnings("unused")
public void testStaleState() throws Exception {
@@ -319,29 +654,102 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
try (IndexReader r2 = writer.getReader()) {
IndexSearcher searcher = newSearcher(r2);
- FacetsCollector c = new FacetsCollector();
+ FacetsCollector c = new FacetsCollector();
+
+ searcher.search(new MatchAllDocsQuery(), c);
+
+ expectThrows(
+ IllegalStateException.class,
+ () -> {
+ new SortedSetDocValuesFacetCounts(state, c);
+ });
+ }
+ }
+ }
+ }
+
+ // LUCENE-5333
+ public void testSparseFacets() throws Exception {
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+
+ FacetsConfig config = new FacetsConfig();
+
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo1"));
+ writer.addDocument(config.build(doc));
+
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo2"));
+ doc.add(new SortedSetDocValuesFacetField("b", "bar1"));
+ writer.addDocument(config.build(doc));
+
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo3"));
+ doc.add(new SortedSetDocValuesFacetField("b", "bar2"));
+ doc.add(new SortedSetDocValuesFacetField("c", "baz1"));
+ writer.addDocument(config.build(doc));
+
+ // NRT open
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ // Per-top-reader state:
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
+
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ Facets facets = getAllFacets(searcher, state, exec);
+
+ // Ask for top 10 labels for any dims that have counts:
+ List<FacetResult> results = facets.getAllDims(10);
- searcher.search(new MatchAllDocsQuery(), c);
+ assertEquals(3, results.size());
+ assertEquals(
+ "dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
+ results.get(0).toString());
+ assertEquals(
+ "dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
+ results.get(1).toString());
+ assertEquals(
+ "dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString());
- expectThrows(
- IllegalStateException.class,
- () -> {
- new SortedSetDocValuesFacetCounts(state, c);
- });
+ Collection<Accountable> resources = state.getChildResources();
+ assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
+ if (searcher.getIndexReader().leaves().size() > 1) {
+ assertTrue(state.ramBytesUsed() > 0);
+ assertFalse(resources.isEmpty());
+ assertTrue(resources.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
+ } else {
+ assertEquals(0, state.ramBytesUsed());
+ assertTrue(resources.isEmpty());
+ }
+ } finally {
+ if (exec != null) exec.shutdownNow();
}
}
}
}
- // LUCENE-5333
- public void testSparseFacets() throws Exception {
+ public void testHierarchicalSparseFacets() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("d", true);
+ config.setHierarchical("e", true);
Document doc = new Document();
- doc.add(new SortedSetDocValuesFacetField("a", "foo1"));
+ doc.add(new SortedSetDocValuesFacetField("d", "foo", "bar"));
writer.addDocument(config.build(doc));
if (random().nextBoolean()) {
@@ -349,8 +757,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
doc = new Document();
- doc.add(new SortedSetDocValuesFacetField("a", "foo2"));
- doc.add(new SortedSetDocValuesFacetField("b", "bar1"));
+ doc.add(new SortedSetDocValuesFacetField("d", "foo", "baz"));
writer.addDocument(config.build(doc));
if (random().nextBoolean()) {
@@ -358,9 +765,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
doc = new Document();
- doc.add(new SortedSetDocValuesFacetField("a", "foo3"));
- doc.add(new SortedSetDocValuesFacetField("b", "bar2"));
- doc.add(new SortedSetDocValuesFacetField("c", "baz1"));
+ doc.add(new SortedSetDocValuesFacetField("e", "biz", "baz"));
writer.addDocument(config.build(doc));
// NRT open
@@ -369,7 +774,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
// Per-top-reader state:
SortedSetDocValuesReaderState state =
- new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
ExecutorService exec = randomExecutorServiceOrNull();
try {
@@ -378,15 +783,11 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
// Ask for top 10 labels for any dims that have counts:
List<FacetResult> results = facets.getAllDims(10);
- assertEquals(3, results.size());
- assertEquals(
- "dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
- results.get(0).toString());
+ assertEquals(2, results.size());
assertEquals(
- "dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
- results.get(1).toString());
+ "dim=d path=[] value=2 childCount=1\n foo (2)\n", results.get(0).toString());
assertEquals(
- "dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString());
+ "dim=e path=[] value=1 childCount=1\n biz (1)\n", results.get(1).toString());
Collection<Accountable> resources = state.getChildResources();
assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
@@ -448,6 +849,58 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testHierarchicalSomeSegmentsMissing() throws Exception {
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+
+ FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("b", true);
+
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo1"));
+ doc.add(new SortedSetDocValuesFacetField("b", "foo", "bar"));
+ writer.addDocument(config.build(doc));
+ writer.commit();
+
+ doc = new Document();
+ writer.addDocument(config.build(doc));
+ writer.commit();
+
+ doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("a", "foo2"));
+ doc.add(new SortedSetDocValuesFacetField("b", "foo", "buzz"));
+ writer.addDocument(config.build(doc));
+ writer.commit();
+
+ // NRT open
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ // Per-top-reader state:
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ Facets facets = getAllFacets(searcher, state, exec);
+
+ // Ask for top 10 labels for any dims that have counts:
+ assertEquals(
+ "dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n",
+ facets.getTopChildren(10, "a").toString());
+ assertEquals(
+ "dim=b path=[] value=2 childCount=1\n foo (2)\n",
+ facets.getTopChildren(10, "b").toString());
+ assertEquals(
+ "dim=b path=[foo] value=2 childCount=2\n bar (1)\n buzz (1)\n",
+ facets.getTopChildren(10, "b", "foo").toString());
+ } finally {
+ if (exec != null) exec.shutdownNow();
+ }
+ }
+ }
+ }
+
public void testRandom() throws Exception {
int fullIterations = LuceneTestCase.TEST_NIGHTLY ? 20 : 3;
for (int fullIter = 0; fullIter < fullIterations; fullIter++) {
@@ -562,6 +1015,274 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testRandomHierarchicalFlatMix() throws Exception {
+ int fullIterations = LuceneTestCase.TEST_NIGHTLY ? 20 : 3;
+ for (int fullIter = 0; fullIter < fullIterations; fullIter++) {
+ String[] tokens = getRandomTokens(10);
+
+ try (Directory indexDir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), indexDir)) {
+ FacetsConfig config = new FacetsConfig();
+ int numDocs = atLeast(1000);
+ // Most of the time allow up to 7 dims per doc, but occasionally limit all docs to a single
+ // dim:
+ int numDims;
+ if (random().nextInt(10) < 8) {
+ numDims = TestUtil.nextInt(random(), 1, 7);
+ } else {
+ numDims = 1;
+ }
+ boolean[] hierarchicalDims = new boolean[numDims];
+ for (int i = 0; i < numDims; i++) {
+ boolean isHierarchicalDim = random().nextBoolean();
+ config.setHierarchical("dim" + i, isHierarchicalDim);
+ hierarchicalDims[i] = isHierarchicalDim;
+ }
+ List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);
+ List<Set<SortedSetDocValuesFacetField>> testDocFacets = new ArrayList<>();
+ for (TestDoc testDoc : testDocs) {
+ Document doc = new Document();
+ Set<SortedSetDocValuesFacetField> docFacets = new HashSet<>();
+ doc.add(newStringField("content", testDoc.content, Field.Store.NO));
+ for (int i = 0; i < numDims; i++) {
+ if (hierarchicalDims[i]) {
+ int pathLength;
+ if (numDims == 1) {
+ pathLength = 1;
+ } else {
+ pathLength = random().nextInt(numDims - 1) + 1;
+ }
+ List<String> path = new ArrayList<>();
+ for (int j = 0; j < pathLength; j++) {
+ if (testDoc.dims[j] != null) {
+ path.add(testDoc.dims[j]);
+ }
+ }
+ doc.add(new SortedSetDocValuesFacetField("dim" + i, path.toArray(String[]::new)));
+ for (int j = 0; j < path.size(); j++) {
+ docFacets.add(
+ new SortedSetDocValuesFacetField(
+ "dim" + i, path.subList(0, j + 1).toArray(String[]::new)));
+ }
+ } else if (testDoc.dims[i] != null) {
+ doc.add(new SortedSetDocValuesFacetField("dim" + i, testDoc.dims[i]));
+ docFacets.add(new SortedSetDocValuesFacetField("dim" + i, testDoc.dims[i]));
+ }
+ }
+ testDocFacets.add(docFacets);
+ w.addDocument(config.build(doc));
+ }
+
+ // NRT open
+ try (IndexReader r = w.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ // Per-top-reader state:
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ int iters = atLeast(100);
+ for (int iter = 0; iter < iters; iter++) {
+ String searchToken = tokens[random().nextInt(tokens.length)];
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter content=" + searchToken);
+ }
+ FacetsCollector fc = new FacetsCollector();
+ FacetsCollector.search(
+ searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
+ Facets facets;
+ if (exec != null) {
+ facets = new ConcurrentSortedSetDocValuesFacetCounts(state, fc, exec);
+ } else {
+ facets = new SortedSetDocValuesFacetCounts(state, fc);
+ }
+ // Slow, yet hopefully bug-free, faceting:
+ Map<String, FacetResult> expectedResults = new HashMap<>();
+
+ for (int i = 0; i < testDocs.size(); i++) {
+ TestDoc doc = testDocs.get(i);
+ if (doc.content.equals(searchToken)) {
+ // goes through all facets paths in the doc
+ for (SortedSetDocValuesFacetField facetField : testDocFacets.get(i)) {
+ String[] path = facetField.path;
+ String parentDimPathString;
+ if (path.length == 1) {
+ parentDimPathString = facetField.dim;
+ } else {
+ parentDimPathString =
+ facetField.dim
+ + FacetsConfig.DELIM_CHAR
+ + FacetsConfig.pathToString(path, path.length - 1);
+ }
+ FacetResult result = expectedResults.get(parentDimPathString);
+ if (result == null) {
+ String[] resultPath = new String[path.length - 1];
+ System.arraycopy(path, 0, resultPath, 0, resultPath.length);
+ result =
+ new FacetResult(facetField.dim, resultPath, 0, new LabelAndValue[0], 0);
+ }
+ String child = path[path.length - 1];
+ LabelAndValue[] labelAndValues = result.labelValues;
+ boolean containsChild = false;
+ for (int k = 0; k < labelAndValues.length; k++) {
+ if (labelAndValues[k].label.equals(child)) {
+ containsChild = true;
+ labelAndValues[k] =
+ new LabelAndValue(
+ labelAndValues[k].label, labelAndValues[k].value.intValue() + 1);
+ break;
+ }
+ }
+ LabelAndValue[] newLabelAndValues;
+ int childCount = result.childCount;
+ if (containsChild == false) {
+ newLabelAndValues = new LabelAndValue[labelAndValues.length + 1];
+ System.arraycopy(
+ labelAndValues, 0, newLabelAndValues, 0, labelAndValues.length);
+ newLabelAndValues[newLabelAndValues.length - 1] = new LabelAndValue(child, 1);
+ childCount++;
+ } else {
+ newLabelAndValues = labelAndValues;
+ }
+ newLabelAndValues =
+ Arrays.stream(newLabelAndValues)
+ .sorted(
+ (o1, o2) -> {
+ if (o1.value.equals(o2.value)) {
+ return new BytesRef(o1.label).compareTo(new BytesRef(o2.label));
+ } else {
+ return o2.value.intValue() - o1.value.intValue();
+ }
+ })
+ .collect(Collectors.toList())
+ .toArray(LabelAndValue[]::new);
+ FacetResult newResult =
+ new FacetResult(result.dim, result.path, 0, newLabelAndValues, childCount);
+ expectedResults.put(parentDimPathString, newResult);
+ }
+ }
+ }
+
+ // second pass to update values
+ for (int i = 0; i < testDocs.size(); i++) {
+ TestDoc doc = testDocs.get(i);
+ if (doc.content.equals(searchToken)) {
+ Set<String> dimsCounted = new HashSet<>();
+ for (SortedSetDocValuesFacetField facetField : testDocFacets.get(i)) {
+ String dimPathString =
+ FacetsConfig.pathToString(facetField.dim, facetField.path);
+ FacetResult result = expectedResults.get(dimPathString);
+ FacetResult dimResult = expectedResults.get(facetField.dim);
+ if (result != null) {
+ expectedResults.put(
+ dimPathString,
+ new FacetResult(
+ result.dim,
+ result.path,
+ result.value.intValue() + 1,
+ result.labelValues,
+ result.childCount));
+ }
+ if (dimResult != null && dimsCounted.add(facetField.dim)) {
+ expectedResults.put(
+ facetField.dim,
+ new FacetResult(
+ dimResult.dim,
+ dimResult.path,
+ dimResult.value.intValue() + 1,
+ dimResult.labelValues,
+ dimResult.childCount));
+ }
+ }
+ }
+ }
+
+ List<FacetResult> expected = new ArrayList<>(expectedResults.values());
+
+ List<FacetResult> expectedAllDims = new ArrayList<>();
+ for (FacetResult result : expected) {
+ if (result.path.length == 0) {
+ expectedAllDims.add(result);
+ if (expectedAllDims.size() >= 10) {
+ break;
+ }
+ }
+ }
+ sortFacetResults(expectedAllDims);
+
+ List<FacetResult> actualAllDims = facets.getAllDims(10);
+
+ assertEquals(expectedAllDims, actualAllDims);
+
+ // Dfs through top children
+ for (FacetResult dimResult : actualAllDims) {
+ if (config.getDimConfig(dimResult.dim).hierarchical) {
+ Stack<String[]> stack = new Stack<>();
+ for (LabelAndValue labelAndValue : dimResult.labelValues) {
+ String[] path = new String[1];
+ path[0] = labelAndValue.label;
+ stack.add(path);
+ }
+ while (stack.empty() == false) {
+ String[] currPath = stack.pop();
+ FacetResult expectedResult =
+ getFacetResultForPath(expected, dimResult.dim, currPath);
+ FacetResult actualResult = facets.getTopChildren(10, dimResult.dim, currPath);
+ try {
+ assertEquals(expectedResult, actualResult);
+ } catch (AssertionError e) {
+ System.out.println(iter);
+ System.out.println(config.getDimConfig(dimResult.dim).hierarchical);
+ throw e;
+ }
+ if (actualResult != null) {
+ for (LabelAndValue labelAndValue : actualResult.labelValues) {
+ String[] path = new String[currPath.length + 1];
+ System.arraycopy(currPath, 0, path, 0, currPath.length);
+ path[path.length - 1] = labelAndValue.label;
+ stack.add(path);
+ }
+ }
+ }
+ }
+ }
+ }
+ } finally {
+ if (exec != null) exec.shutdownNow();
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Returns the first result in {@code allPaths} whose dim equals {@code dim} and whose path
+  * equals {@code path}, or {@code null} if there is none. An empty {@code path} selects the
+  * dimension-level result.
+  */
+ private static FacetResult getFacetResultForPath(
+     List<FacetResult> allPaths, String dim, String[] path) {
+   for (FacetResult result : allPaths) {
+     // Arrays.equals compares length and elements, so it covers both the empty-path
+     // (dimension-level) case and the nested-path case.
+     if (result.dim.equals(dim) && Arrays.equals(path, result.path)) {
+       return result;
+     }
+   }
+   return null;
+ }
+
public void testNonExistentDimension() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
@@ -592,6 +1313,75 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
}
}
+ public void testHierarchicalNonExistentDimension() throws Exception {
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+ FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("fizz", true);
+
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("foo", "bar"));
+ doc.add(new SortedSetDocValuesFacetField("fizz", "buzz", "baz"));
+ writer.addDocument(config.build(doc));
+ writer.commit();
+
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ Facets facets = getAllFacets(searcher, state, exec);
+ FacetResult result = facets.getTopChildren(5, "non-existent dimension");
+
+ // make sure the result is null (and no exception was thrown)
+ assertNull(result);
+
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> {
+ facets.getTopChildren(5, "non-existent dimension", "with a path");
+ });
+ } finally {
+ if (exec != null) exec.shutdownNow();
+ }
+ }
+ }
+ }
+
+ public void testHierarchicalNonExistentPath() throws Exception {
+ try (Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
+ FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("fizz", true);
+
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesFacetField("fizz", "buzz", "baz"));
+ writer.addDocument(config.build(doc));
+ writer.commit();
+
+ try (IndexReader r = writer.getReader()) {
+ IndexSearcher searcher = newSearcher(r);
+
+ SortedSetDocValuesReaderState state =
+ new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
+
+ ExecutorService exec = randomExecutorServiceOrNull();
+ try {
+ Facets facets = getAllFacets(searcher, state, exec);
+ FacetResult result = facets.getTopChildren(5, "fizz", "fake", "path");
+
+ // make sure the result is null (and no exception was thrown)
+ assertNull(result);
+ } finally {
+ if (exec != null) exec.shutdownNow();
+ }
+ }
+ }
+ }
+
private static Facets getAllFacets(
IndexSearcher searcher, SortedSetDocValuesReaderState state, ExecutorService exec)
throws IOException, InterruptedException {
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java
index 9040361..ee612db 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java
@@ -240,11 +240,6 @@ public class TestFacetLabel extends FacetTestCase {
expectThrows(
IllegalArgumentException.class,
() -> {
- new SortedSetDocValuesFacetField("dim", null);
- });
- expectThrows(
- IllegalArgumentException.class,
- () -> {
new SortedSetDocValuesFacetField("dim", "");
});
}