You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2019/06/10 15:14:58 UTC
[lucene-solr] branch branch_8x updated: LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals
This is an automated email from the ASF dual-hosted git repository.
romseygeek pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 74a695e LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals
74a695e is described below
commit 74a695ee444d195421c9e5cceeee28a813b42cea
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Mon Jun 10 15:49:15 2019 +0100
LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals
---
lucene/CHANGES.txt | 3 ++
.../apache/lucene/search/intervals/Intervals.java | 36 ++++++++++++++++++++--
.../search/intervals/MultiTermIntervalsSource.java | 4 +--
.../lucene/search/intervals/TestIntervals.java | 16 ++++++++++
4 files changed, 55 insertions(+), 4 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 99a4db2..8d12d0f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -61,6 +61,9 @@ Improvements
* LUCENE-8818: Fix smokeTestRelease.py encoding bug (janhoy)
+* LUCENE-8845: Allow Intervals.prefix() and Intervals.wildcard() to specify
+ their maximum allowed expansions (Alan Woodward)
+
Test Framework
* LUCENE-8825: CheckHits now display the shard index in case of mismatch
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
index d579c6f..cfe132a 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
@@ -147,8 +147,23 @@ public final class Intervals {
* @throws IllegalStateException if the prefix expands to more than 128 terms
*/
public static IntervalsSource prefix(String prefix) {
+ return prefix(prefix, 128);
+ }
+
+ /**
+ * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix
+ *
+ * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
+ * can be both slow and memory-intensive
+ *
+ * @param prefix the prefix to expand
+ * @param maxExpansions the maximum number of terms to expand to
+ *
+ * @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
+ */
+ public static IntervalsSource prefix(String prefix, int maxExpansions) {
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef(prefix)));
- return new MultiTermIntervalsSource(ca, 128, prefix);
+ return new MultiTermIntervalsSource(ca, maxExpansions, prefix + "*");
}
/**
@@ -159,8 +174,25 @@ public final class Intervals {
* @see WildcardQuery for glob format
*/
public static IntervalsSource wildcard(String wildcard) {
+ return wildcard(wildcard, 128);
+ }
+
+ /**
+ * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob
+ *
+ * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
+ * can be both slow and memory-intensive
+ *
+ * @param wildcard the glob to expand
+ * @param maxExpansions the maximum number of terms to expand to
+ *
+ * @throws IllegalStateException if the wildcard glob expands to more than {@code maxExpansions} terms
+ *
+ * @see WildcardQuery for glob format
+ */
+ public static IntervalsSource wildcard(String wildcard, int maxExpansions) {
CompiledAutomaton ca = new CompiledAutomaton(WildcardQuery.toAutomaton(new Term("", wildcard)));
- return new MultiTermIntervalsSource(ca, 128, wildcard);
+ return new MultiTermIntervalsSource(ca, maxExpansions, wildcard);
}
/**
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java
index 4b1d233..213ef73 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java
@@ -57,8 +57,8 @@ class MultiTermIntervalsSource extends IntervalsSource {
int count = 0;
while ((term = te.next()) != null) {
subSources.add(TermIntervalsSource.intervals(term, te));
- if (count++ > maxExpansions) {
- throw new IllegalStateException("Automaton " + this.pattern + " expanded to too many terms (limit " + maxExpansions + ")");
+ if (++count > maxExpansions) {
+ throw new IllegalStateException("Automaton [" + this.pattern + "] expanded to too many terms (limit " + maxExpansions + ")");
}
}
if (subSources.size() == 0) {
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
index 8bf7f8a..2130514 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
@@ -753,6 +753,14 @@ public class TestIntervals extends LuceneTestCase {
IntervalsSource noSuch = Intervals.prefix("qqq");
checkIntervals(noSuch, "field1", 0, new int[][]{});
+
+ IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
+ IntervalsSource s = Intervals.prefix("p", 1);
+ for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
+ s.intervals("field1", ctx);
+ }
+ });
+ assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
}
public void testWildcard() throws IOException {
@@ -770,6 +778,14 @@ public class TestIntervals extends LuceneTestCase {
assertMatch(mi, 2, 2, 15, 18);
assertMatch(mi, 10, 10, 63, 66);
assertMatch(mi, 17, 17, 97, 100);
+
+ IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
+ IntervalsSource s = Intervals.wildcard("?ot", 1);
+ for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
+ s.intervals("field1", ctx);
+ }
+ });
+ assertEquals("Automaton [?ot] expanded to too many terms (limit 1)", e.getMessage());
}
public void testWrappedFilters() throws IOException {