You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2019/06/10 15:14:59 UTC

[lucene-solr] branch master updated: LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals

This is an automated email from the ASF dual-hosted git repository.

romseygeek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new e8950f4  LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals
e8950f4 is described below

commit e8950f4a528605f9be17c644eef4f47d0659317b
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Mon Jun 10 15:49:15 2019 +0100

    LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals
---
 lucene/CHANGES.txt                                 |  3 ++
 .../apache/lucene/search/intervals/Intervals.java  | 36 ++++++++++++++++++++--
 .../search/intervals/MultiTermIntervalsSource.java |  4 +--
 .../lucene/search/intervals/TestIntervals.java     | 16 ++++++++++
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e2dd243..1c818bd 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -85,6 +85,9 @@ Improvements
 
 * LUCENE-8818: Fix smokeTestRelease.py encoding bug (janhoy)
 
+* LUCENE-8845: Allow Intervals.prefix() and Intervals.wildcard() to specify
+  their maximum allowed expansions (Alan Woodward)
+
 Test Framework
 
 * LUCENE-8825: CheckHits now display the shard index in case of mismatch
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
index d579c6f..cfe132a 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
@@ -147,8 +147,23 @@ public final class Intervals {
    * @throws IllegalStateException if the prefix expands to more than 128 terms
    */
   public static IntervalsSource prefix(String prefix) {
+    return prefix(prefix, 128);
+  }
+
+  /**
+   * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix
+   *
+   * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
+   * can be both slow and memory-intensive
+   *
+   * @param prefix        the prefix to expand
+   * @param maxExpansions the maximum number of terms to expand to
+   *
+   * @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
+   */
+  public static IntervalsSource prefix(String prefix, int maxExpansions) {
     CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef(prefix)));
-    return new MultiTermIntervalsSource(ca, 128, prefix);
+    return new MultiTermIntervalsSource(ca, maxExpansions, prefix + "*");
   }
 
   /**
@@ -159,8 +174,25 @@ public final class Intervals {
    * @see WildcardQuery for glob format
    */
   public static IntervalsSource wildcard(String wildcard) {
+    return wildcard(wildcard, 128);
+  }
+
+  /**
+   * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob
+   *
+   * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
+   * can be both slow and memory-intensive
+   *
+   * @param wildcard the glob to expand
+   * @param maxExpansions the maximum number of terms to expand to
+   *
+   * @throws IllegalStateException if the wildcard glob expands to more than {@code maxExpansions} terms
+   *
+   * @see WildcardQuery for glob format
+   */
+  public static IntervalsSource wildcard(String wildcard, int maxExpansions) {
     CompiledAutomaton ca = new CompiledAutomaton(WildcardQuery.toAutomaton(new Term("", wildcard)));
-    return new MultiTermIntervalsSource(ca, 128, wildcard);
+    return new MultiTermIntervalsSource(ca, maxExpansions, wildcard);
   }
 
   /**
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java
index 4b1d233..213ef73 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MultiTermIntervalsSource.java
@@ -57,8 +57,8 @@ class MultiTermIntervalsSource extends IntervalsSource {
     int count = 0;
     while ((term = te.next()) != null) {
       subSources.add(TermIntervalsSource.intervals(term, te));
-      if (count++ > maxExpansions) {
-        throw new IllegalStateException("Automaton " + this.pattern + " expanded to too many terms (limit " + maxExpansions + ")");
+      if (++count > maxExpansions) {
+        throw new IllegalStateException("Automaton [" + this.pattern + "] expanded to too many terms (limit " + maxExpansions + ")");
       }
     }
     if (subSources.size() == 0) {
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
index 8bf7f8a..2130514 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
@@ -753,6 +753,14 @@ public class TestIntervals extends LuceneTestCase {
 
     IntervalsSource noSuch = Intervals.prefix("qqq");
     checkIntervals(noSuch, "field1", 0, new int[][]{});
+
+    IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
+      IntervalsSource s = Intervals.prefix("p", 1);
+      for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
+        s.intervals("field1", ctx);
+      }
+    });
+    assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
   }
 
   public void testWildcard() throws IOException {
@@ -770,6 +778,14 @@ public class TestIntervals extends LuceneTestCase {
     assertMatch(mi, 2, 2, 15, 18);
     assertMatch(mi, 10, 10, 63, 66);
     assertMatch(mi, 17, 17, 97, 100);
+
+    IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
+      IntervalsSource s = Intervals.wildcard("?ot", 1);
+      for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
+        s.intervals("field1", ctx);
+      }
+    });
+    assertEquals("Automaton [?ot] expanded to too many terms (limit 1)", e.getMessage());
   }
 
   public void testWrappedFilters() throws IOException {