You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2020/02/24 11:09:24 UTC

[lucene-solr] branch master updated: LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton

This is an automated email from the ASF dual-hosted git repository.

romseygeek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new ffb7caf  LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton
ffb7caf is described below

commit ffb7cafe9351cd6cd5181bc06dd053d586f6d63f
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Mon Feb 24 11:08:48 2020 +0000

    LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton
---
 lucene/CHANGES.txt                                 |  3 +
 .../apache/lucene/queries/intervals/Intervals.java | 34 +++++++++++
 .../lucene/queries/intervals/TestIntervals.java    | 68 ++++++++++++++--------
 3 files changed, 80 insertions(+), 25 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index dacf75e..6956eff 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -114,6 +114,9 @@ API Changes
 
 * LUCENE-9218: XY geometries API works in float space. (Ignacio Vera)
 
+* LUCENE-9212: Intervals.multiterm() now takes CompiledAutomaton; the plain Automaton variants are deprecated
+  (Alan Woodward)
+
 New Features
 ---------------------
 
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java
index 6fd3901..5d835bd 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java
@@ -203,7 +203,10 @@ public final class Intervals {
    * @param pattern string representation of the given automaton, mostly used in exception messages  
    *
    * @throws IllegalStateException if the automaton accepts more than 128 terms
+   *
+   * @deprecated use {@link #multiterm(CompiledAutomaton, String)}
    */
+  @Deprecated
   public static IntervalsSource multiterm(Automaton automaton, String pattern) {
     return multiterm(automaton, 128, pattern);
   }
@@ -219,11 +222,42 @@ public final class Intervals {
    * @param pattern string representation of the given automaton, mostly used in exception messages  
    *
    * @throws IllegalStateException if the automaton accepts more than {@code maxExpansions} terms
+   *
+   * @deprecated use {@link #multiterm(CompiledAutomaton, int, String)}
    */
+  @Deprecated
   public static IntervalsSource multiterm(Automaton automaton, int maxExpansions, String pattern) {
     CompiledAutomaton ca = new CompiledAutomaton(automaton);
     return new MultiTermIntervalsSource(ca, maxExpansions, pattern);
   }
+
+  /**
+   * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted by the given automaton
+   *
+   * @param ca      a compiled automaton accepting matching terms
+   * @param pattern string representation of the given automaton, mostly used in exception messages
+   *
+   * @throws IllegalStateException if the automaton accepts more than 128 terms
+   */
+  public static IntervalsSource multiterm(CompiledAutomaton ca, String pattern) {
+    return multiterm(ca, 128, pattern);
+  }
+
+  /**
+   * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted by the given automaton
+   *
+   * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
+   * can be both slow and memory-intensive
+   *
+   * @param ca            a compiled automaton accepting matching terms
+   * @param maxExpansions the maximum number of terms to expand to
+   * @param pattern string representation of the given automaton, mostly used in exception messages
+   *
+   * @throws IllegalStateException if the automaton accepts more than {@code maxExpansions} terms
+   */
+  public static IntervalsSource multiterm(CompiledAutomaton ca, int maxExpansions, String pattern) {
+    return new MultiTermIntervalsSource(ca, maxExpansions, pattern);
+  }
   
   /**
    * Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
index a2d783e..4addd2f 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
@@ -20,7 +20,6 @@ package org.apache.lucene.queries.intervals;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -44,7 +43,6 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchesIterator;
-import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.TermQuery;
@@ -52,6 +50,8 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -886,36 +886,30 @@ public class TestIntervals extends LuceneTestCase {
   }
 
   public void testPrefix() throws IOException {
-    for (IntervalsSource source : List.of(Intervals.prefix(new BytesRef("p")),
-        Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("p")), "p*" ) )) {
-      checkIntervals(source, "field1", 5, new int[][]{
-          {},
-          { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10, 27, 27 },
-          { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10 },
-          { 7, 7 },
-          { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10 },
-          { 0, 0 }
-      });
-      MatchesIterator mi = getMatches(source, 1, "field1");
-      assertNotNull(mi);
-      assertMatch(mi, 0, 0, 0, 5);
-      assertMatch(mi, 1, 1, 6, 14);
-    }
+    IntervalsSource source = Intervals.prefix(new BytesRef("p"));
+    checkIntervals(source, "field1", 5, new int[][]{
+        {},
+        {0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10, 27, 27},
+        {0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10},
+        {7, 7},
+        {0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10},
+        {0, 0}
+    });
+    MatchesIterator mi = getMatches(source, 1, "field1");
+    assertNotNull(mi);
+    assertMatch(mi, 0, 0, 0, 5);
+    assertMatch(mi, 1, 1, 6, 14);
 
-    for (IntervalsSource noSuch : List.of(Intervals.prefix(new BytesRef("qqq")),
-        Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("qqq")), "qqq*" ))) {
-      checkIntervals(noSuch, "field1", 0, new int[][]{});
-    }
+    IntervalsSource noSuch = Intervals.prefix(new BytesRef("qqq"));
+    checkIntervals(noSuch, "field1", 0, new int[][]{});
 
-    for (IntervalsSource source : List.of(Intervals.prefix(new BytesRef("p"), 1), 
-        Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("p")), 1, "p*")) ) {
+    IntervalsSource s = Intervals.prefix(new BytesRef("p"), 1);
       IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
         for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
-          source.intervals("field1", ctx);
+          s.intervals("field1", ctx);
         }
       });
       assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
-    }
 
     checkVisits(Intervals.prefix(new BytesRef("p")), 1);
   }
@@ -965,4 +959,28 @@ public class TestIntervals extends LuceneTestCase {
 
   }
 
+  public void testMultiTerm() throws IOException {
+    RegExp re = new RegExp("p.*e");
+    IntervalsSource source = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), re.toString());
+
+    checkIntervals(source, "field1", 5, new int[][]{
+        {},
+        { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
+        { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
+        { 7, 7 },
+        { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
+        { 0, 0 }
+    });
+
+    IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
+      IntervalsSource s = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), 1, re.toString());
+      for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
+        s.intervals("field1", ctx);
+      }
+    });
+    assertEquals("Automaton [\\p(.)*\\e] expanded to too many terms (limit 1)", e.getMessage());
+
+    checkVisits(source, 1);
+  }
+
 }