You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2020/02/24 11:11:44 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton
This is an automated email from the ASF dual-hosted git repository.
romseygeek pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 90028a7 LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton
90028a7 is described below
commit 90028a7b935ad3205a8a6837cbb7ce1e9dbb6dff
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Mon Feb 24 11:08:48 2020 +0000
LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton
---
lucene/CHANGES.txt | 3 +
.../apache/lucene/queries/intervals/Intervals.java | 34 +++++++++++
.../lucene/queries/intervals/TestIntervals.java | 68 ++++++++++++++--------
3 files changed, 81 insertions(+), 24 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 15f53c2..c13cf6a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -24,6 +24,9 @@ API Changes
* LUCENE-9218: XY geometries API works in float space. (Ignacio Vera)
+* LUCENE-9212: Intervals.multiterm() takes CompiledAutomaton rather than plain Automaton
+ (Alan Woodward)
+
New Features
---------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java
index 6fd3901..5d835bd 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java
@@ -203,7 +203,10 @@ public final class Intervals {
* @param pattern string representation of the given automaton, mostly used in exception messages
*
* @throws IllegalStateException if the automaton accepts more than 128 terms
+ *
+ * @deprecated use {@link #multiterm(CompiledAutomaton, String)}
*/
+ @Deprecated
public static IntervalsSource multiterm(Automaton automaton, String pattern) {
return multiterm(automaton, 128, pattern);
}
@@ -219,11 +222,42 @@ public final class Intervals {
* @param pattern string representation of the given automaton, mostly used in exception messages
*
* @throws IllegalStateException if the automaton accepts more than {@code maxExpansions} terms
+ *
+ * @deprecated use {@link #multiterm(CompiledAutomaton, int, String)}
*/
+ @Deprecated
public static IntervalsSource multiterm(Automaton automaton, int maxExpansions, String pattern) {
CompiledAutomaton ca = new CompiledAutomaton(automaton);
return new MultiTermIntervalsSource(ca, maxExpansions, pattern);
}
+
+ /**
+ * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted by the given automaton
+ *
+ * @param ca an automaton accepting matching terms
+ * @param pattern string representation of the given automaton, mostly used in exception messages
+ *
+ * @throws IllegalStateException if the automaton accepts more than 128 terms
+ */
+ public static IntervalsSource multiterm(CompiledAutomaton ca, String pattern) {
+ return multiterm(ca, 128, pattern);
+ }
+
+ /**
+ * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted by the given automaton
+ *
+ * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
+ * can be both slow and memory-intensive
+ *
+ * @param ca an automaton accepting matching terms
+ * @param maxExpansions the maximum number of terms to expand to
+ * @param pattern string representation of the given automaton, mostly used in exception messages
+ *
+ * @throws IllegalStateException if the automaton accepts more than {@code maxExpansions} terms
+ */
+ public static IntervalsSource multiterm(CompiledAutomaton ca, int maxExpansions, String pattern) {
+ return new MultiTermIntervalsSource(ca, maxExpansions, pattern);
+ }
/**
* Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
index e98f115..b230dd2 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
@@ -43,7 +43,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchesIterator;
-import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
@@ -51,6 +50,8 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -885,36 +886,31 @@ public class TestIntervals extends LuceneTestCase {
}
public void testPrefix() throws IOException {
- for (IntervalsSource source : Arrays.asList(Intervals.prefix(new BytesRef("p")),
- Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("p")), "p*" ) )) {
- checkIntervals(source, "field1", 5, new int[][]{
- {},
- { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10, 27, 27 },
- { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10 },
- { 7, 7 },
- { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10 },
- { 0, 0 }
- });
- MatchesIterator mi = getMatches(source, 1, "field1");
- assertNotNull(mi);
- assertMatch(mi, 0, 0, 0, 5);
- assertMatch(mi, 1, 1, 6, 14);
- }
- for (IntervalsSource noSuch : Arrays.asList(Intervals.prefix(new BytesRef("qqq")),
- Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("qqq")), "qqq*" ))) {
- checkIntervals(noSuch, "field1", 0, new int[][]{});
- }
+ IntervalsSource source = Intervals.prefix(new BytesRef("p"));
+ checkIntervals(source, "field1", 5, new int[][]{
+ {},
+ {0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10, 27, 27},
+ {0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10},
+ {7, 7},
+ {0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10},
+ {0, 0}
+ });
+ MatchesIterator mi = getMatches(source, 1, "field1");
+ assertNotNull(mi);
+ assertMatch(mi, 0, 0, 0, 5);
+ assertMatch(mi, 1, 1, 6, 14);
+
+ IntervalsSource noSuch = Intervals.prefix(new BytesRef("qqq"));
+ checkIntervals(noSuch, "field1", 0, new int[][]{});
- for (IntervalsSource source : Arrays.asList(Intervals.prefix(new BytesRef("p"), 1),
- Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("p")), 1, "p*")) ) {
+ IntervalsSource s = Intervals.prefix(new BytesRef("p"), 1);
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
- source.intervals("field1", ctx);
+ s.intervals("field1", ctx);
}
});
assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
- }
checkVisits(Intervals.prefix(new BytesRef("p")), 1);
}
@@ -964,4 +960,28 @@ public class TestIntervals extends LuceneTestCase {
}
+ public void testMultiTerm() throws IOException {
+ RegExp re = new RegExp("p.*e");
+ IntervalsSource source = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), re.toString());
+
+ checkIntervals(source, "field1", 5, new int[][]{
+ {},
+ { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
+ { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
+ { 7, 7 },
+ { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
+ { 0, 0 }
+ });
+
+ IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
+ IntervalsSource s = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), 1, re.toString());
+ for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
+ s.intervals("field1", ctx);
+ }
+ });
+ assertEquals("Automaton [\\p(.)*\\e] expanded to too many terms (limit 1)", e.getMessage());
+
+ checkVisits(source, 1);
+ }
+
}