You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2023/05/26 15:55:31 UTC
[lucene] branch branch_9x updated: GH#12321: Marked DaciukMihovAutomatonBuilder as deprecated (#12332)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new a990c15286f GH#12321: Marked DaciukMihovAutomatonBuilder as deprecated (#12332)
a990c15286f is described below
commit a990c15286fe919dd24c99bcf113cd548be160b0
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Fri May 26 08:55:24 2023 -0700
GH#12321: Marked DaciukMihovAutomatonBuilder as deprecated (#12332)
Preparing to reduce visibility of this class in a future release
---
lucene/CHANGES.txt | 3 +++
.../apache/lucene/analysis/core/TestFlattenGraphFilter.java | 4 ++--
.../src/java/org/apache/lucene/util/automaton/Automata.java | 5 +++++
.../lucene/util/automaton/DaciukMihovAutomatonBuilder.java | 12 ++++++++++--
.../apache/lucene/analysis/TestAutomatonToTokenStream.java | 8 ++++----
.../apache/lucene/util/automaton/TestCompiledAutomaton.java | 2 +-
.../lucene/search/uhighlight/MemoryIndexOffsetStrategy.java | 4 ++--
.../lucene/search/uhighlight/TestUnifiedHighlighter.java | 7 +++----
8 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d2a4d240891..97cf5a08bf4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -15,6 +15,9 @@ API Changes
RuntimePermission "accessDeclaredMembers" is needed in applications using
SecurityManager. (Patrick Zhai, Ben Trent, Uwe Schindler)
+* GITHUB#12321: DaciukMihovAutomatonBuilder has been marked deprecated in preparation for reducing its visibility in
+ a future release. (Greg Miller)
+
New Features
---------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
index 7b35f56016c..7fa901ac5bf 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
@@ -40,8 +40,8 @@ import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
@@ -780,7 +780,7 @@ public class TestFlattenGraphFilter extends BaseTokenStreamTestCase {
acceptStrings.sort(Comparator.naturalOrder());
acceptStrings = acceptStrings.stream().limit(wordCount).collect(Collectors.toList());
- Automaton nonFlattenedAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+ Automaton nonFlattenedAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(nonFlattenedAutomaton);
TokenStream flattenedTokenStream = new FlattenGraphFilter(ts);
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
index 9a642338f09..def98684d07 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
@@ -40,6 +40,11 @@ import org.apache.lucene.util.StringHelper;
* @lucene.experimental
*/
public final class Automata {
+ /**
+ * {@link #makeStringUnion(Collection)} limits terms to this max length to ensure the stack
+ * doesn't overflow while building, since our algorithm currently relies on recursion.
+ */
+ public static final int MAX_STRING_UNION_TERM_LENGTH = 1000;
private Automata() {}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
index 2fe13101168..a0ff7d3f13a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
@@ -32,14 +32,19 @@ import org.apache.lucene.util.CharsRefBuilder;
*
* @see #build(Collection)
* @see Automata#makeStringUnion(Collection)
+ * @deprecated Visibility of this class will be reduced in a future release. Users can access this
+ * functionality directly through {@link Automata#makeStringUnion(Collection)}
*/
+@Deprecated
public final class DaciukMihovAutomatonBuilder {
/**
* This builder rejects terms that are more than 1k chars long since it then uses recursion based
* on the length of the string, which might cause stack overflows.
+ *
+ * @deprecated See {@link Automata#MAX_STRING_UNION_TERM_LENGTH}
*/
- public static final int MAX_TERM_LENGTH = 1_000;
+ @Deprecated public static final int MAX_TERM_LENGTH = 1_000;
/** The default constructor is private. Use static methods directly. */
private DaciukMihovAutomatonBuilder() {
@@ -193,7 +198,7 @@ public final class DaciukMihovAutomatonBuilder {
* or equal compared to any previous sequences added to this automaton (the input must be sorted).
*/
private void add(CharsRef current) {
- if (current.length > MAX_TERM_LENGTH) {
+ if (current.length > Automata.MAX_STRING_UNION_TERM_LENGTH) {
throw new IllegalArgumentException(
"This builder doesn't allow terms that are larger than 1,000 characters, got " + current);
}
@@ -266,7 +271,10 @@ public final class DaciukMihovAutomatonBuilder {
/**
* Build a minimal, deterministic automaton from a sorted list of {@link BytesRef} representing
* strings in UTF-8. These strings must be binary-sorted.
+ *
+ * @deprecated Please use {@link Automata#makeStringUnion(Collection)} instead
*/
+ @Deprecated
public static Automaton build(Collection<BytesRef> input) {
final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
index 27d2c72ddb0..d558ad6d369 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
@@ -22,8 +22,8 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {
@@ -31,7 +31,7 @@ public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {
List<BytesRef> acceptStrings = new ArrayList<>();
acceptStrings.add(new BytesRef("abc"));
- Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+ Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
assertTokenStreamContents(
ts,
@@ -48,7 +48,7 @@ public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {
acceptStrings.add(new BytesRef("123"));
acceptStrings.add(new BytesRef("abc"));
- Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+ Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
assertTokenStreamContents(
ts,
@@ -65,7 +65,7 @@ public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {
acceptStrings.add(new BytesRef("ab3"));
acceptStrings.add(new BytesRef("abc"));
- Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+ Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
assertTokenStreamContents(
ts,
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
index 5d63b8051cd..4640bea2dc2 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
@@ -35,7 +35,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
terms.add(new BytesRef(s));
}
Collections.sort(terms);
- final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
+ final Automaton a = Automata.makeStringUnion(terms);
return new CompiledAutomaton(a, true, false, determinizeWorkLimit, false);
}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index aad68190c64..851c4639df8 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -30,7 +30,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.lucene.util.automaton.Automata;
/**
* Uses an {@link Analyzer} on content to get offsets and then populates a {@link MemoryIndex}.
@@ -67,7 +67,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
// to build an automaton on them
List<BytesRef> filteredTerms =
Arrays.stream(components.getTerms())
- .filter(b -> b.length < DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH)
+ .filter(b -> b.length < Automata.MAX_STRING_UNION_TERM_LENGTH)
.collect(Collectors.toList());
allAutomata.add(CharArrayMatcher.fromTerms(filteredTerms));
}
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 6cc524a9fac..54792a569f7 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -58,7 +58,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.lucene.util.automaton.Automata;
import org.junit.After;
import org.junit.Before;
@@ -1671,12 +1671,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
Query query =
new BooleanQuery.Builder()
.add(
- new TermQuery(
- new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH))),
+ new TermQuery(new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH))),
BooleanClause.Occur.SHOULD)
.add(
new TermQuery(
- new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH + 1))),
+ new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH + 1))),
BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("title", "title")), BooleanClause.Occur.SHOULD)
.build();