You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/05/19 12:45:35 UTC
[lucene] branch branch_9_2 updated: LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)
This is an automated email from the ASF dual-hosted git repository.
tomoko pushed a commit to branch branch_9_2
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9_2 by this push:
new ba8c3a806ad LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)
ba8c3a806ad is described below
commit ba8c3a806ada3d7b3c34d408e449a92376a8481b
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Thu May 19 21:38:52 2022 +0900
LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)
---
.../apache/lucene/analysis/fa/PersianAnalyzer.java | 21 +--------------------
.../lucene/analysis/fa/TestPersianStemFilter.java | 9 ++++++++-
2 files changed, 9 insertions(+), 21 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index afc41233906..f0202db90ec 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.IOUtils;
@@ -87,8 +86,6 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
}
}
- private final CharArraySet stemExclusionSet;
-
/** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
public PersianAnalyzer() {
this(DefaultSetHolder.DEFAULT_STOP_SET);
@@ -100,19 +97,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
*/
public PersianAnalyzer(CharArraySet stopwords) {
- this(stopwords, CharArraySet.EMPTY_SET);
- }
-
- /**
- * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is provided
- * this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
- *
- * @param stopwords a stopword set
- * @param stemExclusionSet a set of terms not to be stemmed
- */
- public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -136,11 +121,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* the order here is important: the stopword list is normalized with the
* above!
*/
- result = new StopFilter(result, stopwords);
- if (!stemExclusionSet.isEmpty()) {
- result = new SetKeywordMarkerFilter(result, stemExclusionSet);
- }
- return new TokenStreamComponents(source, new PersianStemFilter(result));
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
@Override
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
index 983dd077347..5b7716cf124 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
@@ -32,7 +32,14 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
- a = new PersianAnalyzer();
+ a =
+ new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer source = new MockTokenizer();
+ return new TokenStreamComponents(source, new PersianStemFilter(source));
+ }
+ };
}
@Override