You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/05/19 12:45:35 UTC
[lucene] branch branch_9_2 updated: LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch branch_9_2
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9_2 by this push:
     new ba8c3a806ad LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)
ba8c3a806ad is described below

commit ba8c3a806ada3d7b3c34d408e449a92376a8481b
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Thu May 19 21:38:52 2022 +0900

    LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)
---
 .../apache/lucene/analysis/fa/PersianAnalyzer.java  | 21 +--------------------
 .../lucene/analysis/fa/TestPersianStemFilter.java   |  9 ++++++++-
 2 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index afc41233906..f0202db90ec 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 
@@ -87,8 +86,6 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
     }
   }
 
-  private final CharArraySet stemExclusionSet;
-
   /** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
   public PersianAnalyzer() {
     this(DefaultSetHolder.DEFAULT_STOP_SET);
@@ -100,19 +97,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
    * @param stopwords a stopword set
    */
   public PersianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
-  }
-
-  /**
-   * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is provided
-   * this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
-   *
-   * @param stopwords a stopword set
-   * @param stemExclusionSet a set of terms not to be stemmed
-   */
-  public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
     super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -136,11 +121,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
      * the order here is important: the stopword list is normalized with the
      * above!
      */
-    result = new StopFilter(result, stopwords);
-    if (!stemExclusionSet.isEmpty()) {
-      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
-    }
-    return new TokenStreamComponents(source, new PersianStemFilter(result));
+    return new TokenStreamComponents(source, new StopFilter(result, stopwords));
   }
 
   @Override
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
index 983dd077347..5b7716cf124 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
@@ -32,7 +32,14 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase {
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    a = new PersianAnalyzer();
+    a =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            final Tokenizer source = new MockTokenizer();
+            return new TokenStreamComponents(source, new PersianStemFilter(source));
+          }
+        };
   }
 
   @Override