You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/05/21 03:43:07 UTC
[lucene] branch main updated: LUCENE-10312: MIGRATE entry and small follow-ups (#908)

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 71a9acb2e2a LUCENE-10312: MIGRATE entry and small follow-ups (#908)
71a9acb2e2a is described below

commit 71a9acb2e2aa55257021eefce1e5d8d390bc7048
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sat May 21 12:43:02 2022 +0900

    LUCENE-10312: MIGRATE entry and small follow-ups (#908)
---
 lucene/MIGRATE.md                                                | 5 +++++
 .../src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java  | 4 ++--
 .../org/apache/lucene/analysis/fa/TestPersianStemFilter.java     | 9 ++++++++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md
index a6836166046..8a1c5c535bb 100644
--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@@ -19,6 +19,11 @@
 
 ## Migration from Lucene 9.x to Lucene 10.0
 
+### PersianStemFilter is added to PersianAnalyzer (LUCENE-10312)
+
+PersianAnalyzer now includes PersianStemFilter, which changes analysis results. If you need exactly the same analysis
+behaviour as 9.x, clone `PersianAnalyzer` from 9.x or build your own analyzer with `CustomAnalyzer`.
+
 ### AutomatonQuery/CompiledAutomaton/RunAutomaton/RegExp no longer determinize (LUCENE-10010)
 
 These classes no longer take a `determinizeWorkLimit` and no longer determinize
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index afc41233906..2da9df1f3cc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -121,8 +121,8 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} built from a {@link
    *     StandardTokenizer} filtered with {@link LowerCaseFilter}, {@link DecimalDigitFilter},
-   *     {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} and Persian Stop
-   *     words
+   *     {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter}, Persian Stop words,
+   *     and {@link PersianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
index 983dd077347..5b7716cf124 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
@@ -32,7 +32,14 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase {
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    a = new PersianAnalyzer();
+    a =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            final Tokenizer source = new MockTokenizer();
+            return new TokenStreamComponents(source, new PersianStemFilter(source));
+          }
+        };
   }
 
   @Override