You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/10 08:34:49 UTC

[lucene-solr] branch master updated: LUCENE-9752: Hunspell Stemmer: reduce parameter count (#2333)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new c4c569b  LUCENE-9752: Hunspell Stemmer: reduce parameter count (#2333)
c4c569b is described below

commit c4c569b998bcea0c835ab852af21be254d68756b
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Wed Feb 10 09:34:36 2021 +0100

    LUCENE-9752: Hunspell Stemmer: reduce parameter count (#2333)
---
 .../apache/lucene/analysis/hunspell/Stemmer.java   | 41 ++++++----------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 159642d..3c8e186 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -289,8 +289,6 @@ final class Stemmer {
           -1,
           0,
           true,
-          true,
-          false,
           false,
           originalCase,
           processor);
@@ -408,12 +406,9 @@ final class Stemmer {
    *     checked against the word
    * @param recursionDepth current recursiondepth
    * @param doPrefix true if we should remove prefixes
-   * @param doSuffix true if we should remove suffixes
    * @param previousWasPrefix true if the previous removal was a prefix: if we are removing a
    *     suffix, and it has no continuation requirements, it's ok. but two prefixes
    *     (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse.
-   * @param circumfix true if the previous prefix removal was signed as a circumfix this means inner
-   *     most suffix must also contain circumfix flag.
    * @param originalCase if non-null, represents original word case to disallow case variations of
    *     word with KEEPCASE flags
    * @return whether the processing should be continued
@@ -428,9 +423,7 @@ final class Stemmer {
       int prefixId,
       int recursionDepth,
       boolean doPrefix,
-      boolean doSuffix,
       boolean previousWasPrefix,
-      boolean circumfix,
       WordCase originalCase,
       RootProcessor processor)
       throws IOException {
@@ -478,7 +471,6 @@ final class Stemmer {
                 -1,
                 recursionDepth,
                 true,
-                circumfix,
                 originalCase,
                 processor)) {
               return false;
@@ -488,7 +480,7 @@ final class Stemmer {
       }
     }
 
-    if (doSuffix && dictionary.suffixes != null) {
+    if (dictionary.suffixes != null) {
       FST<IntsRef> fst = dictionary.suffixes;
       FST.Arc<IntsRef> arc = suffixArcs[recursionDepth];
       fst.getFirstArc(arc);
@@ -533,7 +525,6 @@ final class Stemmer {
                 prefixId,
                 recursionDepth,
                 false,
-                circumfix,
                 originalCase,
                 processor)) {
               return false;
@@ -674,13 +665,12 @@ final class Stemmer {
       int prefixId,
       int recursionDepth,
       boolean prefix,
-      boolean circumfix,
       WordCase originalCase,
       RootProcessor processor)
       throws IOException {
     char flag = dictionary.affixData(affix, Dictionary.AFFIX_FLAG);
 
-    boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix);
+    boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix, prefixId);
     IntsRef forms = skipLookup ? null : dictionary.lookupWord(strippedWord, offset, length);
     if (forms != null) {
       for (int i = 0; i < forms.length; i += formStep) {
@@ -698,15 +688,6 @@ final class Stemmer {
             }
           }
 
-          // if circumfix was previously set by a prefix, we must check this suffix,
-          // to ensure it has it, and vice versa
-          if (dictionary.circumfix != Dictionary.FLAG_UNSET) {
-            boolean suffixCircumfix = isFlagAppendedByAffix(affix, dictionary.circumfix);
-            if (circumfix != suffixCircumfix) {
-              continue;
-            }
-          }
-
           // we are looking for a case variant, but this word does not allow it
           if (!acceptCase(originalCase, entryId, strippedWord, offset, length)) {
             continue;
@@ -730,12 +711,6 @@ final class Stemmer {
       }
     }
 
-    // if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we
-    // have that flag
-    if (dictionary.circumfix != Dictionary.FLAG_UNSET && !circumfix && prefix) {
-      circumfix = isFlagAppendedByAffix(affix, dictionary.circumfix);
-    }
-
     if (dictionary.isCrossProduct(affix) && recursionDepth <= 1) {
       boolean doPrefix;
       if (recursionDepth == 0) {
@@ -774,9 +749,7 @@ final class Stemmer {
           prefixId,
           recursionDepth + 1,
           doPrefix,
-          true,
           prefix,
-          circumfix,
           originalCase,
           processor);
     }
@@ -784,7 +757,15 @@ final class Stemmer {
     return true;
   }
 
-  private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix) {
+  private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix, int prefixId) {
+    char circumfix = dictionary.circumfix;
+    // if circumfix was previously set by a prefix, we must check this suffix,
+    // to ensure it has it, and vice versa
+    if (isSuffix
+        && isFlagAppendedByAffix(prefixId, circumfix) != isFlagAppendedByAffix(affix, circumfix)) {
+      return true;
+    }
+
     if (isFlagAppendedByAffix(affix, dictionary.needaffix)) {
       return !isSuffix
           || previousAffix < 0