You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/10 08:34:49 UTC
[lucene-solr] branch master updated: LUCENE-9752: Hunspell Stemmer: reduce parameter count (#2333)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new c4c569b LUCENE-9752: Hunspell Stemmer: reduce parameter count (#2333)
c4c569b is described below
commit c4c569b998bcea0c835ab852af21be254d68756b
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Wed Feb 10 09:34:36 2021 +0100
LUCENE-9752: Hunspell Stemmer: reduce parameter count (#2333)
---
.../apache/lucene/analysis/hunspell/Stemmer.java | 41 ++++++----------------
1 file changed, 11 insertions(+), 30 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 159642d..3c8e186 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -289,8 +289,6 @@ final class Stemmer {
-1,
0,
true,
- true,
- false,
false,
originalCase,
processor);
@@ -408,12 +406,9 @@ final class Stemmer {
* checked against the word
* @param recursionDepth current recursiondepth
* @param doPrefix true if we should remove prefixes
- * @param doSuffix true if we should remove suffixes
* @param previousWasPrefix true if the previous removal was a prefix: if we are removing a
* suffix, and it has no continuation requirements, it's ok. but two prefixes
* (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse.
- * @param circumfix true if the previous prefix removal was signed as a circumfix this means inner
- * most suffix must also contain circumfix flag.
* @param originalCase if non-null, represents original word case to disallow case variations of
* word with KEEPCASE flags
* @return whether the processing should be continued
@@ -428,9 +423,7 @@ final class Stemmer {
int prefixId,
int recursionDepth,
boolean doPrefix,
- boolean doSuffix,
boolean previousWasPrefix,
- boolean circumfix,
WordCase originalCase,
RootProcessor processor)
throws IOException {
@@ -478,7 +471,6 @@ final class Stemmer {
-1,
recursionDepth,
true,
- circumfix,
originalCase,
processor)) {
return false;
@@ -488,7 +480,7 @@ final class Stemmer {
}
}
- if (doSuffix && dictionary.suffixes != null) {
+ if (dictionary.suffixes != null) {
FST<IntsRef> fst = dictionary.suffixes;
FST.Arc<IntsRef> arc = suffixArcs[recursionDepth];
fst.getFirstArc(arc);
@@ -533,7 +525,6 @@ final class Stemmer {
prefixId,
recursionDepth,
false,
- circumfix,
originalCase,
processor)) {
return false;
@@ -674,13 +665,12 @@ final class Stemmer {
int prefixId,
int recursionDepth,
boolean prefix,
- boolean circumfix,
WordCase originalCase,
RootProcessor processor)
throws IOException {
char flag = dictionary.affixData(affix, Dictionary.AFFIX_FLAG);
- boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix);
+ boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix, prefixId);
IntsRef forms = skipLookup ? null : dictionary.lookupWord(strippedWord, offset, length);
if (forms != null) {
for (int i = 0; i < forms.length; i += formStep) {
@@ -698,15 +688,6 @@ final class Stemmer {
}
}
- // if circumfix was previously set by a prefix, we must check this suffix,
- // to ensure it has it, and vice versa
- if (dictionary.circumfix != Dictionary.FLAG_UNSET) {
- boolean suffixCircumfix = isFlagAppendedByAffix(affix, dictionary.circumfix);
- if (circumfix != suffixCircumfix) {
- continue;
- }
- }
-
// we are looking for a case variant, but this word does not allow it
if (!acceptCase(originalCase, entryId, strippedWord, offset, length)) {
continue;
@@ -730,12 +711,6 @@ final class Stemmer {
}
}
- // if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we
- // have that flag
- if (dictionary.circumfix != Dictionary.FLAG_UNSET && !circumfix && prefix) {
- circumfix = isFlagAppendedByAffix(affix, dictionary.circumfix);
- }
-
if (dictionary.isCrossProduct(affix) && recursionDepth <= 1) {
boolean doPrefix;
if (recursionDepth == 0) {
@@ -774,9 +749,7 @@ final class Stemmer {
prefixId,
recursionDepth + 1,
doPrefix,
- true,
prefix,
- circumfix,
originalCase,
processor);
}
@@ -784,7 +757,15 @@ final class Stemmer {
return true;
}
- private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix) {
+ private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix, int prefixId) {
+ char circumfix = dictionary.circumfix;
+ // if circumfix was previously set by a prefix, we must check this suffix,
+ // to ensure it has it, and vice versa
+ if (isSuffix
+ && isFlagAppendedByAffix(prefixId, circumfix) != isFlagAppendedByAffix(affix, circumfix)) {
+ return true;
+ }
+
if (isFlagAppendedByAffix(affix, dictionary.needaffix)) {
return !isSuffix
|| previousAffix < 0