You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/01/29 07:24:38 UTC
[lucene-solr] branch master updated: LUCENE-9706: Hunspell: support NEEDAFFIX flag on affixes (#2262)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 4ba78f2 LUCENE-9706: Hunspell: support NEEDAFFIX flag on affixes (#2262)
4ba78f2 is described below
commit 4ba78f2ab25442c026623b62dc27c60347fd99d6
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Fri Jan 29 08:24:23 2021 +0100
LUCENE-9706: Hunspell: support NEEDAFFIX flag on affixes (#2262)
---
.../org/apache/lucene/analysis/hunspell/Dictionary.java | 2 +-
.../org/apache/lucene/analysis/hunspell/Stemmer.java | 17 +++++++++++++++--
.../lucene/analysis/hunspell/SpellCheckerTest.java | 5 +++++
.../org/apache/lucene/analysis/hunspell/needaffix5.aff | 13 +++++++++++++
.../org/apache/lucene/analysis/hunspell/needaffix5.dic | 2 ++
.../org/apache/lucene/analysis/hunspell/needaffix5.good | 11 +++++++++++
.../apache/lucene/analysis/hunspell/needaffix5.wrong | 3 +++
7 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index d9473a9..6d7638b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -76,7 +76,7 @@ public class Dictionary {
static final char[] NOFLAGS = new char[0];
- static final int FLAG_UNSET = 0;
+ static final char FLAG_UNSET = (char) 0;
private static final int DEFAULT_FLAGS = 65510;
private static final char HIDDEN_FLAG = (char) 65511; // called 'ONLYUPCASEFLAG' in Hunspell
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 6b6fb80..572473c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -191,7 +191,7 @@ final class Stemmer {
length,
context,
-1,
- (char) 0,
+ Dictionary.FLAG_UNSET,
-1,
0,
true,
@@ -361,6 +361,7 @@ final class Stemmer {
pureAffix ? length - i : strippedWord.length,
context,
prefix,
+ previous,
-1,
recursionDepth,
true,
@@ -413,6 +414,7 @@ final class Stemmer {
pureAffix ? i : strippedWord.length,
context,
suffix,
+ previous,
prefixId,
recursionDepth,
false,
@@ -543,6 +545,7 @@ final class Stemmer {
int length,
WordContext context,
int affix,
+ int previousAffix,
int prefixId,
int recursionDepth,
boolean prefix,
@@ -553,7 +556,8 @@ final class Stemmer {
List<CharsRef> stems = new ArrayList<>();
- IntsRef forms = dictionary.lookupWord(strippedWord, offset, length);
+ boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix);
+ IntsRef forms = skipLookup ? null : dictionary.lookupWord(strippedWord, offset, length);
if (forms != null) {
for (int i = 0; i < forms.length; i += formStep) {
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
@@ -651,6 +655,15 @@ final class Stemmer {
return stems;
}
+ private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix) {
+ if (isFlagAppendedByAffix(affix, dictionary.needaffix)) {
+ return !isSuffix
+ || previousAffix < 0
+ || isFlagAppendedByAffix(previousAffix, dictionary.needaffix);
+ }
+ return false;
+ }
+
private boolean isFlagAppendedByAffix(int affixId, char flag) {
if (affixId < 0 || flag == Dictionary.FLAG_UNSET) return false;
int appendId = dictionary.affixData(affixId, Dictionary.AFFIX_APPEND);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
index 30ceb58..dbfbbec 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
@@ -46,6 +46,11 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("i53643");
}
+ @Test
+ public void needAffixOnAffixes() throws Exception {
+ doTest("needaffix5");
+ }
+
public void testBreak() throws Exception {
doTest("break");
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.aff
new file mode 100644
index 0000000..6399a3e
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.aff
@@ -0,0 +1,13 @@
+# on affixes
+NEEDAFFIX X
+
+SFX A Y 2
+SFX A 0 suf/B .
+SFX A 0 pseudosuf/XB .
+
+SFX B Y 1
+SFX B 0 bar .
+
+PFX C Y 2
+PFX C 0 pre .
+PFX C 0 pseudopre/X .
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.dic
new file mode 100644
index 0000000..83131e2
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.dic
@@ -0,0 +1,2 @@
+1
+foo/AC
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.good
new file mode 100644
index 0000000..d1b86bf
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.good
@@ -0,0 +1,11 @@
+foo
+prefoo
+foosuf
+prefoosuf
+foosufbar
+prefoosufbar
+pseudoprefoosuf
+pseudoprefoosufbar
+pseudoprefoopseudosufbar
+prefoopseudosuf
+prefoopseudosufbar
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.wrong
new file mode 100644
index 0000000..fdd1797
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix5.wrong
@@ -0,0 +1,3 @@
+pseudoprefoo
+foopseudosuf
+pseudoprefoopseudosuf