You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2023/05/19 19:36:13 UTC
[lucene] branch main updated: hunspell (minor): reduce allocations when processing compound rules (#12316)
This is an automated email from the ASF dual-hosted git repository.
donnerpeter pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new a454388b80e hunspell (minor): reduce allocations when processing compound rules (#12316)
a454388b80e is described below
commit a454388b80e2a92640b79792f2238acd0e7872b9
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Fri May 19 21:36:05 2023 +0200
hunspell (minor): reduce allocations when processing compound rules (#12316)
---
.../apache/lucene/analysis/hunspell/Dictionary.java | 8 ++++----
.../org/apache/lucene/analysis/hunspell/Hunspell.java | 18 ++++++++++++++++--
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index 2249e70249e..b5d13271c3f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -155,7 +155,7 @@ public class Dictionary {
boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
boolean checkCompoundTriple, simplifiedTriple;
int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
- List<CompoundRule> compoundRules; // nullable
+ CompoundRule[] compoundRules; // nullable
List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();
// ignored characters (dictionary, affix, inputs)
@@ -601,11 +601,11 @@ public class Dictionary {
return parts;
}
- private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
+ private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
throws IOException, ParseException {
- List<CompoundRule> compoundRules = new ArrayList<>();
+ CompoundRule[] compoundRules = new CompoundRule[num];
for (int i = 0; i < num; i++) {
- compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this));
+ compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
}
return compoundRules;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
index 1e2a1add13c..3b58e0f4f98 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
@@ -450,7 +450,7 @@ public class Hunspell {
if (forms != null) {
words.add(forms);
- if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
+ if (mayHaveCompoundRule(words)) {
if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
return true;
}
@@ -467,6 +467,15 @@ public class Hunspell {
return false;
}
+ private boolean mayHaveCompoundRule(List<IntsRef> words) {
+ for (CompoundRule rule : dictionary.compoundRules) {
+ if (rule.mayMatch(words)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
private boolean checkLastCompoundPart(
char[] wordChars, int start, int length, List<IntsRef> words) {
IntsRef ref = new IntsRef(new int[1], 0, 1);
@@ -475,7 +484,12 @@ public class Hunspell {
Stemmer.RootProcessor stopOnMatching =
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
ref.ints[0] = formID;
- return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words));
+ for (CompoundRule r : dictionary.compoundRules) {
+ if (r.fullyMatches(words)) {
+ return false;
+ }
+ }
+ return true;
};
boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
words.remove(words.size() - 1);