You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2023/05/19 19:36:13 UTC

[lucene] branch main updated: hunspell (minor): reduce allocations when processing compound rules (#12316)

This is an automated email from the ASF dual-hosted git repository.

donnerpeter pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new a454388b80e hunspell (minor): reduce allocations when processing compound rules (#12316)
a454388b80e is described below

commit a454388b80e2a92640b79792f2238acd0e7872b9
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Fri May 19 21:36:05 2023 +0200

    hunspell (minor): reduce allocations when processing compound rules (#12316)
---
 .../apache/lucene/analysis/hunspell/Dictionary.java    |  8 ++++----
 .../org/apache/lucene/analysis/hunspell/Hunspell.java  | 18 ++++++++++++++++--
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index 2249e70249e..b5d13271c3f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -155,7 +155,7 @@ public class Dictionary {
   boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
   boolean checkCompoundTriple, simplifiedTriple;
   int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
-  List<CompoundRule> compoundRules; // nullable
+  CompoundRule[] compoundRules; // nullable
   List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();
 
   // ignored characters (dictionary, affix, inputs)
@@ -601,11 +601,11 @@ public class Dictionary {
     return parts;
   }
 
-  private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
+  private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
       throws IOException, ParseException {
-    List<CompoundRule> compoundRules = new ArrayList<>();
+    CompoundRule[] compoundRules = new CompoundRule[num];
     for (int i = 0; i < num; i++) {
-      compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this));
+      compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
     }
     return compoundRules;
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
index 1e2a1add13c..3b58e0f4f98 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
@@ -450,7 +450,7 @@ public class Hunspell {
       if (forms != null) {
         words.add(forms);
 
-        if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
+        if (mayHaveCompoundRule(words)) {
           if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
             return true;
           }
@@ -467,6 +467,15 @@ public class Hunspell {
     return false;
   }
 
+  private boolean mayHaveCompoundRule(List<IntsRef> words) {
+    for (CompoundRule rule : dictionary.compoundRules) {
+      if (rule.mayMatch(words)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   private boolean checkLastCompoundPart(
       char[] wordChars, int start, int length, List<IntsRef> words) {
     IntsRef ref = new IntsRef(new int[1], 0, 1);
@@ -475,7 +484,12 @@ public class Hunspell {
     Stemmer.RootProcessor stopOnMatching =
         (stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
           ref.ints[0] = formID;
-          return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words));
+          for (CompoundRule r : dictionary.compoundRules) {
+            if (r.fullyMatches(words)) {
+              return false;
+            }
+          }
+          return true;
         };
     boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
     words.remove(words.size() - 1);