You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/08 09:56:25 UTC
[lucene-solr] branch master updated: LUCENE-9735: Hunspell: speed up flag checks by avoiding allocations (#2315)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 061233c  LUCENE-9735: Hunspell: speed up flag checks by avoiding allocations (#2315)
061233c is described below

commit 061233ca4ee5baad0d2c531297c51fbc665c2844
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Mon Feb 8 10:56:10 2021 +0100

    LUCENE-9735: Hunspell: speed up flag checks by avoiding allocations (#2315)
---
 .../analysis/hunspell/CheckCompoundPattern.java    |   4 +-
 .../lucene/analysis/hunspell/CompoundRule.java     |  24 +++--
 .../lucene/analysis/hunspell/Dictionary.java       | 103 +++++++--------------
 .../lucene/analysis/hunspell/FlagEnumerator.java   |  86 +++++++++++++++++
 .../lucene/analysis/hunspell/SpellChecker.java     |  15 ++-
 .../apache/lucene/analysis/hunspell/Stemmer.java   |  54 +++++------
 .../lucene/analysis/hunspell/TestDictionary.java   |  25 +++--
 .../lucene/analysis/hunspell/TestPerformance.java  |   4 +-
 8 files changed, 178 insertions(+), 137 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java
index 3d70591..b1c4b3d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java
@@ -16,7 +16,6 @@
  */
 package org.apache.lucene.analysis.hunspell;
 
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
 
@@ -27,7 +26,6 @@ class CheckCompoundPattern {
   private final char[] endFlags;
   private final char[] beginFlags;
   private final Dictionary dictionary;
-  private final BytesRef scratch = new BytesRef();
 
   CheckCompoundPattern(
       String unparsed, Dictionary.FlagParsingStrategy strategy, Dictionary dictionary) {
@@ -93,7 +91,7 @@ class CheckCompoundPattern {
 
   private boolean hasAllFlags(char[] flags, IntsRef forms) {
     for (char flag : flags) {
-      if (!dictionary.hasFlag(forms, flag, scratch)) {
+      if (!dictionary.hasFlag(forms, flag)) {
         return false;
       }
     }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CompoundRule.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CompoundRule.java
index 0f89de8..726c1dc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CompoundRule.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CompoundRule.java
@@ -17,7 +17,6 @@
 package org.apache.lucene.analysis.hunspell;
 
 import java.util.List;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 
 class CompoundRule {
@@ -51,16 +50,15 @@ class CompoundRule {
     data = parsedFlags.toString().toCharArray();
   }
 
-  boolean mayMatch(List<IntsRef> words, BytesRef scratch) {
-    return match(words, 0, 0, scratch, false);
+  boolean mayMatch(List<IntsRef> words) {
+    return match(words, 0, 0, false);
   }
 
-  boolean fullyMatches(List<IntsRef> words, BytesRef scratch) {
-    return match(words, 0, 0, scratch, true);
+  boolean fullyMatches(List<IntsRef> words) {
+    return match(words, 0, 0, true);
   }
 
-  private boolean match(
-      List<IntsRef> words, int patternIndex, int wordIndex, BytesRef scratch, boolean fully) {
+  private boolean match(List<IntsRef> words, int patternIndex, int wordIndex, boolean fully) {
     if (patternIndex >= data.length) {
       return wordIndex >= words.size();
     }
@@ -71,12 +69,12 @@ class CompoundRule {
     char flag = data[patternIndex];
     if (patternIndex < data.length - 1 && data[patternIndex + 1] == '*') {
       int startWI = wordIndex;
-      while (wordIndex < words.size() && dictionary.hasFlag(words.get(wordIndex), flag, scratch)) {
+      while (wordIndex < words.size() && dictionary.hasFlag(words.get(wordIndex), flag)) {
         wordIndex++;
       }
 
       while (wordIndex >= startWI) {
-        if (match(words, patternIndex + 2, wordIndex, scratch, fully)) {
+        if (match(words, patternIndex + 2, wordIndex, fully)) {
           return true;
         }
 
@@ -86,16 +84,16 @@ class CompoundRule {
     }
 
     boolean currentWordMatches =
-        wordIndex < words.size() && dictionary.hasFlag(words.get(wordIndex), flag, scratch);
+        wordIndex < words.size() && dictionary.hasFlag(words.get(wordIndex), flag);
 
     if (patternIndex < data.length - 1 && data[patternIndex + 1] == '?') {
-      if (currentWordMatches && match(words, patternIndex + 2, wordIndex + 1, scratch, fully)) {
+      if (currentWordMatches && match(words, patternIndex + 2, wordIndex + 1, fully)) {
         return true;
       }
-      return match(words, patternIndex + 2, wordIndex, scratch, fully);
+      return match(words, patternIndex + 2, wordIndex, fully);
     }
 
-    return currentWordMatches && match(words, patternIndex + 1, wordIndex + 1, scratch, fully);
+    return currentWordMatches && match(words, patternIndex + 1, wordIndex + 1, fully);
   }
 
   @Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index ae1a3a1..95a4b83 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -52,8 +52,6 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
@@ -77,11 +75,11 @@ public class Dictionary {
 
   static final char FLAG_UNSET = (char) 0;
   private static final int DEFAULT_FLAGS = 65510;
-  private static final char HIDDEN_FLAG = (char) 65511; // called 'ONLYUPCASEFLAG' in Hunspell
+  static final char HIDDEN_FLAG = (char) 65511; // called 'ONLYUPCASEFLAG' in Hunspell
 
   // TODO: really for suffixes we should reverse the automaton and run them backwards
-  private static final String PREFIX_CONDITION_REGEX_PATTERN = "%s.*";
-  private static final String SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";
+  private static final String PREFIX_CONDITION_REGEX = "%s.*";
+  private static final String SUFFIX_CONDITION_REGEX = ".*%s";
   private static final Pattern MORPH_KEY_PATTERN = Pattern.compile("\\s+(?=\\p{Alpha}{2}:)");
   static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
   CharsetDecoder decoder = replacingDecoder(DEFAULT_CHARSET);
@@ -106,7 +104,7 @@ public class Dictionary {
    * The list of unique flagsets (wordforms). theoretically huge, but practically small (for Polish
    * this is 756), otherwise humans wouldn't be able to deal with it either.
    */
-  BytesRefHash flagLookup = new BytesRefHash();
+  final FlagEnumerator.Lookup flagLookup;
 
   // the list of unique strip affixes.
   char[] stripData;
@@ -224,7 +222,6 @@ public class Dictionary {
     this.ignoreCase = ignoreCase;
     this.needsInputCleaning = ignoreCase;
     this.needsOutputCleaning = false; // set if we have an OCONV
-    flagLookup.add(new BytesRef()); // no flags -> ord 0
 
     Path tempPath = getDefaultTempDir(); // TODO: make this configurable?
     Path aff = Files.createTempFile(tempPath, "affix", "aff");
@@ -244,12 +241,14 @@ public class Dictionary {
 
       // pass 2: parse affixes
       aff2 = new BufferedInputStream(Files.newInputStream(aff));
-      readAffixFile(aff2, decoder);
+      FlagEnumerator flagEnumerator = new FlagEnumerator();
+      readAffixFile(aff2, decoder, flagEnumerator);
 
       // read dictionary entries
       IndexOutput unsorted = mergeDictionaries(tempDir, tempFileNamePrefix, dictionaries, decoder);
       String sortedFile = sortWordsOffline(tempDir, tempFileNamePrefix, unsorted);
-      words = readSortedDictionaries(tempDir, sortedFile);
+      words = readSortedDictionaries(tempDir, sortedFile, flagEnumerator);
+      flagLookup = flagEnumerator.finish();
       aliases = null; // no longer needed
       morphAliases = null; // no longer needed
       success = true;
@@ -321,7 +320,7 @@ public class Dictionary {
    * @param decoder CharsetDecoder to decode the content of the file
    * @throws IOException Can be thrown while reading from the InputStream
    */
-  private void readAffixFile(InputStream affixStream, CharsetDecoder decoder)
+  private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, FlagEnumerator flags)
       throws IOException, ParseException {
     TreeMap<String, List<Integer>> prefixes = new TreeMap<>();
     TreeMap<String, List<Integer>> suffixes = new TreeMap<>();
@@ -351,11 +350,9 @@ public class Dictionary {
       } else if ("AM".equals(firstWord)) {
         parseMorphAlias(line);
       } else if ("PFX".equals(firstWord)) {
-        parseAffix(
-            prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+        parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX, seenPatterns, seenStrips, flags);
       } else if ("SFX".equals(firstWord)) {
-        parseAffix(
-            suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+        parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX, seenPatterns, seenStrips, flags);
       } else if (line.equals("COMPLEXPREFIXES")) {
         complexPrefixes =
             true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
@@ -583,15 +580,15 @@ public class Dictionary {
       LineNumberReader reader,
       String conditionPattern,
       Map<String, Integer> seenPatterns,
-      Map<String, Integer> seenStrips)
+      Map<String, Integer> seenStrips,
+      FlagEnumerator flags)
       throws IOException, ParseException {
 
-    BytesRefBuilder scratch = new BytesRefBuilder();
     StringBuilder sb = new StringBuilder();
     String[] args = header.split("\\s+");
 
     boolean crossProduct = args[2].equals("Y");
-    boolean isSuffix = conditionPattern.equals(SUFFIX_CONDITION_REGEX_PATTERN);
+    boolean isSuffix = conditionPattern.equals(SUFFIX_CONDITION_REGEX);
 
     int numLines = Integer.parseInt(args[3]);
     affixData = ArrayUtil.grow(affixData, currentAffix * 4 + numLines * 4);
@@ -617,7 +614,6 @@ public class Dictionary {
         }
 
         appendFlags = flagParsingStrategy.parseFlags(flagPart);
-        Arrays.sort(appendFlags);
         twoStageAffix = true;
       }
       // zero affix -> empty string
@@ -676,8 +672,7 @@ public class Dictionary {
         appendFlags = NOFLAGS;
       }
 
-      encodeFlags(scratch, appendFlags);
-      int appendFlagsOrd = flagLookup.add(scratch.get());
+      int appendFlagsOrd = flags.add(appendFlags);
       if (appendFlagsOrd < 0) {
         // already exists in our hash
         appendFlagsOrd = (-appendFlagsOrd) - 1;
@@ -1064,10 +1059,11 @@ public class Dictionary {
     return sorted;
   }
 
-  private FST<IntsRef> readSortedDictionaries(Directory tempDir, String sorted) throws IOException {
+  private FST<IntsRef> readSortedDictionaries(
+      Directory tempDir, String sorted, FlagEnumerator flags) throws IOException {
     boolean success = false;
 
-    EntryGrouper grouper = new EntryGrouper();
+    EntryGrouper grouper = new EntryGrouper(flags);
 
     try (ByteSequencesReader reader =
         new ByteSequencesReader(tempDir.openChecksumInput(sorted, IOContext.READONCE), sorted)) {
@@ -1104,7 +1100,6 @@ public class Dictionary {
             wordForm = ArrayUtil.growExact(wordForm, wordForm.length + 1);
             wordForm[wordForm.length - 1] = HIDDEN_FLAG;
           }
-          Arrays.sort(wordForm);
           entry = line.substring(0, flagSep);
         }
         // we possibly have morphological data
@@ -1191,9 +1186,13 @@ public class Dictionary {
         new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.getSingleton());
     private final List<char[]> group = new ArrayList<>();
     private final List<Integer> stemExceptionIDs = new ArrayList<>();
-    private final BytesRefBuilder flagsScratch = new BytesRefBuilder();
     private final IntsRefBuilder scratchInts = new IntsRefBuilder();
     private String currentEntry = null;
+    private final FlagEnumerator flagEnumerator;
+
+    EntryGrouper(FlagEnumerator flagEnumerator) {
+      this.flagEnumerator = flagEnumerator;
+    }
 
     void add(String entry, char[] flags, int stemExceptionID) throws IOException {
       if (!entry.equals(currentEntry)) {
@@ -1229,12 +1228,7 @@ public class Dictionary {
           continue;
         }
 
-        encodeFlags(flagsScratch, flags);
-        int ord = flagLookup.add(flagsScratch.get());
-        if (ord < 0) {
-          ord = -ord - 1; // already exists in our hash
-        }
-        currentOrds.append(ord);
+        currentOrds.append(flagEnumerator.add(flags));
         if (hasStemExceptions) {
           currentOrds.append(stemExceptionIDs.get(i));
         }
@@ -1248,34 +1242,13 @@ public class Dictionary {
     }
   }
 
-  static boolean hasHiddenFlag(char[] flags) {
-    return hasFlag(flags, HIDDEN_FLAG);
-  }
-
-  char[] decodeFlags(int entryId, BytesRef b) {
-    this.flagLookup.get(entryId, b);
-
-    if (b.length == 0) {
-      return CharsRef.EMPTY_CHARS;
-    }
-    int len = b.length >>> 1;
-    char[] flags = new char[len];
-    int upto = 0;
-    int end = b.offset + b.length;
-    for (int i = b.offset; i < end; i += 2) {
-      flags[upto++] = (char) ((b.bytes[i] << 8) | (b.bytes[i + 1] & 0xff));
-    }
-    return flags;
-  }
-
-  private static void encodeFlags(BytesRefBuilder b, char[] flags) {
-    int len = flags.length << 1;
-    b.grow(len);
-    b.clear();
-    for (int flag : flags) {
-      b.append((byte) ((flag >> 8) & 0xff));
-      b.append((byte) (flag & 0xff));
+  private static boolean hasHiddenFlag(char[] flags) {
+    for (char flag : flags) {
+      if (flag == HIDDEN_FLAG) {
+        return true;
+      }
     }
+    return false;
   }
 
   private void parseAlias(String line) {
@@ -1341,18 +1314,18 @@ public class Dictionary {
         .collect(Collectors.toList());
   }
 
-  boolean isForbiddenWord(char[] word, int length, BytesRef scratch) {
+  boolean isForbiddenWord(char[] word, int length) {
     if (forbiddenword != FLAG_UNSET) {
       IntsRef forms = lookupWord(word, 0, length);
-      return forms != null && hasFlag(forms, forbiddenword, scratch);
+      return forms != null && hasFlag(forms, forbiddenword);
     }
     return false;
   }
 
-  boolean hasFlag(IntsRef forms, char flag, BytesRef scratch) {
+  boolean hasFlag(IntsRef forms, char flag) {
     int formStep = formStep();
     for (int i = 0; i < forms.length; i += formStep) {
-      if (hasFlag(forms.ints[forms.offset + i], flag, scratch)) {
+      if (hasFlag(forms.ints[forms.offset + i], flag)) {
         return true;
       }
     }
@@ -1468,12 +1441,8 @@ public class Dictionary {
     }
   }
 
-  boolean hasFlag(int entryId, char flag, BytesRef scratch) {
-    return flag != FLAG_UNSET && hasFlag(decodeFlags(entryId, scratch), flag);
-  }
-
-  static boolean hasFlag(char[] flags, char flag) {
-    return flag != FLAG_UNSET && Arrays.binarySearch(flags, flag) >= 0;
+  boolean hasFlag(int entryId, char flag) {
+    return flagLookup.hasFlag(entryId, flag);
   }
 
   CharSequence cleanInput(CharSequence input, StringBuilder reuse) {
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/FlagEnumerator.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/FlagEnumerator.java
new file mode 100644
index 0000000..57aac40
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/FlagEnumerator.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.hunspell;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * A structure similar to {@link org.apache.lucene.util.BytesRefHash}, but specialized for sorted
+ * char sequences used for Hunspell flags. It deduplicates flag sequences, gives them unique ids,
+ * stores the sequences in a contiguous char[] (via {@link #finish()}) and allows querying the
+ * presence of flags later via {@link Lookup#hasFlag}.
+ */
+class FlagEnumerator {
+  private final StringBuilder builder = new StringBuilder();
+  private final Map<String, Integer> indices = new HashMap<>();
+
+  FlagEnumerator() {
+    add(new char[0]); // no flags -> ord 0
+  }
+
+  int add(char[] chars) {
+    Arrays.sort(chars);
+    String key = new String(chars);
+    if (key.length() > Character.MAX_VALUE) {
+      throw new IllegalArgumentException("Too many flags: " + key);
+    }
+
+    Integer existing = indices.get(key);
+    if (existing != null) {
+      return existing;
+    }
+
+    int result = builder.length();
+    indices.put(key, result);
+    builder.append((char) key.length());
+    builder.append(key);
+    return result;
+  }
+
+  Lookup finish() {
+    char[] result = new char[builder.length()];
+    builder.getChars(0, builder.length(), result, 0);
+    return new Lookup(result);
+  }
+
+  static class Lookup {
+    private final char[] data;
+
+    private Lookup(char[] data) {
+      this.data = data;
+    }
+
+    boolean hasFlag(int entryId, char flag) {
+      if (entryId < 0 || flag == Dictionary.FLAG_UNSET) return false;
+
+      int length = data[entryId];
+      for (int i = entryId + 1; i < entryId + 1 + length; i++) {
+        char c = data[i];
+        if (c == flag) return true;
+        if (c > flag) return false;
+      }
+      return false;
+    }
+
+    char[] getFlags(int entryId) {
+      return ArrayUtil.copyOfSubArray(data, entryId + 1, entryId + 1 + data[entryId]);
+    }
+  }
+}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java
index 53bf53e..e694f4f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java
@@ -26,7 +26,6 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
 
@@ -38,7 +37,6 @@ import org.apache.lucene.util.IntsRef;
 public class SpellChecker {
   final Dictionary dictionary;
   final Stemmer stemmer;
-  private final BytesRef scratch = new BytesRef();
 
   public SpellChecker(Dictionary dictionary) {
     this.dictionary = dictionary;
@@ -66,7 +64,7 @@ public class SpellChecker {
     }
 
     char[] wordChars = word.toCharArray();
-    if (dictionary.isForbiddenWord(wordChars, wordChars.length, scratch)) {
+    if (dictionary.isForbiddenWord(wordChars, wordChars.length)) {
       return false;
     }
 
@@ -135,7 +133,7 @@ public class SpellChecker {
   }
 
   Boolean checkSimpleWord(char[] wordChars, int length, WordCase originalCase) {
-    if (dictionary.isForbiddenWord(wordChars, length, scratch)) {
+    if (dictionary.isForbiddenWord(wordChars, length)) {
       return false;
     }
 
@@ -253,7 +251,7 @@ public class SpellChecker {
     if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false;
 
     IntsRef forms = dictionary.lookupWord(chars, offset, length);
-    return forms != null && dictionary.hasFlag(forms, dictionary.forceUCase, scratch);
+    return forms != null && dictionary.hasFlag(forms, dictionary.forceUCase);
   }
 
   private boolean equalsIgnoreCase(CharsRef cr1, CharsRef cr2) {
@@ -341,7 +339,7 @@ public class SpellChecker {
         words.add(forms);
 
         if (dictionary.compoundRules != null
-            && dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words, scratch))) {
+            && dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
           if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
             return true;
           }
@@ -364,8 +362,7 @@ public class SpellChecker {
     if (forms == null) return false;
 
     words.add(forms);
-    boolean result =
-        dictionary.compoundRules.stream().anyMatch(r -> r.fullyMatches(words, scratch));
+    boolean result = dictionary.compoundRules.stream().anyMatch(r -> r.fullyMatches(words));
     words.remove(words.size() - 1);
     return result;
   }
@@ -474,7 +471,7 @@ public class SpellChecker {
         if (!spell(chunk)) {
           for (String chunkSug : suggest(chunk)) {
             String replaced = word.substring(0, chunkStart) + chunkSug + word.substring(chunkEnd);
-            if (!dictionary.isForbiddenWord(replaced.toCharArray(), replaced.length(), scratch)) {
+            if (!dictionary.isForbiddenWord(replaced.toCharArray(), replaced.length())) {
               result.add(replaced);
             }
           }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 44e2675..9c5afc7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -24,7 +24,6 @@ import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@@ -37,7 +36,6 @@ import org.apache.lucene.util.fst.FST;
  */
 final class Stemmer {
   private final Dictionary dictionary;
-  private final BytesRef scratch = new BytesRef();
   private final StringBuilder segment = new StringBuilder();
 
   // used for normalization
@@ -96,7 +94,7 @@ final class Stemmer {
       word = scratchBuffer;
     }
 
-    if (dictionary.isForbiddenWord(word, length, scratch)) {
+    if (dictionary.isForbiddenWord(word, length)) {
       return Collections.emptyList();
     }
 
@@ -251,25 +249,25 @@ final class Stemmer {
     IntsRef forms = dictionary.lookupWord(word, offset, length);
     if (forms != null) {
       for (int i = 0; i < forms.length; i += formStep) {
-        char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
-        if (!acceptCase(originalCase, wordFlags, word, offset, length)) {
+        int entryId = forms.ints[forms.offset + i];
+        if (!acceptCase(originalCase, entryId, word, offset, length)) {
           continue;
         }
         // we can't add this form, it's a pseudostem requiring an affix
-        if (Dictionary.hasFlag(wordFlags, dictionary.needaffix)) {
+        if (dictionary.hasFlag(entryId, dictionary.needaffix)) {
           continue;
         }
         // we can't add this form, it only belongs inside a compound word
-        if (!context.isCompound() && Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) {
+        if (!context.isCompound() && dictionary.hasFlag(entryId, dictionary.onlyincompound)) {
           continue;
         }
         if (context.isCompound()) {
           if (context != WordContext.COMPOUND_END
-              && Dictionary.hasFlag(wordFlags, dictionary.compoundForbid)) {
+              && dictionary.hasFlag(entryId, dictionary.compoundForbid)) {
             return false;
           }
-          if (!Dictionary.hasFlag(wordFlags, dictionary.compoundFlag)
-              && !Dictionary.hasFlag(wordFlags, context.requiredFlag(dictionary))) {
+          if (!dictionary.hasFlag(entryId, dictionary.compoundFlag)
+              && !dictionary.hasFlag(entryId, context.requiredFlag(dictionary))) {
             continue;
           }
         }
@@ -300,8 +298,8 @@ final class Stemmer {
   }
 
   private boolean acceptCase(
-      WordCase originalCase, char[] wordFlags, char[] word, int offset, int length) {
-    boolean keepCase = Dictionary.hasFlag(wordFlags, dictionary.keepcase);
+      WordCase originalCase, int entryId, char[] word, int offset, int length) {
+    boolean keepCase = dictionary.hasFlag(entryId, dictionary.keepcase);
     if (originalCase != null) {
       if (keepCase
           && dictionary.checkSharpS
@@ -311,7 +309,7 @@ final class Stemmer {
       }
       return !keepCase;
     }
-    return !Dictionary.hasHiddenFlag(wordFlags);
+    return !dictionary.hasFlag(entryId, Dictionary.HIDDEN_FLAG);
   }
 
   private boolean containsSharpS(char[] word, int offset, int length) {
@@ -593,32 +591,30 @@ final class Stemmer {
     int append = dictionary.affixData(affix, Dictionary.AFFIX_APPEND);
 
     if (context.isCompound()) {
-      if (!isPrefix && dictionary.hasFlag(append, dictionary.compoundForbid, scratch)) {
+      if (!isPrefix && dictionary.hasFlag(append, dictionary.compoundForbid)) {
         return false;
       }
       WordContext allowed = isPrefix ? WordContext.COMPOUND_BEGIN : WordContext.COMPOUND_END;
-      if (context != allowed && !dictionary.hasFlag(append, dictionary.compoundPermit, scratch)) {
+      if (context != allowed && !dictionary.hasFlag(append, dictionary.compoundPermit)) {
         return false;
       }
       if (context == WordContext.COMPOUND_END
           && !isPrefix
           && !previousWasPrefix
-          && dictionary.hasFlag(append, dictionary.onlyincompound, scratch)) {
+          && dictionary.hasFlag(append, dictionary.onlyincompound)) {
         return false;
       }
     }
 
     if (recursionDepth == 0) {
       // check if affix is allowed in a non-compound word
-      return context.isCompound()
-          || !dictionary.hasFlag(append, dictionary.onlyincompound, scratch);
+      return context.isCompound() || !dictionary.hasFlag(append, dictionary.onlyincompound);
     }
 
     if (isCrossProduct(affix)) {
       // cross check incoming continuation class (flag of previous affix) against list.
-      char[] appendFlags = dictionary.decodeFlags(append, scratch);
-      if (context.isCompound() || !Dictionary.hasFlag(appendFlags, dictionary.onlyincompound)) {
-        return previousWasPrefix || Dictionary.hasFlag(appendFlags, prevFlag);
+      if (context.isCompound() || !dictionary.hasFlag(append, dictionary.onlyincompound)) {
+        return previousWasPrefix || dictionary.hasFlag(append, prevFlag);
       }
     }
 
@@ -686,15 +682,15 @@ final class Stemmer {
     IntsRef forms = skipLookup ? null : dictionary.lookupWord(strippedWord, offset, length);
     if (forms != null) {
       for (int i = 0; i < forms.length; i += formStep) {
-        char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
-        if (Dictionary.hasFlag(wordFlags, flag) || isFlagAppendedByAffix(prefixId, flag)) {
+        int entryId = forms.ints[forms.offset + i];
+        if (dictionary.hasFlag(entryId, flag) || isFlagAppendedByAffix(prefixId, flag)) {
           // confusing: in this one exception, we already chained the first prefix against the
           // second,
           // so it doesnt need to be checked against the word
           boolean chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
           if (!chainedPrefix && prefixId >= 0) {
             char prefixFlag = dictionary.affixData(prefixId, Dictionary.AFFIX_FLAG);
-            if (!Dictionary.hasFlag(wordFlags, prefixFlag)
+            if (!dictionary.hasFlag(entryId, prefixFlag)
                 && !isFlagAppendedByAffix(affix, prefixFlag)) {
               continue;
             }
@@ -710,17 +706,17 @@ final class Stemmer {
           }
 
           // we are looking for a case variant, but this word does not allow it
-          if (!acceptCase(originalCase, wordFlags, strippedWord, offset, length)) {
+          if (!acceptCase(originalCase, entryId, strippedWord, offset, length)) {
             continue;
           }
-          if (!context.isCompound() && Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) {
+          if (!context.isCompound() && dictionary.hasFlag(entryId, dictionary.onlyincompound)) {
             continue;
           }
           if (context.isCompound()) {
             char cFlag = context.requiredFlag(dictionary);
-            if (!Dictionary.hasFlag(wordFlags, cFlag)
+            if (!dictionary.hasFlag(entryId, cFlag)
                 && !isFlagAppendedByAffix(affix, cFlag)
-                && !Dictionary.hasFlag(wordFlags, dictionary.compoundFlag)
+                && !dictionary.hasFlag(entryId, dictionary.compoundFlag)
                 && !isFlagAppendedByAffix(affix, dictionary.compoundFlag)) {
               continue;
             }
@@ -798,7 +794,7 @@ final class Stemmer {
   private boolean isFlagAppendedByAffix(int affixId, char flag) {
     if (affixId < 0 || flag == Dictionary.FLAG_UNSET) return false;
     int appendId = dictionary.affixData(affixId, Dictionary.AFFIX_APPEND);
-    return dictionary.hasFlag(appendId, flag, scratch);
+    return dictionary.hasFlag(appendId, flag);
   }
 
   private boolean isCrossProduct(int affix) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
index 8c4bc30..f2dcbe0 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
@@ -24,7 +24,6 @@ import java.nio.charset.StandardCharsets;
 import java.text.ParseException;
 import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
@@ -46,16 +45,20 @@ public class TestDictionary extends LuceneTestCase {
     assertNotNull(ordList);
     assertEquals(1, ordList.length);
 
-    BytesRef ref = new BytesRef();
-    char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
-    assertEquals(1, flags.length);
+    assertEquals('B', assertSingleFlag(dictionary, ordList));
 
     int offset = random().nextInt(10);
     ordList = dictionary.lookupWord((" ".repeat(offset) + "lucen").toCharArray(), offset, 5);
     assertNotNull(ordList);
     assertEquals(1, ordList.length);
-    flags = dictionary.decodeFlags(ordList.ints[0], ref);
+    assertEquals('A', assertSingleFlag(dictionary, ordList));
+  }
+
+  private static char assertSingleFlag(Dictionary dictionary, IntsRef ordList) {
+    int entryId = ordList.ints[0];
+    char[] flags = dictionary.flagLookup.getFlags(entryId);
     assertEquals(1, flags.length);
+    return flags[0];
   }
 
   public void testCompressedDictionary() throws Exception {
@@ -63,9 +66,7 @@ public class TestDictionary extends LuceneTestCase {
     assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
     assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
     IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
-    BytesRef ref = new BytesRef();
-    char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
-    assertEquals(1, flags.length);
+    assertSingleFlag(dictionary, ordList);
   }
 
   public void testCompressedBeforeSetDictionary() throws Exception {
@@ -73,9 +74,7 @@ public class TestDictionary extends LuceneTestCase {
     assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
     assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
     IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
-    BytesRef ref = new BytesRef();
-    char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
-    assertEquals(1, flags.length);
+    assertSingleFlag(dictionary, ordList);
   }
 
   public void testCompressedEmptyAliasDictionary() throws Exception {
@@ -83,9 +82,7 @@ public class TestDictionary extends LuceneTestCase {
     assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
     assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
     IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
-    BytesRef ref = new BytesRef();
-    char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
-    assertEquals(1, flags.length);
+    assertSingleFlag(dictionary, ordList);
   }
 
   // malformed rule causes ParseException
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java
index e26cae7..f859262b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java
@@ -59,12 +59,12 @@ public class TestPerformance extends LuceneTestCase {
 
   @Test
   public void de() throws Exception {
-    checkPerformance("de", 100_000);
+    checkPerformance("de", 200_000);
   }
 
   @Test
   public void fr() throws Exception {
-    checkPerformance("fr", 20_000);
+    checkPerformance("fr", 40_000);
   }
 
   private void checkPerformance(String code, int wordCount) throws Exception {