You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/01/15 12:52:55 UTC
[lucene-solr] branch master updated: LUCENE-9664: Hunspell support:
fix most IntelliJ warnings, cleanup (#2202)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 82f6f16 LUCENE-9664: Hunspell support: fix most IntelliJ warnings, cleanup (#2202)
82f6f16 is described below
commit 82f6f161ae20c3a03ad2ac2ae4fed12425839f3c
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Fri Jan 15 13:52:34 2021 +0100
LUCENE-9664: Hunspell support: fix most IntelliJ warnings, cleanup (#2202)
---
.../lucene/analysis/hunspell/Dictionary.java | 151 ++++++++++-----------
.../apache/lucene/analysis/hunspell/Stemmer.java | 84 +++++-------
.../lucene/analysis/hunspell/TestDictionary.java | 38 +++---
3 files changed, 124 insertions(+), 149 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index a4b2f6c..b932bf8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -140,8 +140,6 @@ public class Dictionary {
// when set, some words have exceptional stems, and the last entry is a pointer to stemExceptions
boolean hasStemExceptions;
- private final Path tempPath = getDefaultTempDir(); // TODO: make this configurable?
-
boolean ignoreCase;
boolean complexPrefixes;
// if no affixes have continuation classes, no need to do 2-level affix stripping
@@ -210,6 +208,7 @@ public class Dictionary {
this.needsOutputCleaning = false; // set if we have an OCONV
flagLookup.add(new BytesRef()); // no flags -> ord 0
+ Path tempPath = getDefaultTempDir(); // TODO: make this configurable?
Path aff = Files.createTempFile(tempPath, "affix", "aff");
OutputStream out = new BufferedOutputStream(Files.newOutputStream(aff));
InputStream aff1 = null;
@@ -252,33 +251,33 @@ public class Dictionary {
}
/** Looks up Hunspell word forms from the dictionary */
- IntsRef lookupWord(char word[], int offset, int length) {
+ IntsRef lookupWord(char[] word, int offset, int length) {
return lookup(words, word, offset, length);
}
// only for testing
- IntsRef lookupPrefix(char word[], int offset, int length) {
- return lookup(prefixes, word, offset, length);
+ IntsRef lookupPrefix(char[] word) {
+ return lookup(prefixes, word, 0, word.length);
}
// only for testing
- IntsRef lookupSuffix(char word[], int offset, int length) {
- return lookup(suffixes, word, offset, length);
+ IntsRef lookupSuffix(char[] word) {
+ return lookup(suffixes, word, 0, word.length);
}
- IntsRef lookup(FST<IntsRef> fst, char word[], int offset, int length) {
+ IntsRef lookup(FST<IntsRef> fst, char[] word, int offset, int length) {
if (fst == null) {
return null;
}
final FST.BytesReader bytesReader = fst.getBytesReader();
- final FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
+ final FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<>());
// Accumulate output as we go
final IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
IntsRef output = NO_OUTPUT;
int l = offset + length;
try {
- for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
+ for (int i = offset, cp; i < l; i += Character.charCount(cp)) {
cp = Character.codePointAt(word, i, l);
if (fst.findTargetArc(cp, arc, arc, bytesReader) == null) {
return null;
@@ -320,7 +319,7 @@ public class Dictionary {
seenStrips.put("", 0);
LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
- String line = null;
+ String line;
while ((line = reader.readLine()) != null) {
// ignore any BOM marker on first line
if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
@@ -344,31 +343,31 @@ public class Dictionary {
complexPrefixes =
true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
} else if (line.startsWith(CIRCUMFIX_KEY)) {
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber());
}
circumfix = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(KEEPCASE_KEY)) {
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal KEEPCASE declaration", reader.getLineNumber());
}
keepcase = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(NEEDAFFIX_KEY) || line.startsWith(PSEUDOROOT_KEY)) {
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal NEEDAFFIX declaration", reader.getLineNumber());
}
needaffix = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(ONLYINCOMPOUND_KEY)) {
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal ONLYINCOMPOUND declaration", reader.getLineNumber());
}
onlyincompound = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(IGNORE_KEY)) {
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber());
}
@@ -376,7 +375,7 @@ public class Dictionary {
Arrays.sort(ignore);
needsInputCleaning = true;
} else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) {
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
String type = parts[0];
if (parts.length != 2) {
throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber());
@@ -475,10 +474,10 @@ public class Dictionary {
BytesRefBuilder scratch = new BytesRefBuilder();
StringBuilder sb = new StringBuilder();
- String args[] = header.split("\\s+");
+ String[] args = header.split("\\s+");
boolean crossProduct = args[2].equals("Y");
- boolean isSuffix = conditionPattern == SUFFIX_CONDITION_REGEX_PATTERN;
+ boolean isSuffix = conditionPattern.equals(SUFFIX_CONDITION_REGEX_PATTERN);
int numLines = Integer.parseInt(args[3]);
affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3));
@@ -488,7 +487,7 @@ public class Dictionary {
for (int i = 0; i < numLines; i++) {
assert affixWriter.getPosition() == currentAffix << 3;
String line = reader.readLine();
- String ruleArgs[] = line.split("\\s+");
+ String[] ruleArgs = line.split("\\s+");
// from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
// condition is optional
@@ -501,7 +500,7 @@ public class Dictionary {
char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2];
String affixArg = ruleArgs[3];
- char appendFlags[] = null;
+ char[] appendFlags = null;
// first: parse continuation classes out of affix
int flagSep = affixArg.lastIndexOf('/');
@@ -585,7 +584,7 @@ public class Dictionary {
affixWriter.writeShort((short) flag);
affixWriter.writeShort((short) stripOrd.intValue());
// encode crossProduct into patternIndex
- int patternOrd = patternIndex.intValue() << 1 | (crossProduct ? 1 : 0);
+ int patternOrd = patternIndex << 1 | (crossProduct ? 1 : 0);
affixWriter.writeShort((short) patternOrd);
affixWriter.writeShort((short) appendFlagsOrd);
@@ -598,12 +597,7 @@ public class Dictionary {
affixArg = new StringBuilder(affixArg).reverse().toString();
}
- List<Integer> list = affixes.get(affixArg);
- if (list == null) {
- list = new ArrayList<>();
- affixes.put(affixArg, list);
- }
- list.add(currentAffix);
+ affixes.computeIfAbsent(affixArg, __ -> new ArrayList<>()).add(currentAffix);
currentAffix++;
}
}
@@ -614,7 +608,7 @@ public class Dictionary {
for (int i = 0; i < num; i++) {
String line = reader.readLine();
- String parts[] = line.split("\\s+");
+ String[] parts = line.split("\\s+");
if (parts.length != 3) {
throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
}
@@ -707,7 +701,7 @@ public class Dictionary {
* definition
*/
static FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
- String parts[] = flagLine.split("\\s+");
+ String[] parts = flagLine.split("\\s+");
if (parts.length != 2) {
throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine);
}
@@ -724,11 +718,11 @@ public class Dictionary {
throw new IllegalArgumentException("Unknown flag type: " + flagType);
}
- final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping
- final char MORPH_SEPARATOR =
+ private static final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping
+ private static final char MORPH_SEPARATOR =
0x1e; // separator for boundary of entry (may be followed by morph data)
- String unescapeEntry(String entry) {
+ private String unescapeEntry(String entry) {
StringBuilder sb = new StringBuilder();
int end = morphBoundary(entry);
for (int i = 0; i < end; i++) {
@@ -738,9 +732,7 @@ public class Dictionary {
i++;
} else if (ch == '/') {
sb.append(FLAG_SEPARATOR);
- } else if (ch == MORPH_SEPARATOR || ch == FLAG_SEPARATOR) {
- // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!!
- } else {
+ } else if (!shouldSkipEscapedChar(ch)) {
sb.append(ch);
}
}
@@ -748,9 +740,7 @@ public class Dictionary {
if (end < entry.length()) {
for (int i = end; i < entry.length(); i++) {
char c = entry.charAt(i);
- if (c == FLAG_SEPARATOR || c == MORPH_SEPARATOR) {
- // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!!
- } else {
+ if (!shouldSkipEscapedChar(c)) {
sb.append(c);
}
}
@@ -758,6 +748,11 @@ public class Dictionary {
return sb.toString();
}
+ private static boolean shouldSkipEscapedChar(char ch) {
+ return ch == FLAG_SEPARATOR
+ || ch == MORPH_SEPARATOR; // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!!
+ }
+
static int morphBoundary(String line) {
int end = indexOfSpaceOrTab(line, 0);
if (end == -1) {
@@ -812,9 +807,9 @@ public class Dictionary {
try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
for (InputStream dictionary : dictionaries) {
BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
- String line =
- lines.readLine(); // first line is number of entries (approximately, sometimes)
+ lines.readLine(); // first line is number of entries (approximately, sometimes)
+ String line;
while ((line = lines.readLine()) != null) {
// wild and unpredictable code comment rules
if (line.isEmpty()
@@ -825,7 +820,7 @@ public class Dictionary {
}
line = unescapeEntry(line);
// if we haven't seen any stem exceptions, try to parse one
- if (hasStemExceptions == false) {
+ if (!hasStemExceptions) {
int morphStart = line.indexOf(MORPH_SEPARATOR);
if (morphStart >= 0 && morphStart < line.length()) {
hasStemExceptions = parseStemException(line.substring(morphStart + 1)) != null;
@@ -861,35 +856,28 @@ public class Dictionary {
new OfflineSorter(
tempDir,
tempFileNamePrefix,
- new Comparator<BytesRef>() {
- BytesRef scratch1 = new BytesRef();
- BytesRef scratch2 = new BytesRef();
-
- @Override
- public int compare(BytesRef o1, BytesRef o2) {
- scratch1.bytes = o1.bytes;
- scratch1.offset = o1.offset;
- scratch1.length = o1.length;
-
- for (int i = scratch1.length - 1; i >= 0; i--) {
- if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR
- || scratch1.bytes[scratch1.offset + i] == MORPH_SEPARATOR) {
- scratch1.length = i;
+ new Comparator<>() {
+ final BytesRef scratch1 = new BytesRef();
+ final BytesRef scratch2 = new BytesRef();
+
+ private void initScratch(BytesRef o, BytesRef scratch) {
+ scratch.bytes = o.bytes;
+ scratch.offset = o.offset;
+ scratch.length = o.length;
+
+ for (int i = scratch.length - 1; i >= 0; i--) {
+ if (scratch.bytes[scratch.offset + i] == FLAG_SEPARATOR
+ || scratch.bytes[scratch.offset + i] == MORPH_SEPARATOR) {
+ scratch.length = i;
break;
}
}
+ }
- scratch2.bytes = o2.bytes;
- scratch2.offset = o2.offset;
- scratch2.length = o2.length;
-
- for (int i = scratch2.length - 1; i >= 0; i--) {
- if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR
- || scratch2.bytes[scratch2.offset + i] == MORPH_SEPARATOR) {
- scratch2.length = i;
- break;
- }
- }
+ @Override
+ public int compare(BytesRef o1, BytesRef o2) {
+ initScratch(o1, scratch1);
+ initScratch(o2, scratch2);
int cmp = scratch1.compareTo(scratch2);
if (cmp == 0) {
@@ -933,7 +921,7 @@ public class Dictionary {
String line = scratch.utf8ToString();
String entry;
- char wordForm[];
+ char[] wordForm;
int end;
int flagSep = line.indexOf(FLAG_SEPARATOR);
@@ -980,7 +968,7 @@ public class Dictionary {
words.add(scratchInts.get(), currentOrds.get());
}
// swap current
- if (cmp > 0 || currentEntry == null) {
+ if (cmp > 0) {
currentEntry = entry;
currentOrds = new IntsRefBuilder(); // must be this way
}
@@ -994,6 +982,7 @@ public class Dictionary {
}
// finalize last entry
+ assert currentEntry != null;
Util.toUTF32(currentEntry, scratchInts);
words.add(scratchInts.get(), currentOrds.get());
success2 = true;
@@ -1011,7 +1000,7 @@ public class Dictionary {
return CharsRef.EMPTY_CHARS;
}
int len = b.length >>> 1;
- char flags[] = new char[len];
+ char[] flags = new char[len];
int upto = 0;
int end = b.offset + b.length;
for (int i = b.offset; i < end; i += 2) {
@@ -1020,19 +1009,18 @@ public class Dictionary {
return flags;
}
- static void encodeFlags(BytesRefBuilder b, char flags[]) {
+ private static void encodeFlags(BytesRefBuilder b, char[] flags) {
int len = flags.length << 1;
b.grow(len);
b.clear();
- for (int i = 0; i < flags.length; i++) {
- int flag = flags[i];
+ for (int flag : flags) {
b.append((byte) ((flag >> 8) & 0xff));
b.append((byte) (flag & 0xff));
}
}
private void parseAlias(String line) {
- String ruleArgs[] = line.split("\\s+");
+ String[] ruleArgs = line.split("\\s+");
if (aliases == null) {
// first line should be the aliases count
final int count = Integer.parseInt(ruleArgs[1]);
@@ -1102,7 +1090,7 @@ public class Dictionary {
* @return Parsed flag
*/
char parseFlag(String rawFlag) {
- char flags[] = parseFlags(rawFlag);
+ char[] flags = parseFlags(rawFlag);
if (flags.length != 1) {
throw new IllegalArgumentException("expected only one flag, got: " + rawFlag);
}
@@ -1140,9 +1128,9 @@ public class Dictionary {
char[] flags = new char[rawFlagParts.length];
int upto = 0;
- for (int i = 0; i < rawFlagParts.length; i++) {
+ for (String rawFlagPart : rawFlagParts) {
// note, removing the trailing X/leading I for nepali... what is the rule here?!
- String replacement = rawFlagParts[i].replaceAll("[^0-9]", "");
+ String replacement = rawFlagPart.replaceAll("[^0-9]", "");
// note, ignoring empty flags (this happens in danish, for example)
if (replacement.isEmpty()) {
continue;
@@ -1185,13 +1173,13 @@ public class Dictionary {
builder.append(combined);
}
- char flags[] = new char[builder.length()];
+ char[] flags = new char[builder.length()];
builder.getChars(0, builder.length(), flags, 0);
return flags;
}
}
- static boolean hasFlag(char flags[], char flag) {
+ static boolean hasFlag(char[] flags, char flag) {
return Arrays.binarySearch(flags, flag) >= 0;
}
@@ -1247,7 +1235,7 @@ public class Dictionary {
// TODO: this could be more efficient!
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
final FST.BytesReader bytesReader = fst.getBytesReader();
- final FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
+ final FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<>());
final CharsRef NO_OUTPUT = fst.outputs.getNoOutput();
// temporary stuff
@@ -1290,6 +1278,7 @@ public class Dictionary {
private static Path DEFAULT_TEMP_DIR;
/** Used by test framework */
+ @SuppressWarnings("unused")
public static void setDefaultTempDir(Path tempDir) {
DEFAULT_TEMP_DIR = tempDir;
}
@@ -1306,7 +1295,7 @@ public class Dictionary {
throw new IOException("Java has no temporary folder property (java.io.tmpdir)?");
}
Path tempDirectory = Paths.get(tempDirPath);
- if (Files.isWritable(tempDirectory) == false) {
+ if (!Files.isWritable(tempDirectory)) {
throw new IOException(
"Java's temporary folder not present or writeable?: " + tempDirectory.toAbsolutePath());
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index d067d65..c0f2299 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -43,7 +43,7 @@ final class Stemmer {
// used for normalization
private final StringBuilder scratchSegment = new StringBuilder();
- private char scratchBuffer[] = new char[32];
+ private char[] scratchBuffer = new char[32];
// it's '1' if we have no stem exceptions, otherwise every other form
// is really an ID pointing to the exception table
@@ -86,7 +86,7 @@ final class Stemmer {
* @param word Word to find the stems for
* @return List of stems for the word
*/
- public List<CharsRef> stem(char word[], int length) {
+ public List<CharsRef> stem(char[] word, int length) {
if (dictionary.needsInputCleaning) {
scratchSegment.setLength(0);
@@ -128,7 +128,7 @@ final class Stemmer {
private static final int UPPER_CASE = 2;
/** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */
- private int caseOf(char word[], int length) {
+ private int caseOf(char[] word, int length) {
if (dictionary.ignoreCase || length == 0 || !Character.isUpperCase(word[0])) {
return EXACT_CASE;
}
@@ -152,7 +152,7 @@ final class Stemmer {
}
/** folds titlecase variant of word to titleBuffer */
- private void caseFoldTitle(char word[], int length) {
+ private void caseFoldTitle(char[] word, int length) {
titleBuffer = ArrayUtil.grow(titleBuffer, length);
System.arraycopy(word, 0, titleBuffer, 0, length);
for (int i = 1; i < length; i++) {
@@ -161,13 +161,13 @@ final class Stemmer {
}
/** folds lowercase variant of word (title cased) to lowerBuffer */
- private void caseFoldLower(char word[], int length) {
+ private void caseFoldLower(char[] word, int length) {
lowerBuffer = ArrayUtil.grow(lowerBuffer, length);
System.arraycopy(word, 0, lowerBuffer, 0, length);
lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]);
}
- private List<CharsRef> doStem(char word[], int length, boolean caseVariant) {
+ private List<CharsRef> doStem(char[] word, int length, boolean caseVariant) {
List<CharsRef> stems = new ArrayList<>();
IntsRef forms = dictionary.lookupWord(word, 0, length);
if (forms != null) {
@@ -177,7 +177,7 @@ final class Stemmer {
boolean checkOnlyInCompound = dictionary.onlyincompound != -1;
if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) {
dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
- char wordFlags[] = Dictionary.decodeFlags(scratch);
+ char[] wordFlags = Dictionary.decodeFlags(scratch);
// we are looking for a case variant, but this word does not allow it
if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char) dictionary.keepcase)) {
continue;
@@ -196,8 +196,7 @@ final class Stemmer {
}
}
try {
- boolean v =
- stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
+ stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
@@ -210,7 +209,7 @@ final class Stemmer {
* @param word Word to find the stems for
* @return List of stems for the word
*/
- public List<CharsRef> uniqueStems(char word[], int length) {
+ public List<CharsRef> uniqueStems(char[] word, int length) {
List<CharsRef> stems = stem(word, length);
if (stems.size() < 2) {
return stems;
@@ -226,7 +225,7 @@ final class Stemmer {
return deduped;
}
- private CharsRef newStem(char buffer[], int length, IntsRef forms, int formID) {
+ private CharsRef newStem(char[] buffer, int length, IntsRef forms, int formID) {
final String exception;
if (dictionary.hasStemExceptions) {
int exceptionID = forms.ints[forms.offset + formID + 1];
@@ -251,7 +250,7 @@ final class Stemmer {
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
- char cleaned[] = new char[scratchSegment.length()];
+ char[] cleaned = new char[scratchSegment.length()];
scratchSegment.getChars(0, cleaned.length, cleaned, 0);
return new CharsRef(cleaned, 0, cleaned.length);
} else {
@@ -264,15 +263,15 @@ final class Stemmer {
}
// some state for traversing FSTs
- final FST.BytesReader prefixReaders[] = new FST.BytesReader[3];
+ private final FST.BytesReader[] prefixReaders = new FST.BytesReader[3];
@SuppressWarnings({"unchecked", "rawtypes"})
- final FST.Arc<IntsRef> prefixArcs[] = new FST.Arc[3];
+ private final FST.Arc<IntsRef>[] prefixArcs = new FST.Arc[3];
- final FST.BytesReader suffixReaders[] = new FST.BytesReader[3];
+ private final FST.BytesReader[] suffixReaders = new FST.BytesReader[3];
@SuppressWarnings({"unchecked", "rawtypes"})
- final FST.Arc<IntsRef> suffixArcs[] = new FST.Arc[3];
+ private final FST.Arc<IntsRef>[] suffixArcs = new FST.Arc[3];
/**
* Generates a list of stems for the provided word
@@ -296,7 +295,7 @@ final class Stemmer {
* @return List of stems, or empty list if no stems are found
*/
private List<CharsRef> stem(
- char word[],
+ char[] word,
int length,
int previous,
int prevFlag,
@@ -330,12 +329,10 @@ final class Stemmer {
output = fst.outputs.add(output, arc.output());
}
}
- IntsRef prefixes = null;
if (!arc.isFinal()) {
continue;
- } else {
- prefixes = fst.outputs.add(output, arc.nextFinalOutput());
}
+ IntsRef prefixes = fst.outputs.add(output, arc.nextFinalOutput());
for (int j = 0; j < prefixes.length; j++) {
int prefix = prefixes.ints[prefixes.offset + j];
@@ -357,13 +354,13 @@ final class Stemmer {
} else {
// check if affix is allowed in a non-compound word
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
}
} else if (crossProduct) {
// cross check incoming continuation class (flag of previous affix) against list.
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
assert prevFlag >= 0;
boolean allowed =
dictionary.onlyincompound == -1
@@ -374,8 +371,7 @@ final class Stemmer {
}
if (compatible) {
- int deAffixedStart = i;
- int deAffixedLength = length - deAffixedStart;
+ int deAffixedLength = length - i;
int stripStart = dictionary.stripOffsets[stripOrd];
int stripEnd = dictionary.stripOffsets[stripOrd + 1];
@@ -387,14 +383,14 @@ final class Stemmer {
stripStart,
stripLength,
word,
- deAffixedStart,
+ i,
deAffixedLength)) {
continue;
}
- char strippedWord[] = new char[stripLength + deAffixedLength];
+ char[] strippedWord = new char[stripLength + deAffixedLength];
System.arraycopy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
- System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
+ System.arraycopy(word, i, strippedWord, stripLength, deAffixedLength);
List<CharsRef> stemList =
applyAffix(
@@ -431,12 +427,10 @@ final class Stemmer {
output = fst.outputs.add(output, arc.output());
}
}
- IntsRef suffixes = null;
if (!arc.isFinal()) {
continue;
- } else {
- suffixes = fst.outputs.add(output, arc.nextFinalOutput());
}
+ IntsRef suffixes = fst.outputs.add(output, arc.nextFinalOutput());
for (int j = 0; j < suffixes.length; j++) {
int suffix = suffixes.ints[suffixes.offset + j];
@@ -458,13 +452,13 @@ final class Stemmer {
} else {
// check if affix is allowed in a non-compound word
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
}
} else if (crossProduct) {
// cross check incoming continuation class (flag of previous affix) against list.
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
assert prevFlag >= 0;
boolean allowed =
dictionary.onlyincompound == -1
@@ -494,7 +488,7 @@ final class Stemmer {
continue;
}
- char strippedWord[] = new char[stripLength + deAffixedLength];
+ char[] strippedWord = new char[stripLength + deAffixedLength];
System.arraycopy(word, 0, strippedWord, 0, deAffixedLength);
System.arraycopy(
dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
@@ -524,7 +518,7 @@ final class Stemmer {
// just check the stem
// but this is a little bit more complicated.
private boolean checkCondition(
- int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
+ int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len) {
if (condition != 0) {
CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
int state = 0;
@@ -559,7 +553,7 @@ final class Stemmer {
* @return List of stems for the word, or an empty list if none are found
*/
List<CharsRef> applyAffix(
- char strippedWord[],
+ char[] strippedWord,
int length,
int affix,
int prefixFlag,
@@ -572,9 +566,7 @@ final class Stemmer {
affixReader.setPosition(8 * affix);
char flag = (char) (affixReader.readShort() & 0xffff);
affixReader.skipBytes(2); // strip
- int condition = (char) (affixReader.readShort() & 0xffff);
- boolean crossProduct = (condition & 1) == 1;
- condition >>>= 1;
+ boolean crossProduct = ((int) (char) (affixReader.readShort() & 0xffff) & 1) == 1;
char append = (char) (affixReader.readShort() & 0xffff);
List<CharsRef> stems = new ArrayList<>();
@@ -583,18 +575,18 @@ final class Stemmer {
if (forms != null) {
for (int i = 0; i < forms.length; i += formStep) {
dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
- char wordFlags[] = Dictionary.decodeFlags(scratch);
+ char[] wordFlags = Dictionary.decodeFlags(scratch);
if (Dictionary.hasFlag(wordFlags, flag)) {
// confusing: in this one exception, we already chained the first prefix against the
// second,
// so it doesn't need to be checked against the word
boolean chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
- if (chainedPrefix == false
+ if (!chainedPrefix
&& prefixFlag >= 0
&& !Dictionary.hasFlag(wordFlags, (char) prefixFlag)) {
// see if we can chain prefix thru the suffix continuation class (only if it has any!)
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
if (!hasCrossCheckedFlag((char) prefixFlag, appendFlags, false)) {
continue;
}
@@ -604,7 +596,7 @@ final class Stemmer {
// to ensure it has it, and vice versa
if (dictionary.circumfix != -1) {
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
boolean suffixCircumfix = Dictionary.hasFlag(appendFlags, (char) dictionary.circumfix);
if (circumfix != suffixCircumfix) {
continue;
@@ -631,7 +623,7 @@ final class Stemmer {
// have that flag
if (dictionary.circumfix != -1 && !circumfix && prefix) {
dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
+ char[] appendFlags = Dictionary.decodeFlags(scratch);
circumfix = Dictionary.hasFlag(appendFlags, (char) dictionary.circumfix);
}
@@ -654,7 +646,7 @@ final class Stemmer {
true,
circumfix,
caseVariant));
- } else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
+ } else if (!dictionary.complexPrefixes && dictionary.twoStageAffix) {
// we took away a suffix.
// COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
// COMPLEXPREFIXES = false: combine with another suffix
@@ -688,9 +680,7 @@ final class Stemmer {
true,
circumfix,
caseVariant));
- } else if (prefix == false
- && dictionary.complexPrefixes == false
- && dictionary.twoStageAffix) {
+ } else if (!prefix && !dictionary.complexPrefixes && dictionary.twoStageAffix) {
// we took away a prefix, then a suffix: go look for another suffix
stems.addAll(
stem(
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
index 34852cf..d8bc47c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
@@ -42,18 +42,19 @@ public class TestDictionary extends LuceneTestCase {
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
- assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length);
- assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length);
+ assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
+ assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
assertNotNull(ordList);
assertEquals(1, ordList.length);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
- char flags[] = Dictionary.decodeFlags(ref);
+ char[] flags = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
- ordList = dictionary.lookupWord(new char[] {'l', 'u', 'c', 'e', 'n'}, 0, 5);
+ int offset = random().nextInt(10);
+ ordList = dictionary.lookupWord((" ".repeat(offset) + "lucen").toCharArray(), offset, 5);
assertNotNull(ordList);
assertEquals(1, ordList.length);
dictionary.flagLookup.get(ordList.ints[0], ref);
@@ -71,12 +72,12 @@ public class TestDictionary extends LuceneTestCase {
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
- assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length);
- assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length);
+ assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
+ assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
- char flags[] = Dictionary.decodeFlags(ref);
+ char[] flags = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
@@ -90,12 +91,12 @@ public class TestDictionary extends LuceneTestCase {
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
- assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length);
- assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length);
+ assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
+ assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
- char flags[] = Dictionary.decodeFlags(ref);
+ char[] flags = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
@@ -109,12 +110,12 @@ public class TestDictionary extends LuceneTestCase {
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
- assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length);
- assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length);
+ assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
+ assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
- char flags[] = Dictionary.decodeFlags(ref);
+ char[] flags = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
@@ -131,9 +132,7 @@ public class TestDictionary extends LuceneTestCase {
ParseException expected =
expectThrows(
ParseException.class,
- () -> {
- new Dictionary(tempDir, "dictionary", affixStream, dictStream);
- });
+ () -> new Dictionary(tempDir, "dictionary", affixStream, dictStream));
assertTrue(
expected
.getMessage()
@@ -153,10 +152,7 @@ public class TestDictionary extends LuceneTestCase {
Exception expected =
expectThrows(
- Exception.class,
- () -> {
- new Dictionary(tempDir, "dictionary", affixStream, dictStream);
- });
+ Exception.class, () -> new Dictionary(tempDir, "dictionary", affixStream, dictStream));
assertTrue(expected.getMessage().startsWith("expected only one flag"));
affixStream.close();
@@ -272,7 +268,7 @@ public class TestDictionary extends LuceneTestCase {
Dictionary.getDictionaryEncoding(new ByteArrayInputStream(new byte[0])));
}
- public void testFlagWithCrazyWhitespace() throws Exception {
+ public void testFlagWithCrazyWhitespace() {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
}