You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2022/01/03 09:39:48 UTC
[lucene] branch branch_9x updated: LUCENE-10335: Deprecate helper methods for resource loading in IOUtils and StopwordAnalyzerBase that are not compatible with module system; add utility method IOUtils#requireResourceNonNull(T) and add ModuleResourceLoader as complement to ClasspathResourceLoader
This is an automated email from the ASF dual-hosted git repository.
uschindler pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 3ec3456 LUCENE-10335: Deprecate helper methods for resource loading in IOUtils and StopwordAnalyzerBase that are not compatible with module system; add utility method IOUtils#requireResourceNonNull(T) and add ModuleResourceLoader as complement to ClasspathResourceLoader
3ec3456 is described below
commit 3ec3456d2153f45f031bad49fc9fe5445c1f5af6
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Mon Jan 3 10:38:19 2022 +0100
LUCENE-10335: Deprecate helper methods for resource loading in IOUtils and StopwordAnalyzerBase that are not compatible with module system; add utility method IOUtils#requireResourceNonNull(T) and add ModuleResourceLoader as complement to ClasspathResourceLoader
Co-authored-by: Tomoko Uchida <to...@gmail.com>
---
lucene/CHANGES.txt | 8 ++
.../apache/lucene/analysis/ar/ArabicAnalyzer.java | 9 +-
.../lucene/analysis/bg/BulgarianAnalyzer.java | 8 +-
.../apache/lucene/analysis/bn/BengaliAnalyzer.java | 8 +-
.../lucene/analysis/br/BrazilianAnalyzer.java | 6 +-
.../apache/lucene/analysis/ca/CatalanAnalyzer.java | 8 +-
.../apache/lucene/analysis/cjk/CJKAnalyzer.java | 9 +-
.../apache/lucene/analysis/ckb/SoraniAnalyzer.java | 6 +-
.../apache/lucene/analysis/cz/CzechAnalyzer.java | 6 +-
.../apache/lucene/analysis/da/DanishAnalyzer.java | 6 +-
.../apache/lucene/analysis/de/GermanAnalyzer.java | 6 +-
.../apache/lucene/analysis/el/GreekAnalyzer.java | 9 +-
.../apache/lucene/analysis/es/SpanishAnalyzer.java | 6 +-
.../lucene/analysis/et/EstonianAnalyzer.java | 8 +-
.../apache/lucene/analysis/eu/BasqueAnalyzer.java | 9 +-
.../apache/lucene/analysis/fa/PersianAnalyzer.java | 8 +-
.../apache/lucene/analysis/fi/FinnishAnalyzer.java | 6 +-
.../apache/lucene/analysis/fr/FrenchAnalyzer.java | 6 +-
.../apache/lucene/analysis/ga/IrishAnalyzer.java | 9 +-
.../lucene/analysis/gl/GalicianAnalyzer.java | 6 +-
.../apache/lucene/analysis/hi/HindiAnalyzer.java | 8 +-
.../lucene/analysis/hu/HungarianAnalyzer.java | 6 +-
.../lucene/analysis/hy/ArmenianAnalyzer.java | 8 +-
.../lucene/analysis/id/IndonesianAnalyzer.java | 8 +-
.../apache/lucene/analysis/it/ItalianAnalyzer.java | 6 +-
.../lucene/analysis/lt/LithuanianAnalyzer.java | 8 +-
.../apache/lucene/analysis/lv/LatvianAnalyzer.java | 6 +-
.../apache/lucene/analysis/ne/NepaliAnalyzer.java | 8 +-
.../apache/lucene/analysis/nl/DutchAnalyzer.java | 6 +-
.../lucene/analysis/no/NorwegianAnalyzer.java | 6 +-
.../lucene/analysis/pt/PortugueseAnalyzer.java | 6 +-
.../lucene/analysis/ro/RomanianAnalyzer.java | 9 +-
.../apache/lucene/analysis/ru/RussianAnalyzer.java | 6 +-
.../apache/lucene/analysis/sr/SerbianAnalyzer.java | 7 +-
.../apache/lucene/analysis/sv/SwedishAnalyzer.java | 6 +-
.../apache/lucene/analysis/ta/TamilAnalyzer.java | 8 +-
.../apache/lucene/analysis/te/TeluguAnalyzer.java | 8 +-
.../apache/lucene/analysis/th/ThaiAnalyzer.java | 8 +-
.../apache/lucene/analysis/tr/TurkishAnalyzer.java | 8 +-
.../analysis/util/FilesystemResourceLoader.java | 34 ++++--
.../analysis/util/StringMockResourceLoader.java | 10 --
.../lucene/analysis/ja/JapaneseAnalyzer.java | 17 ++-
.../lucene/analysis/ja/dict/BinaryDictionary.java | 11 +-
.../lucene/analysis/ja/dict/ConnectionCosts.java | 16 +--
.../analysis/ja/StringMockResourceLoader.java | 10 --
.../analysis/uk/UkrainianMorfologikAnalyzer.java | 15 +--
.../morfologik/TestMorfologikFilterFactory.java | 5 -
.../lucene/analysis/ko/dict/BinaryDictionary.java | 11 +-
.../lucene/analysis/ko/dict/ConnectionCosts.java | 17 +--
.../analysis/ko/StringMockResourceLoader.java | 10 --
.../analysis/cn/smart/SmartChineseAnalyzer.java | 6 +-
.../apache/lucene/analysis/pl/PolishAnalyzer.java | 6 +-
lucene/core.tests/src/test/module-info.java | 4 +
.../lucene/core/testresources/accessible.txt | 1 +
.../core/tests/TestModuleResourceLoader.java | 68 ++++++++++++
.../org/apache/lucene/core/tests/nonaccessible.txt | 1 +
.../lucene/analysis/StopwordAnalyzerBase.java | 25 +++--
.../org/apache/lucene/analysis/WordlistLoader.java | 114 +++++++++++++++++++--
.../lucene/util/ClasspathResourceLoader.java | 24 +++--
.../src/java/org/apache/lucene/util/IOUtils.java | 30 +++++-
.../apache/lucene/util/ModuleResourceLoader.java} | 47 +++++----
.../org/apache/lucene/util/ResourceLoader.java | 9 +-
.../lucene/expressions/js/JavascriptCompiler.java | 5 +-
.../lucene/search/suggest/TestLookupBenchmark.java | 6 +-
.../apache/lucene/tests/util/LuceneTestCase.java | 15 ++-
65 files changed, 556 insertions(+), 244 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f5aa920..1293cdc 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -18,6 +18,11 @@ API Changes
org.apache.lucene.* to org.apache.lucene.tests.* to avoid package name conflicts with the
core module. (Dawid Weiss)
+* LUCENE-10335: Deprecate helper methods for resource loading in IOUtils and StopwordAnalyzerBase
+ that are not compatible with module system (Class#getResourceAsStream() and Class#getResource()
+ are caller sensitive in Java 11). Instead add utility method IOUtils#requireResourceNonNull(T)
+ to test existence of resource based on null return value. (Uwe Schindler, Dawid Weiss)
+
New Features
---------------------
@@ -56,6 +61,9 @@ New Features
* LUCENE-10243: StandardTokenizer, UAX29URLEmailTokenizer, and HTMLStripCharFilter have
been upgraded to Unicode 12.1 (Robert Muir)
+* LUCENE-10335: Add ModuleResourceLoader as complement to ClasspathResourceLoader.
+ (Uwe Schindler)
+
Improvements
---------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
index 3f3dc0c..90be7a5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Arabic.
@@ -74,7 +76,12 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
static {
try {
- DEFAULT_STOP_SET = loadStopwordSet(false, ArabicAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ DEFAULT_STOP_SET =
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ ArabicAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
index 4c240b4..ae62c99 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
@@ -26,8 +26,10 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Bulgarian.
@@ -66,7 +68,11 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, BulgarianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ BulgarianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java
index e637320..40ee363 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java
@@ -25,10 +25,12 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* Analyzer for Bengali.
@@ -67,7 +69,11 @@ public final class BengaliAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, BengaliAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ BengaliAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
throw new UncheckedIOException("Unable to load default stopword set", ex);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
index 082a658..75cd15b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.br;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -64,8 +63,9 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- BrazilianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
+ IOUtils.requireResourceNonNull(
+ BrazilianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
"#");
} catch (IOException ex) {
// default set should always be present as it is part of the
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
index 53ab4af..1896f50 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -27,10 +27,12 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ElisionFilter;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
@@ -67,7 +69,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, CatalanAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ CatalanAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
index c41b87a..6b523fd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@@ -25,7 +25,9 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer}, normalizes content with
@@ -58,7 +60,12 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
static {
try {
- DEFAULT_STOP_SET = loadStopwordSet(false, CJKAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ DEFAULT_STOP_SET =
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ CJKAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
index bf52479..d99a1e1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ckb;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -64,8 +63,9 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- SoraniAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SoraniAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
index 8496b3b..23213a9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cz;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -60,8 +59,9 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_SET =
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- CzechAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
+ IOUtils.requireResourceNonNull(
+ CzechAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
"#");
} catch (IOException ex) {
// default set should always be present as it is part of the
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
index 45b4844..71cd075 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.da;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -65,8 +64,9 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
index 9e2829a..d24de0d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.de;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -75,8 +74,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
index 3ec5b59..ec9ed61 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@@ -25,8 +25,10 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for the Greek language.
@@ -57,7 +59,12 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
static {
try {
- DEFAULT_SET = loadStopwordSet(false, GreekAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ DEFAULT_SET =
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ GreekAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
index cc60256..9b2e7e2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.es;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -64,8 +63,9 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java
index 81a56ac..e8c3cb0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.EstonianStemmer;
/** {@link Analyzer} for Estonian. */
@@ -57,7 +59,11 @@ public final class EstonianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, EstonianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ EstonianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
index f28b846..37d8999 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.BasqueStemmer;
/**
@@ -60,7 +62,12 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
static {
try {
- DEFAULT_STOP_SET = loadStopwordSet(false, BasqueAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ DEFAULT_STOP_SET =
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ BasqueAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index c59d806..f0202db 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Persian.
@@ -71,7 +73,11 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, PersianAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ PersianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
index 8119560..e9cfe4d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fi;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -65,8 +64,9 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
index 29a4b6f..ee13b8f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fr;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@@ -81,8 +80,9 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
index 41ebd3b..3b031e7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
@@ -26,10 +26,12 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ElisionFilter;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.IrishStemmer;
/**
@@ -72,7 +74,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
static {
try {
- DEFAULT_STOP_SET = loadStopwordSet(false, IrishAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ DEFAULT_STOP_SET =
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ IrishAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
index 5700dff..e601e94 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.gl;
import java.io.IOException;
import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -62,8 +61,9 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- GalicianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ GalicianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
index 591ae1b..cc18a43 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
@@ -25,10 +25,12 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* Analyzer for Hindi.
@@ -67,7 +69,11 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, HindiAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ HindiAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
index 01ff42e..fd98d40 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.hu;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -65,8 +64,9 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
index ec76e5e..3d6a34b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.ArmenianStemmer;
/**
@@ -61,7 +63,11 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, ArmenianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ ArmenianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
index 4ae0c48..343b5fb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
@@ -25,8 +25,10 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* Analyzer for Indonesian (Bahasa)
@@ -56,7 +58,11 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, IndonesianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ IndonesianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
index bd0e42b..525f723 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.it;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@@ -74,8 +73,9 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
index 6b19640..e8519e0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.LithuanianStemmer;
/**
@@ -61,7 +63,11 @@ public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, LithuanianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ LithuanianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
index ee54510..7efb67b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.lv;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -63,8 +62,9 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- LatvianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ LatvianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ne/NepaliAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ne/NepaliAnalyzer.java
index a08e3aa..03b69df 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ne/NepaliAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ne/NepaliAnalyzer.java
@@ -25,11 +25,13 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.NepaliStemmer;
/**
@@ -69,7 +71,11 @@ public final class NepaliAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, NepaliAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ NepaliAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index a002429..89c67f9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.nl;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArrayMap;
import org.apache.lucene.analysis.CharArraySet;
@@ -69,8 +68,9 @@ public final class DutchAnalyzer extends Analyzer {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
index a949bae..a14f4e6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.no;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -65,8 +64,9 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
index a68f893..2c079fe 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.pt;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -64,8 +63,9 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index bce13b9..12f7465 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -26,9 +26,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.RomanianStemmer;
/**
@@ -63,8 +65,11 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(
- false, RomanianAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ RomanianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index 475556f..9ef407d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -52,8 +51,9 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/SerbianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/SerbianAnalyzer.java
index ed089c1..d320eb1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/SerbianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/SerbianAnalyzer.java
@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.SerbianStemmer;
/**
@@ -58,7 +59,11 @@ public class SerbianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, SerbianAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ SerbianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index ede41fc..b663163 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.sv;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -65,8 +64,9 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(
- SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ IOUtils.requireResourceNonNull(
+ SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ta/TamilAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ta/TamilAnalyzer.java
index 23c6e2f..3f17273 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ta/TamilAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ta/TamilAnalyzer.java
@@ -25,11 +25,13 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.TamilStemmer;
/**
@@ -68,7 +70,11 @@ public final class TamilAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, TamilAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ TamilAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java
index f0e9b4e..c7d39a7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java
@@ -24,10 +24,12 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
/**
* Analyzer for Telugu.
@@ -61,7 +63,11 @@ public final class TeluguAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, TeluguAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ TeluguAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the distribution (JAR)
throw new UncheckedIOException("Unable to load default stopword set", ex);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index ead83c3..c5b2b11 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -26,7 +26,9 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
+import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
@@ -59,7 +61,11 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, ThaiAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ ThaiAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index 84c417b..c676445 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -25,9 +25,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.TurkishStemmer;
/**
@@ -62,7 +64,11 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(false, TurkishAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ TurkishAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
+ STOPWORDS_COMMENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilesystemResourceLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilesystemResourceLoader.java
index 6024f6d..7b39e79 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilesystemResourceLoader.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilesystemResourceLoader.java
@@ -22,7 +22,9 @@ import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
+import java.util.Objects;
import org.apache.lucene.util.ClasspathResourceLoader;
+import org.apache.lucene.util.ModuleResourceLoader;
import org.apache.lucene.util.ResourceLoader;
/**
@@ -41,24 +43,36 @@ public final class FilesystemResourceLoader implements ResourceLoader {
private final ResourceLoader delegate;
/**
- * Creates a resource loader that resolves resources against the given base directory (may be
- * {@code null} to refer to CWD). Files not found in file system and class lookups are delegated
- * to context classloader.
+ * Creates a resource loader that resolves resources against the given base directory. Files not
+ * found in file system and class lookups are delegated to {@link ClassLoader}.
+ *
+ * <p>To use this constructor with the Java Module System, you must open all modules that contain
+ * resources to the {@code org.apache.lucene.core} module, otherwise resources can't be looked up.
+ * For such use cases it is recommended to use {@link #FilesystemResourceLoader(Path, Module)}
+ * instead, as it restricts resource lookups to the given module.
*/
public FilesystemResourceLoader(Path baseDirectory, ClassLoader delegate) {
this(baseDirectory, new ClasspathResourceLoader(delegate));
}
/**
- * Creates a resource loader that resolves resources against the given base directory (may be
- * {@code null} to refer to CWD). Files not found in file system and class lookups are delegated
- * to the given delegate {@link ResourceLoader}.
+ * Creates a resource loader that resolves resources against the given base directory. Files not
+ * found in file system and class lookups are delegated to {@link ModuleResourceLoader}.
+ *
+ * <p>To use this constructor, you must open the given module to the {@code
+ * org.apache.lucene.core} module, otherwise resources can't be looked up.
+ */
+ public FilesystemResourceLoader(Path baseDirectory, Module delegate) {
+ this(baseDirectory, new ModuleResourceLoader(delegate));
+ }
+
+ /**
+ * Creates a resource loader that resolves resources against the given base directory. Files not
+ * found in file system and class lookups are delegated to the given delegate {@link
+ * ResourceLoader}.
*/
public FilesystemResourceLoader(Path baseDirectory, ResourceLoader delegate) {
- if (baseDirectory == null) {
- throw new NullPointerException();
- }
- if (!Files.isDirectory(baseDirectory))
+ if (!Files.isDirectory(Objects.requireNonNull(baseDirectory)))
throw new IllegalArgumentException(baseDirectory + " is not a directory");
if (delegate == null)
throw new IllegalArgumentException("delegate ResourceLoader may not be null");
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
index 00f8c66..87764d6 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
@@ -40,16 +40,6 @@ public class StringMockResourceLoader implements ResourceLoader {
}
@Override
- public <T> T newInstance(String cname, Class<T> expectedType) {
- Class<? extends T> clazz = findClass(cname, expectedType);
- try {
- return clazz.getConstructor().newInstance();
- } catch (Exception e) {
- throw new RuntimeException("Cannot create instance: " + cname, e);
- }
- }
-
- @Override
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
}
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
index 0ad2b6e..8c79a2d 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ja;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
@@ -27,9 +28,11 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.cjk.CJKWidthCharFilter;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.analysis.ja.dict.UserDictionary;
+import org.apache.lucene.util.IOUtils;
/**
* Analyzer for Japanese that uses morphological analysis.
@@ -77,9 +80,19 @@ public class JapaneseAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET =
- loadStopwordSet(true, JapaneseAnalyzer.class, "stopwords.txt", "#"); // ignore case
+ WordlistLoader.getWordSet(
+ IOUtils.getDecodingReader(
+ IOUtils.requireResourceNonNull(
+ JapaneseAnalyzer.class.getResourceAsStream("stopwords.txt"),
+ "stopwords.txt"),
+ StandardCharsets.UTF_8),
+ "#",
+ new CharArraySet(16, true)); // ignore case
final CharArraySet tagset =
- loadStopwordSet(false, JapaneseAnalyzer.class, "stoptags.txt", "#");
+ WordlistLoader.getWordSet(
+ IOUtils.requireResourceNonNull(
+ JapaneseAnalyzer.class.getResourceAsStream("stoptags.txt"), "stoptags.txt"),
+ "#");
DEFAULT_STOP_TAGS = new HashSet<>();
for (Object element : tagset) {
char[] chars = (char[]) element;
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
index ccc4be3..993ec43 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
@@ -75,8 +75,11 @@ public abstract class BinaryDictionary implements Dictionary {
throw new IllegalArgumentException(
"resourcePath must be supplied with FILE resource scheme");
}
- this.resourcePath = getClass().getName().replace('.', '/');
+ this.resourcePath = getClass().getSimpleName();
} else {
+ if (resourceScheme == ResourceScheme.CLASSPATH && !resourcePath.startsWith("/")) {
+ resourcePath = "/".concat(resourcePath);
+ }
this.resourcePath = resourcePath;
}
InputStream mapIS = null, dictIS = null, posIS = null;
@@ -204,11 +207,7 @@ public abstract class BinaryDictionary implements Dictionary {
}
private static InputStream getClassResource(String path) throws IOException {
- final InputStream is = BinaryDictionary.class.getClassLoader().getResourceAsStream(path);
- if (is == null) {
- throw new FileNotFoundException("Not in classpath: " + path);
- }
- return is;
+ return IOUtils.requireResourceNonNull(BinaryDictionary.class.getResourceAsStream(path), path);
}
public void lookupWordIds(int sourceId, IntsRef ref) {
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
index fc49254..8a4868b 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.util.IOUtils;
/** n-gram connection cost data */
public final class ConnectionCosts {
@@ -40,11 +39,9 @@ public final class ConnectionCosts {
* @param path - where to load resources from, without the ".dat" suffix
*/
public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException {
- InputStream is = null;
- boolean success = false;
- try {
- is = BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX);
- is = new BufferedInputStream(is);
+ try (InputStream is =
+ new BufferedInputStream(
+ BinaryDictionary.getResource(scheme, "/" + path.replace('.', '/') + FILENAME_SUFFIX))) {
final DataInput in = new InputStreamDataInput(is);
CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
forwardSize = in.readVInt();
@@ -61,13 +58,6 @@ public final class ConnectionCosts {
}
}
buffer = tmpBuffer.asReadOnlyBuffer();
- success = true;
- } finally {
- if (success) {
- IOUtils.close(is);
- } else {
- IOUtils.closeWhileHandlingException(is);
- }
}
}
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
index e6d9f3f..d38acab 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
@@ -40,16 +40,6 @@ class StringMockResourceLoader implements ResourceLoader {
}
@Override
- public <T> T newInstance(String cname, Class<T> expectedType) {
- Class<? extends T> clazz = findClass(cname, expectedType);
- try {
- return clazz.getConstructor().newInstance();
- } catch (Exception e) {
- throw new RuntimeException("Cannot create instance: " + cname, e);
- }
- }
-
- @Override
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
}
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
index b80ccb6..eb29448 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
@@ -19,8 +19,6 @@ package org.apache.lucene.analysis.uk;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Objects;
import morfologik.stemming.Dictionary;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@@ -77,9 +75,7 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
if (is == null) {
throw new IOException("Could not locate the required stopwords resource.");
}
- wordList =
- WordlistLoader.getSnowballWordSet(
- IOUtils.getDecodingReader(is, StandardCharsets.UTF_8));
+ wordList = WordlistLoader.getSnowballWordSet(is);
}
// First, try to look up the resource module by name.
@@ -100,13 +96,12 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
dictionary = Dictionary.read(fsaStream, metaStream);
}
} else {
+ var name = "ua/net/nlp/ukrainian.dict";
dictionary =
Dictionary.read(
- Objects.requireNonNull(
- UkrainianMorfologikAnalyzer.class
- .getClassLoader()
- .getResource("ua/net/nlp/ukrainian.dict"),
- "Could not locate the required Ukrainian dictionary resource."));
+ IOUtils.requireResourceNonNull(
+ UkrainianMorfologikAnalyzer.class.getClassLoader().getResource(name),
+ name));
}
defaultResources = new DefaultResources(wordList, dictionary);
} catch (IOException e) {
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
index 19ad5a4..9f6a66d 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
@@ -39,11 +39,6 @@ public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
throw new UnsupportedOperationException();
}
-
- @Override
- public <T> T newInstance(String cname, Class<T> expectedType) {
- throw new UnsupportedOperationException();
- }
}
public void testDefaultDictionary() throws Exception {
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
index 000786f..f002fc3 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
@@ -74,8 +74,11 @@ public abstract class BinaryDictionary implements Dictionary {
throw new IllegalArgumentException(
"resourcePath must be supplied with FILE resource scheme");
}
- this.resourcePath = getClass().getName().replace('.', '/');
+ this.resourcePath = getClass().getSimpleName();
} else {
+ if (resourceScheme == ResourceScheme.CLASSPATH && !resourcePath.startsWith("/")) {
+ resourcePath = "/".concat(resourcePath);
+ }
this.resourcePath = resourcePath;
}
InputStream mapIS = null, dictIS = null, posIS = null;
@@ -178,11 +181,7 @@ public abstract class BinaryDictionary implements Dictionary {
}
private static InputStream getClassResource(String path) throws IOException {
- final InputStream is = BinaryDictionary.class.getClassLoader().getResourceAsStream(path);
- if (is == null) {
- throw new FileNotFoundException("Not in classpath: " + path);
- }
- return is;
+ return IOUtils.requireResourceNonNull(BinaryDictionary.class.getResourceAsStream(path), path);
}
public void lookupWordIds(int sourceId, IntsRef ref) {
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
index b0d9da5..896c379 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.util.IOUtils;
/** n-gram connection cost data */
public final class ConnectionCosts {
@@ -41,11 +40,10 @@ public final class ConnectionCosts {
*/
public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String resourcePath)
throws IOException {
- InputStream is = null;
- boolean success = false;
- try {
- is = BinaryDictionary.getResource(scheme, resourcePath.replace('.', '/') + FILENAME_SUFFIX);
- is = new BufferedInputStream(is);
+ try (InputStream is =
+ new BufferedInputStream(
+ BinaryDictionary.getResource(
+ scheme, "/" + resourcePath.replace('.', '/') + FILENAME_SUFFIX))) {
final DataInput in = new InputStreamDataInput(is);
CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
this.forwardSize = in.readVInt();
@@ -62,13 +60,6 @@ public final class ConnectionCosts {
}
}
buffer = tmpBuffer.asReadOnlyBuffer();
- success = true;
- } finally {
- if (success) {
- IOUtils.close(is);
- } else {
- IOUtils.closeWhileHandlingException(is);
- }
}
}
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
index 5a9d9dd..e29bfbe 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
@@ -40,16 +40,6 @@ class StringMockResourceLoader implements ResourceLoader {
}
@Override
- public <T> T newInstance(String cname, Class<T> expectedType) {
- Class<? extends T> clazz = findClass(cname, expectedType);
- try {
- return clazz.getConstructor().newInstance();
- } catch (Exception e) {
- throw new RuntimeException("Cannot create instance: " + cname, e);
- }
- }
-
- @Override
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
}
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
index f2e17dd..a1f451e 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cn.smart;
import java.io.IOException;
import java.io.UncheckedIOException;
-import java.nio.charset.StandardCharsets;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
@@ -86,8 +85,9 @@ public final class SmartChineseAnalyzer extends Analyzer {
// make sure it is unmodifiable as we expose it in the outer class
return CharArraySet.unmodifiableSet(
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
+ IOUtils.requireResourceNonNull(
+ SmartChineseAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
STOPWORD_FILE_COMMENT));
}
}
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
index 89d5a42..69cf463 100644
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.pl;
import java.io.IOException;
import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@@ -75,8 +74,9 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
try {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
- IOUtils.getDecodingReader(
- PolishAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
+ IOUtils.requireResourceNonNull(
+ PolishAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
+ DEFAULT_STOPWORD_FILE),
"#");
} catch (IOException ex) {
// default set should always be present as it is part of the
diff --git a/lucene/core.tests/src/test/module-info.java b/lucene/core.tests/src/test/module-info.java
index 9b3cf5a..21fb271 100644
--- a/lucene/core.tests/src/test/module-info.java
+++ b/lucene/core.tests/src/test/module-info.java
@@ -22,4 +22,8 @@ module org.apache.lucene.core.tests {
requires junit;
exports org.apache.lucene.core.tests;
+
+ // this is to test ModuleResourceLoader
+ opens org.apache.lucene.core.testresources to
+ org.apache.lucene.core;
}
diff --git a/lucene/core.tests/src/test/org/apache/lucene/core/testresources/accessible.txt b/lucene/core.tests/src/test/org/apache/lucene/core/testresources/accessible.txt
new file mode 100644
index 0000000..a2a6c1a
--- /dev/null
+++ b/lucene/core.tests/src/test/org/apache/lucene/core/testresources/accessible.txt
@@ -0,0 +1 @@
+This file should be accessible by ModuleResourceLoader.
diff --git a/lucene/core.tests/src/test/org/apache/lucene/core/tests/TestModuleResourceLoader.java b/lucene/core.tests/src/test/org/apache/lucene/core/tests/TestModuleResourceLoader.java
new file mode 100644
index 0000000..ac111e6
--- /dev/null
+++ b/lucene/core.tests/src/test/org/apache/lucene/core/tests/TestModuleResourceLoader.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.core.tests;
+
+import java.io.IOException;
+import org.apache.lucene.util.ModuleResourceLoader;
+import org.apache.lucene.util.ResourceLoader;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestModuleResourceLoader extends Assert {
+ private static final Module MODULE = TestModuleResourceLoader.class.getModule();
+
+ private final ResourceLoader loader = new ModuleResourceLoader(MODULE);
+
+ @BeforeClass
+ public static void beforeClass() {
+ assertTrue("Test class must be in a named module", MODULE.isNamed());
+ }
+
+ @Test
+ public void testModuleResources() throws Exception {
+ try (var stream = loader.openResource("org/apache/lucene/core/testresources/accessible.txt")) {
+ stream.available();
+ }
+
+ assertNotNull(
+ "resource should exist when loaded by test from classloader",
+ getClass().getResource("/org/apache/lucene/core/testresources/accessible.txt"));
+
+ try (var stream = loader.openResource("org/apache/lucene/core/tests/nonaccessible.txt")) {
+ stream.available();
+ fail("Should throw exception");
+ } catch (IOException e) {
+ assertTrue(e.getMessage().startsWith("Resource not found:"));
+ }
+ }
+
+ @Test
+ public void testModuleClassloading() throws Exception {
+ assertSame(
+ TestModuleResourceLoader.class,
+ loader.findClass(TestModuleResourceLoader.class.getName(), Object.class));
+
+ var cname = "org.foobar.Something";
+ try {
+ loader.findClass(cname, Object.class);
+ fail("Should throw exception");
+ } catch (RuntimeException e) {
+ assertEquals("Cannot load class: " + cname, e.getMessage());
+ }
+ }
+}
diff --git a/lucene/core.tests/src/test/org/apache/lucene/core/tests/nonaccessible.txt b/lucene/core.tests/src/test/org/apache/lucene/core/tests/nonaccessible.txt
new file mode 100644
index 0000000..046f4bd
--- /dev/null
+++ b/lucene/core.tests/src/test/org/apache/lucene/core/tests/nonaccessible.txt
@@ -0,0 +1 @@
+This file should not be accessible by ModuleResourceLoader.
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java b/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
index 77675fd..cbf7c56 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
@@ -71,20 +71,27 @@ public abstract class StopwordAnalyzerBase extends Analyzer {
* @param comment comment string to ignore in the stopword file
* @return a CharArraySet containing the distinct stopwords from the given file
* @throws IOException if loading the stopwords throws an {@link IOException}
+ * @deprecated {@link Class#getResourceAsStream(String)} is caller sensitive and cannot load
+ * resources across Java Modules. Please call the {@code getResourceAsStream()} and {@link
+ * WordlistLoader#getWordSet(Reader, String, CharArraySet)} or other methods directly.
*/
+ @Deprecated(forRemoval = true, since = "9.1")
protected static CharArraySet loadStopwordSet(
final boolean ignoreCase,
final Class<? extends Analyzer> aClass,
final String resource,
final String comment)
throws IOException {
- Reader reader = null;
- try {
- reader =
- IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
+ var argModule = aClass.getModule();
+ if (argModule.isNamed() && argModule != StopwordAnalyzerBase.class.getModule()) {
+ throw new UnsupportedOperationException(
+ "loadStopwordSet(class,...) does not work when Java Module System is enabled.");
+ }
+ try (Reader reader =
+ IOUtils.getDecodingReader(
+ IOUtils.requireResourceNonNull(aClass.getResourceAsStream(resource), resource),
+ StandardCharsets.UTF_8)) {
return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase));
- } finally {
- IOUtils.close(reader);
}
}
@@ -96,12 +103,8 @@ public abstract class StopwordAnalyzerBase extends Analyzer {
* @throws IOException if loading the stopwords throws an {@link IOException}
*/
protected static CharArraySet loadStopwordSet(Path stopwords) throws IOException {
- Reader reader = null;
- try {
- reader = Files.newBufferedReader(stopwords, StandardCharsets.UTF_8);
+ try (Reader reader = Files.newBufferedReader(stopwords, StandardCharsets.UTF_8)) {
return WordlistLoader.getWordSet(reader);
- } finally {
- IOUtils.close(reader);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java b/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java
index 1c67cc9..7437fc1 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.IOUtils;
@@ -76,17 +77,30 @@ public class WordlistLoader {
}
/**
- * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet
- * (omitting leading and trailing whitespace). Every line of the Reader should contain only one
- * word. The words need to be in lowercase if you make use of an Analyzer which uses
- * LowerCaseFilter (like StandardAnalyzer).
+ * Reads lines from an InputStream with UTF-8 charset and adds every line as an entry to a
+ * CharArraySet (omitting leading and trailing whitespace). Every line of the stream should
+ * contain only one word. The words need to be in lowercase if you make use of an Analyzer which
+ * uses LowerCaseFilter (like StandardAnalyzer).
*
- * @param reader Reader containing the wordlist
- * @param comment The string representing a comment.
- * @return A CharArraySet with the reader's words
+ * @param stream InputStream containing the wordlist
+ * @return A {@link CharArraySet} with the stream's words
*/
- public static CharArraySet getWordSet(Reader reader, String comment) throws IOException {
- return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false));
+ public static CharArraySet getWordSet(InputStream stream) throws IOException {
+ return getWordSet(stream, StandardCharsets.UTF_8);
+ }
+
+ /**
+ * Reads lines from an InputStream with the given charset and adds every line as an entry to a
+ * CharArraySet (omitting leading and trailing whitespace). Every line of the stream should
+ * contain only one word. The words need to be in lowercase if you make use of an Analyzer which
+ * uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param stream InputStream containing the wordlist
+ * @param charset Charset of the wordlist
+ * @return A {@link CharArraySet} with the stream's words
+ */
+ public static CharArraySet getWordSet(InputStream stream, Charset charset) throws IOException {
+ return getWordSet(IOUtils.getDecodingReader(stream, charset));
}
/**
@@ -118,6 +132,50 @@ public class WordlistLoader {
}
/**
+ * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet
+ * (omitting leading and trailing whitespace). Every line of the Reader should contain only one
+ * word. The words need to be in lowercase if you make use of an Analyzer which uses
+ * LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param reader Reader containing the wordlist
+ * @param comment The string representing a comment.
+ * @return A CharArraySet with the reader's words
+ */
+ public static CharArraySet getWordSet(Reader reader, String comment) throws IOException {
+ return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false));
+ }
+
+ /**
+ * Reads lines from an InputStream with UTF-8 charset and adds every non-comment line as an entry
+ * to a CharArraySet (omitting leading and trailing whitespace). Every line of the stream should
+ * contain only one word. The words need to be in lowercase if you make use of an Analyzer which
+ * uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param stream InputStream in UTF-8 encoding containing the wordlist
+ * @param comment The string representing a comment.
+ * @return A CharArraySet with the stream's words
+ */
+ public static CharArraySet getWordSet(InputStream stream, String comment) throws IOException {
+ return getWordSet(stream, StandardCharsets.UTF_8, comment);
+ }
+
+ /**
+ * Reads lines from an InputStream with the given charset and adds every non-comment line as an
+ * entry to a CharArraySet (omitting leading and trailing whitespace). Every line of the stream
+ * should contain only one word. The words need to be in lowercase if you make use of an Analyzer
+ * which uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param stream InputStream containing the wordlist
+ * @param charset Charset of the wordlist
+ * @param comment The string representing a comment.
+ * @return A CharArraySet with the stream's words
+ */
+ public static CharArraySet getWordSet(InputStream stream, Charset charset, String comment)
+ throws IOException {
+ return getWordSet(IOUtils.getDecodingReader(stream, charset), comment);
+ }
+
+ /**
* Reads stopwords from a stopword list in Snowball format.
*
* <p>The snowball format is the following:
@@ -171,6 +229,44 @@ public class WordlistLoader {
}
/**
+ * Reads stopwords from a stopword list in Snowball format.
+ *
+ * <p>The snowball format is the following:
+ *
+ * <ul>
+ * <li>Lines may contain multiple words separated by whitespace.
+ * <li>The comment character is the vertical line (|).
+ * <li>Lines may contain trailing comments.
+ * </ul>
+ *
+ * @param stream InputStream in UTF-8 encoding containing a Snowball stopword list
+ * @return A {@link CharArraySet} with the stream's words
+ */
+ public static CharArraySet getSnowballWordSet(InputStream stream) throws IOException {
+ return getSnowballWordSet(stream, StandardCharsets.UTF_8);
+ }
+
+ /**
+ * Reads stopwords from a stopword list in Snowball format.
+ *
+ * <p>The snowball format is the following:
+ *
+ * <ul>
+ * <li>Lines may contain multiple words separated by whitespace.
+ * <li>The comment character is the vertical line (|).
+ * <li>Lines may contain trailing comments.
+ * </ul>
+ *
+ * @param stream InputStream containing a Snowball stopword list
+ * @param charset Charset of the stopword list
+ * @return A {@link CharArraySet} with the stream's words
+ */
+ public static CharArraySet getSnowballWordSet(InputStream stream, Charset charset)
+ throws IOException {
+ return getSnowballWordSet(IOUtils.getDecodingReader(stream, charset));
+ }
+
+ /**
* Reads a stem dictionary. Each line contains:
*
* <pre>word<b>\t</b>stem</pre>
diff --git a/lucene/core/src/java/org/apache/lucene/util/ClasspathResourceLoader.java b/lucene/core/src/java/org/apache/lucene/util/ClasspathResourceLoader.java
index 7bf607f..5dc4b88 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ClasspathResourceLoader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ClasspathResourceLoader.java
@@ -22,6 +22,13 @@ import java.io.InputStream;
/**
* Simple {@link ResourceLoader} that uses {@link ClassLoader#getResourceAsStream(String)} and
* {@link Class#forName(String,boolean,ClassLoader)} to open resources and classes, respectively.
+ *
+ * <p>To use this class with the Java Module System, you must open all modules on classpath that
+ * contain resources to the {@code org.apache.lucene.core} module, otherwise resources can't be
+ * looked up. It is recommended to use {@link ModuleResourceLoader} for such use cases, as it
+ * limits resource lookup to the given module.
+ *
+ * @see ModuleResourceLoader
*/
public final class ClasspathResourceLoader implements ResourceLoader {
private final Class<?> clazz;
@@ -54,7 +61,12 @@ public final class ClasspathResourceLoader implements ResourceLoader {
(clazz != null)
? clazz.getResourceAsStream(resource)
: loader.getResourceAsStream(resource);
- if (stream == null) throw new IOException("Resource not found: " + resource);
+ if (stream == null) {
+ throw new IOException(
+ "Resource not found (if you use Java Module System, make sure to open "
+ + "module and package containing resources to 'org.apache.lucene.core' module): "
+ + resource);
+ }
return stream;
}
@@ -66,14 +78,4 @@ public final class ClasspathResourceLoader implements ResourceLoader {
throw new RuntimeException("Cannot load class: " + cname, e);
}
}
-
- @Override
- public <T> T newInstance(String cname, Class<T> expectedType) {
- Class<? extends T> clazz = findClass(cname, expectedType);
- try {
- return clazz.getConstructor().newInstance();
- } catch (Exception e) {
- throw new RuntimeException("Cannot create instance: " + cname, e);
- }
- }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java
index fc675d4..5acbff1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
import java.io.BufferedReader;
import java.io.Closeable;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -157,6 +158,23 @@ public final class IOUtils {
}
/**
+ * Wrap all calls to {@link Class#getResource(String)} or {@link
+ * Class#getResourceAsStream(String)} using this method to enforce existence of the resource. This
+ * code works around those methods returning {@code null} to signal non-existence.
+ *
+ * @param resource return value of above methods
+ * @param name name of the resource
+ * @return the resource passed in if existent
+ * @throws FileNotFoundException if resource was not found
+ */
+ public static <T> T requireResourceNonNull(T resource, String name) throws IOException {
+ if (resource == null) {
+ throw new FileNotFoundException("The resource '" + name + "' was not found.");
+ }
+ return resource;
+ }
+
+ /**
* Opens a Reader for the given resource using a {@link CharsetDecoder}. Unlike Java's defaults
* this reader will throw an exception if your it detects the read charset doesn't match the
* expected {@link Charset}.
@@ -168,13 +186,21 @@ public final class IOUtils {
* @param resource the resource name to load
* @param charSet the expected charset
* @return a reader to read the given file
+ * @deprecated {@link Class#getResourceAsStream(String)} is caller sensitive and cannot load
+ * resources across Java Modules. Please call the {@code getResourceAsStream()} directly and
+ * use {@link #requireResourceNonNull(Object,String)} to signal missing resources ({@code null}).
*/
+ @Deprecated(forRemoval = true, since = "9.1")
public static Reader getDecodingReader(Class<?> clazz, String resource, Charset charSet)
throws IOException {
- InputStream stream = null;
+ var argModule = clazz.getModule();
+ if (argModule.isNamed() && argModule != IOUtils.class.getModule()) {
+ throw new UnsupportedOperationException(
+ "getDecodingReader(class,...) does not work when Java Module System is enabled.");
+ }
+ InputStream stream = requireResourceNonNull(clazz.getResourceAsStream(resource), resource);
boolean success = false;
try {
- stream = clazz.getResourceAsStream(resource);
final Reader reader = getDecodingReader(stream, charSet);
success = true;
return reader;
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java b/lucene/core/src/java/org/apache/lucene/util/ModuleResourceLoader.java
similarity index 53%
copy from lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
copy to lucene/core/src/java/org/apache/lucene/util/ModuleResourceLoader.java
index e6d9f3f..4798dc8 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ModuleResourceLoader.java
@@ -14,43 +14,42 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.analysis.ja;
+package org.apache.lucene.util;
-import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import org.apache.lucene.util.ResourceLoader;
-/** Fake resource loader for tests: works if you want to fake reading a single file */
-class StringMockResourceLoader implements ResourceLoader {
- String text;
+/**
+ * Simple {@link ResourceLoader} that uses {@link Module#getResourceAsStream(String)} and {@link
+ * Class#forName(Module,String)} to open resources and classes, respectively. Resource paths must be
+ * absolute with respect to the module's root.
+ *
+ * <p>To use this class, you must open the module to the {@code org.apache.lucene.core} module,
+ * otherwise resources can't be looked up.
+ */
+public final class ModuleResourceLoader implements ResourceLoader {
+ private final Module module;
- public StringMockResourceLoader(String text) {
- this.text = text;
+ /** Creates an instance using the given Java Module to load resources and classes. */
+ public ModuleResourceLoader(Module module) {
+ this.module = module;
}
@Override
- public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
- try {
- return Class.forName(cname).asSubclass(expectedType);
- } catch (Exception e) {
- throw new RuntimeException("Cannot load class: " + cname, e);
- }
+ public InputStream openResource(String resource) throws IOException {
+ final var stream = module.getResourceAsStream(resource);
+ if (stream == null) throw new IOException("Resource not found: " + resource);
+ return stream;
}
@Override
- public <T> T newInstance(String cname, Class<T> expectedType) {
- Class<? extends T> clazz = findClass(cname, expectedType);
+ public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
try {
- return clazz.getConstructor().newInstance();
+ final var clazz = Class.forName(module, cname);
+ if (clazz == null) throw new ClassNotFoundException(cname);
+ return clazz.asSubclass(expectedType);
} catch (Exception e) {
- throw new RuntimeException("Cannot create instance: " + cname, e);
+ throw new RuntimeException("Cannot load class: " + cname, e);
}
}
-
- @Override
- public InputStream openResource(String resource) throws IOException {
- return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
- }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/ResourceLoader.java b/lucene/core/src/java/org/apache/lucene/util/ResourceLoader.java
index e2ee4ff..7955893 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ResourceLoader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ResourceLoader.java
@@ -30,5 +30,12 @@ public interface ResourceLoader {
/** Creates an instance of the name and expected type */
// TODO: fix exception handling
- public <T> T newInstance(String cname, Class<T> expectedType);
+ public default <T> T newInstance(String cname, Class<T> expectedType) {
+ Class<? extends T> clazz = findClass(cname, expectedType);
+ try {
+ return clazz.getConstructor().newInstance();
+ } catch (Exception e) {
+ throw new RuntimeException("Cannot create instance: " + cname, e);
+ }
+ }
}
diff --git a/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java b/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java
index 11744ca..06fc8bd 100644
--- a/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java
+++ b/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java
@@ -730,10 +730,11 @@ public final class JavascriptCompiler {
Map<String, Method> map = new HashMap<>();
try {
final Properties props = new Properties();
+ var name = JavascriptCompiler.class.getSimpleName() + ".properties";
try (Reader in =
IOUtils.getDecodingReader(
- JavascriptCompiler.class,
- JavascriptCompiler.class.getSimpleName() + ".properties",
+ IOUtils.requireResourceNonNull(
+ JavascriptCompiler.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8)) {
props.load(in);
}
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestLookupBenchmark.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestLookupBenchmark.java
index fe9da12..f76faf5 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestLookupBenchmark.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestLookupBenchmark.java
@@ -43,6 +43,7 @@ import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.IOUtils;
import org.junit.BeforeClass;
import org.junit.Ignore;
@@ -91,8 +92,9 @@ public class TestLookupBenchmark extends LuceneTestCase {
/** Collect the multilingual input for benchmarks/ tests. */
public static List<Input> readTop50KWiki() throws Exception {
List<Input> input = new ArrayList<>();
- URL resource = TestLookupBenchmark.class.getResource("Top50KWiki.utf8");
- assert resource != null : "Resource missing: Top50KWiki.utf8";
+ var name = "Top50KWiki.utf8";
+ URL resource =
+ IOUtils.requireResourceNonNull(TestLookupBenchmark.class.getResource(name), name);
String line = null;
BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8));
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java
index 0d3d1ee..0038e08 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java
@@ -62,6 +62,7 @@ import java.lang.annotation.Target;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.URI;
+import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystem;
import java.nio.file.NoSuchFileException;
@@ -151,6 +152,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CommandLineUtil;
import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.SuppressForbidden;
@@ -2079,19 +2081,16 @@ public abstract class LuceneTestCase extends Assert {
*/
protected Path getDataPath(String name) throws IOException {
try {
- return Paths.get(this.getClass().getResource(name).toURI());
- } catch (Exception e) {
- throw new IOException("Cannot find resource: " + name, e);
+ return Paths.get(
+ IOUtils.requireResourceNonNull(this.getClass().getResource(name), name).toURI());
+ } catch (URISyntaxException e) {
+ throw new AssertionError(e);
}
}
/** Gets a resource from the test's classpath as {@link InputStream}. */
protected InputStream getDataInputStream(String name) throws IOException {
- InputStream in = this.getClass().getResourceAsStream(name);
- if (in == null) {
- throw new IOException("Cannot find resource: " + name);
- }
- return in;
+ return IOUtils.requireResourceNonNull(this.getClass().getResourceAsStream(name), name);
}
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader)