You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2022/05/06 14:19:45 UTC
[lucene] branch branch_9x updated: LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (#868)
This is an automated email from the ASF dual-hosted git repository.
uschindler pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 749bfb8072b LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (#868)
749bfb8072b is described below
commit 749bfb8072b480eb76fc9c3969676bcf94d3c0f9
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Fri May 6 16:19:40 2022 +0200
LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (#868)
---
lucene/CHANGES.txt | 9 ++++
lucene/MIGRATE.md | 13 +++++
.../lucene/analysis/ja/dict/BinaryDictionary.java | 12 ++---
.../lucene/analysis/ja/dict/ConnectionCosts.java | 21 +++++---
.../analysis/ja/dict/TokenInfoDictionary.java | 44 ++++++++++-------
.../lucene/analysis/ja/dict/UnknownDictionary.java | 31 +++++++-----
.../analysis/ja/dict/TestExternalDictionary.java | 56 ++++++++++++++++++++++
.../lucene/analysis/ko/dict/BinaryDictionary.java | 21 ++++++++
.../lucene/analysis/ko/dict/ConnectionCosts.java | 25 ++++++----
.../analysis/ko/dict/TokenInfoDictionary.java | 43 +++++++++++------
.../lucene/analysis/ko/dict/UnknownDictionary.java | 30 ++++++++----
.../analysis/ko/dict/TestExternalDictionary.java | 56 ++++++++++++++++++++++
12 files changed, 289 insertions(+), 72 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5d08c8518ad..40660f722fb 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -14,6 +14,11 @@ API Changes
taxoEpoch decide. Add a test case that demonstrates the inconsistencies caused when you reuse taxoArrays on older
checkpoints. (Gautam Worah)
+* LUCENE-10558: Add new constructors to Kuromoji and Nori dictionary classes to support classpath /
+ module system usage. It is now possible to use JDK's Class/ClassLoader/Module#getResource(...) APIs
+ and pass their returned URL to dictionary constructors to load resources from Classpath or Module
+ resources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
+
New Features
---------------------
@@ -110,6 +115,10 @@ Bug Fixes
* LUCENE-10518: Relax field consistency check for old indices (Nhat Nguyen)
+* LUCENE-10558: Restore behaviour of deprecated Kuromoji and Nori dictionary constructors for
+ custom dictionary support. Please also use new URL-based constructors for classpath/module
+ system resources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
+
Build
---------------------
diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md
index 9cf569d48c8..378cce193fe 100644
--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@@ -51,6 +51,19 @@ the [Log4j JDK Logging Adapter](https://logging.apache.org/log4j/2.x/log4j-jul/i
in combination with the corresponding system property:
`java.util.logging.manager=org.apache.logging.log4j.jul.LogManager`.
+### Kuromoji and Nori analysis component constructors for custom dictionaries
+
+The Kuromoji and Nori analysis modules had some way to customize the backing dictionaries
+by passing a path to file or classpath resources using some inconsistently implemented
+APIs. This was buggy from the beginning, but some users made use of it. Due to the move to the Java
+module system, resource lookup on the classpath in particular stopped working correctly.
+The Lucene team therefore implemented new APIs to create dictionary implementations
+with custom data files. Unfortunately, the 9.1 version had some shortcomings, including
+when using the now-deprecated constructors, so users are advised to upgrade to
+Lucene 9.2 or stay with 9.0.
+
+See LUCENE-10558 for more details and workarounds.
+
## Migration from Lucene 8.x to Lucene 9.0
### Rename of binary artifacts from '**-analyzers-**' to '**-analysis-**' (LUCENE-9562)
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
index 4b75bed4ea6..78255b0b450 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
@@ -25,6 +25,7 @@ import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.Files;
import java.nio.file.Paths;
+import java.util.Objects;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -140,7 +141,11 @@ public abstract class BinaryDictionary implements Dictionary {
throws IOException {
switch (scheme) {
case CLASSPATH:
- return getClassResource(path);
+ Objects.requireNonNull(
+ path,
+ "Deprecated API no longer works with null paths, to load default resources use default ctors.");
+ return IOUtils.requireResourceNonNull(
+ BinaryDictionary.class.getClassLoader().getResourceAsStream(path), path);
case FILE:
return Files.newInputStream(Paths.get(path));
default:
@@ -148,11 +153,6 @@ public abstract class BinaryDictionary implements Dictionary {
}
}
- @Deprecated(forRemoval = true, since = "9.1")
- private static InputStream getClassResource(String path) throws IOException {
- return IOUtils.requireResourceNonNull(BinaryDictionary.class.getResourceAsStream(path), path);
- }
-
public void lookupWordIds(int sourceId, IntsRef ref) {
ref.ints = targetMap;
ref.offset = targetMapOffsets[sourceId];
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
index 16084c644a5..8adc2e74076 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.ja.dict;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -42,15 +42,13 @@ public final class ConnectionCosts {
/**
* @param scheme - scheme for loading resources (FILE or CLASSPATH).
* @param path - where to load resources from, without the ".dat" suffix
- * @deprecated replaced by {@link #ConnectionCosts(Path)}
+ * @deprecated replaced by {@link #ConnectionCosts(Path)} for files and {@link
+ * #ConnectionCosts(URL)} for classpath/module resources.
*/
@Deprecated(forRemoval = true, since = "9.1")
@SuppressWarnings("removal")
public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException {
- this(
- scheme == BinaryDictionary.ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(path + FILENAME_SUFFIX))
- : ConnectionCosts::getClassResource);
+ this(() -> BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX));
}
/**
@@ -63,6 +61,17 @@ public final class ConnectionCosts {
this(() -> Files.newInputStream(connectionCostsFile));
}
+ /**
+ * Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link
+ * ClassLoader#getResource(String)}).
+ *
+ * @param connectionCostsUrl where to load connection costs resource
+ * @throws IOException if resource was not found or broken
+ */
+ public ConnectionCosts(URL connectionCostsUrl) throws IOException {
+ this(() -> connectionCostsUrl.openStream());
+ }
+
private ConnectionCosts() throws IOException {
this(ConnectionCosts::getClassResource);
}
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
index 55b4633b448..42fa603d11c 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ja.dict;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.util.IOSupplier;
@@ -41,27 +41,20 @@ public final class TokenInfoDictionary extends BinaryDictionary {
/**
* @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
- * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH
- * scheme only, use this class's name as the path.
- * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)}
+ * @param resourcePath - where to load resources (dictionaries) from.
+ * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} for files and
+ * {@link #TokenInfoDictionary(URL, URL, URL, URL)} for classpath/module resources
*/
@Deprecated(forRemoval = true, since = "9.1")
@SuppressWarnings("removal")
public TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath)
throws IOException {
this(
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX))
- : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX))
- : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX))
- : () -> getClassResource(DICT_FILENAME_SUFFIX),
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + FST_FILENAME_SUFFIX))
- : () -> getClassResource(FST_FILENAME_SUFFIX));
+ () ->
+ BinaryDictionary.getResource(resourceScheme, resourcePath + TARGETMAP_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(resourceScheme, resourcePath + POSDICT_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(resourceScheme, resourcePath + DICT_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(resourceScheme, resourcePath + FST_FILENAME_SUFFIX));
}
/**
@@ -82,6 +75,25 @@ public final class TokenInfoDictionary extends BinaryDictionary {
() -> Files.newInputStream(fstFile));
}
+ /**
+ * Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with
+ * {@link ClassLoader#getResource(String)}).
+ *
+ * @param targetMapUrl where to load target map resource
+ * @param posDictUrl where to load POS dictionary resource
+ * @param dictUrl where to load dictionary entries resource
+ * @param fstUrl where to load encoded FST data resource
+ * @throws IOException if resource was not found or broken
+ */
+ public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
+ throws IOException {
+ this(
+ () -> targetMapUrl.openStream(),
+ () -> posDictUrl.openStream(),
+ () -> dictUrl.openStream(),
+ () -> fstUrl.openStream());
+ }
+
private TokenInfoDictionary() throws IOException {
this(
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
index e5cff9209e3..bb7c3b5b4b5 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
@@ -18,9 +18,9 @@ package org.apache.lucene.analysis.ja.dict;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import org.apache.lucene.util.IOUtils;
/** Dictionary for unknown-word handling. */
@@ -32,21 +32,16 @@ public final class UnknownDictionary extends BinaryDictionary {
* @param scheme scheme for loading resources (FILE or CLASSPATH).
* @param path where to load resources from; a path, including the file base name without
* extension; this is used to match multiple files with the same base name.
- * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)}
+ * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} for files and {@link
+ * #UnknownDictionary(URL, URL, URL)} for classpath/module resources
*/
@Deprecated(forRemoval = true, since = "9.1")
@SuppressWarnings("removal")
public UnknownDictionary(ResourceScheme scheme, String path) throws IOException {
super(
- scheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(path + TARGETMAP_FILENAME_SUFFIX))
- : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
- scheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(path + POSDICT_FILENAME_SUFFIX))
- : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
- scheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(path + DICT_FILENAME_SUFFIX))
- : () -> getClassResource(DICT_FILENAME_SUFFIX));
+ () -> BinaryDictionary.getResource(scheme, path + TARGETMAP_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(scheme, path + POSDICT_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(scheme, path + DICT_FILENAME_SUFFIX));
}
/**
@@ -64,6 +59,20 @@ public final class UnknownDictionary extends BinaryDictionary {
() -> Files.newInputStream(dictFile));
}
+ /**
+ * Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with
+ * {@link ClassLoader#getResource(String)}).
+ *
+ * @param targetMapUrl where to load target map resource
+ * @param posDictUrl where to load POS dictionary resource
+ * @param dictUrl where to load dictionary entries resource
+ * @throws IOException if resource was not found or broken
+ */
+ public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException {
+ super(
+ () -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream());
+ }
+
private UnknownDictionary() throws IOException {
super(
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java
index bc44723996a..0209154d09d 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java
@@ -32,6 +32,7 @@ import org.junit.Before;
public class TestExternalDictionary extends LuceneTestCase {
private Path dir;
+ private ClassLoader loader = getClass().getClassLoader();
@Override
@Before
@@ -98,4 +99,59 @@ public class TestExternalDictionary extends LuceneTestCase {
new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
assertEquals(1, cc.get(0, 1));
}
+
+ public void testLoadExternalUrlTokenInfoDictionary() throws Exception {
+ String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+ TokenInfoDictionary dict =
+ new TokenInfoDictionary(
+ loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX));
+ assertNotNull(dict.getFST());
+ }
+
+ public void testLoadExternalUrlUnknownDictionary() throws Exception {
+ String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+ UnknownDictionary dict =
+ new UnknownDictionary(
+ loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX));
+ assertNotNull(dict.getCharacterDefinition());
+ }
+
+ public void testLoadExternalUrlConnectionCosts() throws Exception {
+ String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+ ConnectionCosts cc =
+ new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
+ assertEquals(1, cc.get(0, 1));
+ }
+
+ @Deprecated(forRemoval = true, since = "9.1")
+ @SuppressWarnings("removal")
+ public void testDeprecatedLoadExternalTokenInfoDictionary() throws Exception {
+ String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+ TokenInfoDictionary dict =
+ new TokenInfoDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+ assertNotNull(dict.getFST());
+ }
+
+ @Deprecated(forRemoval = true, since = "9.1")
+ @SuppressWarnings("removal")
+ public void testDeprecatedLoadExternalUnknownDictionary() throws Exception {
+ String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+ UnknownDictionary dict =
+ new UnknownDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+ assertNotNull(dict.getCharacterDefinition());
+ }
+
+ @Deprecated(forRemoval = true, since = "9.1")
+ @SuppressWarnings("removal")
+ public void testDeprecatedLoadExternalConnectionCosts() throws Exception {
+ String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+ ConnectionCosts cc =
+ new ConnectionCosts(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+ assertEquals(1, cc.get(0, 1));
+ }
}
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
index aba6782182b..767eba2e6fd 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
@@ -23,11 +23,15 @@ import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Objects;
import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.util.IOSupplier;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
/** Base class for a binary-encoded in-memory dictionary. */
@@ -114,6 +118,23 @@ public abstract class BinaryDictionary implements Dictionary {
targetMapOffsets[sourceId] = targetMap.length;
}
+ @Deprecated(forRemoval = true, since = "9.1")
+ public static final InputStream getResource(ResourceScheme scheme, String path)
+ throws IOException {
+ switch (scheme) {
+ case CLASSPATH:
+ Objects.requireNonNull(
+ path,
+ "Deprecated API no longer works with null paths, to load default resources use default ctors.");
+ return IOUtils.requireResourceNonNull(
+ BinaryDictionary.class.getClassLoader().getResourceAsStream(path), path);
+ case FILE:
+ return Files.newInputStream(Paths.get(path));
+ default:
+ throw new IllegalStateException("unknown resource scheme " + scheme);
+ }
+ }
+
public void lookupWordIds(int sourceId, IntsRef ref) {
ref.ints = targetMap;
ref.offset = targetMapOffsets[sourceId];
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
index 41dc4aa713a..6e68963a286 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.ko.dict;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -41,16 +41,14 @@ public final class ConnectionCosts {
/**
* @param scheme - scheme for loading resources (FILE or CLASSPATH).
- * @param resourcePath - where to load resources from, without the ".dat" suffix
+ * @param path - where to load resources from, without the ".dat" suffix
+ * @deprecated replaced by {@link #ConnectionCosts(Path)} for files and {@link
+ * #ConnectionCosts(URL)} for classpath/module resources.
*/
@Deprecated(forRemoval = true, since = "9.1")
@SuppressWarnings("removal")
- public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String resourcePath)
- throws IOException {
- this(
- scheme == BinaryDictionary.ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + FILENAME_SUFFIX))
- : ConnectionCosts::getClassResource);
+ public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException {
+ this(() -> BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX));
}
/**
@@ -63,6 +61,17 @@ public final class ConnectionCosts {
this(() -> Files.newInputStream(connectionCostsFile));
}
+ /**
+ * Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link
+ * ClassLoader#getResource(String)}).
+ *
+ * @param connectionCostsUrl where to load connection costs resource
+ * @throws IOException if resource was not found or broken
+ */
+ public ConnectionCosts(URL connectionCostsUrl) throws IOException {
+ this(() -> connectionCostsUrl.openStream());
+ }
+
private ConnectionCosts() throws IOException {
this(ConnectionCosts::getClassResource);
}
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
index c5182a5123b..910ad3f4f0a 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ko.dict;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.util.IOSupplier;
@@ -49,26 +49,20 @@ public final class TokenInfoDictionary extends BinaryDictionary {
/**
* @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
- * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH
- * scheme only, use this class's name as the path.
+ * @param resourcePath - where to load resources (dictionaries) from.
+ * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} for files and
+ * {@link #TokenInfoDictionary(URL, URL, URL, URL)} for classpath/module resources
*/
@Deprecated(forRemoval = true, since = "9.1")
@SuppressWarnings("removal")
public TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath)
throws IOException {
this(
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX))
- : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX))
- : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX))
- : () -> getClassResource(DICT_FILENAME_SUFFIX),
- resourceScheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + FST_FILENAME_SUFFIX))
- : () -> getClassResource(FST_FILENAME_SUFFIX));
+ () ->
+ BinaryDictionary.getResource(resourceScheme, resourcePath + TARGETMAP_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(resourceScheme, resourcePath + POSDICT_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(resourceScheme, resourcePath + DICT_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(resourceScheme, resourcePath + FST_FILENAME_SUFFIX));
}
/**
@@ -89,6 +83,25 @@ public final class TokenInfoDictionary extends BinaryDictionary {
() -> Files.newInputStream(fstFile));
}
+ /**
+ * Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with
+ * {@link ClassLoader#getResource(String)}).
+ *
+ * @param targetMapUrl where to load target map resource
+ * @param posDictUrl where to load POS dictionary resource
+ * @param dictUrl where to load dictionary entries resource
+ * @param fstUrl where to load encoded FST data resource
+ * @throws IOException if resource was not found or broken
+ */
+ public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
+ throws IOException {
+ this(
+ () -> targetMapUrl.openStream(),
+ () -> posDictUrl.openStream(),
+ () -> dictUrl.openStream(),
+ () -> fstUrl.openStream());
+ }
+
private TokenInfoDictionary(
IOSupplier<InputStream> targetMapResource,
IOSupplier<InputStream> posResource,
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
index 10eb1611b82..d7ad14de2cd 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
@@ -18,9 +18,9 @@ package org.apache.lucene.analysis.ko.dict;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import org.apache.lucene.util.IOUtils;
/** Dictionary for unknown-word handling. */
@@ -31,20 +31,16 @@ public final class UnknownDictionary extends BinaryDictionary {
* @param scheme scheme for loading resources (FILE or CLASSPATH).
* @param resourcePath where to load resources from; a path, including the file base name without
* extension; this is used to match multiple files with the same base name.
+ * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} for files and {@link
+ * #UnknownDictionary(URL, URL, URL)} for classpath/module resources
*/
@Deprecated(forRemoval = true, since = "9.1")
@SuppressWarnings("removal")
public UnknownDictionary(ResourceScheme scheme, String resourcePath) throws IOException {
super(
- scheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX))
- : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
- scheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX))
- : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
- scheme == ResourceScheme.FILE
- ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX))
- : () -> getClassResource(DICT_FILENAME_SUFFIX));
+ () -> BinaryDictionary.getResource(scheme, resourcePath + TARGETMAP_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(scheme, resourcePath + POSDICT_FILENAME_SUFFIX),
+ () -> BinaryDictionary.getResource(scheme, resourcePath + DICT_FILENAME_SUFFIX));
}
/**
@@ -62,6 +58,20 @@ public final class UnknownDictionary extends BinaryDictionary {
() -> Files.newInputStream(dictFile));
}
+ /**
+ * Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with
+ * {@link ClassLoader#getResource(String)}).
+ *
+ * @param targetMapUrl where to load target map resource
+ * @param posDictUrl where to load POS dictionary resource
+ * @param dictUrl where to load dictionary entries resource
+ * @throws IOException if resource was not found or broken
+ */
+ public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException {
+ super(
+ () -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream());
+ }
+
private UnknownDictionary() throws IOException {
super(
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java
index 5f8edab8934..ba749e92461 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java
@@ -32,6 +32,7 @@ import org.junit.Before;
public class TestExternalDictionary extends LuceneTestCase {
private Path dir;
+ private ClassLoader loader = getClass().getClassLoader();
@Override
@Before
@@ -98,4 +99,59 @@ public class TestExternalDictionary extends LuceneTestCase {
new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
assertEquals(0, cc.get(1, 1));
}
+
+ public void testLoadExternalUrlTokenInfoDictionary() throws Exception {
+ String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+ TokenInfoDictionary dict =
+ new TokenInfoDictionary(
+ loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX));
+ assertNotNull(dict.getFST());
+ }
+
+ public void testLoadExternalUrlUnknownDictionary() throws Exception {
+ String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+ UnknownDictionary dict =
+ new UnknownDictionary(
+ loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+ loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX));
+ assertNotNull(dict.getCharacterDefinition());
+ }
+
+ public void testLoadExternalUrlConnectionCosts() throws Exception {
+ String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+ ConnectionCosts cc =
+ new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
+ assertEquals(0, cc.get(1, 1));
+ }
+
+ @Deprecated(forRemoval = true, since = "9.1")
+ @SuppressWarnings("removal")
+ public void testDeprecatedLoadExternalTokenInfoDictionary() throws Exception {
+ String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+ TokenInfoDictionary dict =
+ new TokenInfoDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+ assertNotNull(dict.getFST());
+ }
+
+ @Deprecated(forRemoval = true, since = "9.1")
+ @SuppressWarnings("removal")
+ public void testDeprecatedLoadExternalUnknownDictionary() throws Exception {
+ String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+ UnknownDictionary dict =
+ new UnknownDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+ assertNotNull(dict.getCharacterDefinition());
+ }
+
+ @Deprecated(forRemoval = true, since = "9.1")
+ @SuppressWarnings("removal")
+ public void testDeprecatedLoadExternalConnectionCosts() throws Exception {
+ String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+ ConnectionCosts cc =
+ new ConnectionCosts(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+ assertEquals(0, cc.get(1, 1));
+ }
}