You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2022/05/06 14:19:45 UTC

[lucene] branch branch_9x updated: LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (#868)

This is an automated email from the ASF dual-hosted git repository.

uschindler pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 749bfb8072b LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (#868)
749bfb8072b is described below

commit 749bfb8072b480eb76fc9c3969676bcf94d3c0f9
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Fri May 6 16:19:40 2022 +0200

    LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (#868)
---
 lucene/CHANGES.txt                                 |  9 ++++
 lucene/MIGRATE.md                                  | 13 +++++
 .../lucene/analysis/ja/dict/BinaryDictionary.java  | 12 ++---
 .../lucene/analysis/ja/dict/ConnectionCosts.java   | 21 +++++---
 .../analysis/ja/dict/TokenInfoDictionary.java      | 44 ++++++++++-------
 .../lucene/analysis/ja/dict/UnknownDictionary.java | 31 +++++++-----
 .../analysis/ja/dict/TestExternalDictionary.java   | 56 ++++++++++++++++++++++
 .../lucene/analysis/ko/dict/BinaryDictionary.java  | 21 ++++++++
 .../lucene/analysis/ko/dict/ConnectionCosts.java   | 25 ++++++----
 .../analysis/ko/dict/TokenInfoDictionary.java      | 43 +++++++++++------
 .../lucene/analysis/ko/dict/UnknownDictionary.java | 30 ++++++++----
 .../analysis/ko/dict/TestExternalDictionary.java   | 56 ++++++++++++++++++++++
 12 files changed, 289 insertions(+), 72 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5d08c8518ad..40660f722fb 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -14,6 +14,11 @@ API Changes
   taxoEpoch decide. Add a test case that demonstrates the inconsistencies caused when you reuse taxoArrays on older
   checkpoints. (Gautam Worah)
 
+* LUCENE-10558: Add new constructors to Kuromoji and Nori dictionary classes to support classpath /
+  module system usage. It is now possible to use the JDK's Class/ClassLoader/Module#getResource(...)
+  APIs and pass the returned URLs to the dictionary constructors to load dictionaries from classpath
+  or module resources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
+
 New Features
 ---------------------
 
@@ -110,6 +115,10 @@ Bug Fixes
 
 * LUCENE-10518: Relax field consistency check for old indices (Nhat Nguyen)
 
+* LUCENE-10558: Restore behaviour of deprecated Kuromoji and Nori dictionary constructors for
+  custom dictionary support. Please also use new URL-based constructors for classpath/module
+  system resources.  (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
+
 Build
 ---------------------
 
diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md
index 9cf569d48c8..378cce193fe 100644
--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@@ -51,6 +51,19 @@ the [Log4j JDK Logging Adapter](https://logging.apache.org/log4j/2.x/log4j-jul/i
 in combination with the corresponding system property:
 `java.util.logging.manager=org.apache.logging.log4j.jul.LogManager`.
 
+### Kuromoji and Nori analysis component constructors for custom dictionaries
+
+The Kuromoji and Nori analysis modules offered a way to customize the backing dictionaries
+by passing a path to file or classpath resources through some inconsistently implemented
+APIs. This was buggy from the beginning, but some users relied on it. With the move to the
+Java module system, resource lookup on the classpath in particular stopped working correctly.
+The Lucene team therefore implemented new APIs to create dictionary implementations
+with custom data files. Unfortunately, the 9.1 version had some shortcomings,
+including in the now-deprecated constructors, so users are advised to upgrade to
+Lucene 9.2 or stay with 9.0.
+
+See LUCENE-10558 for more details and workarounds.
+
 ## Migration from Lucene 8.x to Lucene 9.0
 
 ### Rename of binary artifacts from '**-analyzers-**' to '**-analysis-**' (LUCENE-9562)
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
index 4b75bed4ea6..78255b0b450 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
@@ -25,6 +25,7 @@ import java.nio.channels.Channels;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.file.Files;
 import java.nio.file.Paths;
+import java.util.Objects;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
@@ -140,7 +141,11 @@ public abstract class BinaryDictionary implements Dictionary {
       throws IOException {
     switch (scheme) {
       case CLASSPATH:
-        return getClassResource(path);
+        Objects.requireNonNull(
+            path,
+            "Deprecated API no longer works with null paths, to load default resources use default ctors.");
+        return IOUtils.requireResourceNonNull(
+            BinaryDictionary.class.getClassLoader().getResourceAsStream(path), path);
       case FILE:
         return Files.newInputStream(Paths.get(path));
       default:
@@ -148,11 +153,6 @@ public abstract class BinaryDictionary implements Dictionary {
     }
   }
 
-  @Deprecated(forRemoval = true, since = "9.1")
-  private static InputStream getClassResource(String path) throws IOException {
-    return IOUtils.requireResourceNonNull(BinaryDictionary.class.getResourceAsStream(path), path);
-  }
-
   public void lookupWordIds(int sourceId, IntsRef ref) {
     ref.ints = targetMap;
     ref.offset = targetMapOffsets[sourceId];
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
index 16084c644a5..8adc2e74076 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.ja.dict;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
@@ -42,15 +42,13 @@ public final class ConnectionCosts {
   /**
    * @param scheme - scheme for loading resources (FILE or CLASSPATH).
    * @param path - where to load resources from, without the ".dat" suffix
-   * @deprecated replaced by {@link #ConnectionCosts(Path)}
+   * @deprecated replaced by {@link #ConnectionCosts(Path)} for files and {@link
+   *     #ConnectionCosts(URL)} for classpath/module resources.
    */
   @Deprecated(forRemoval = true, since = "9.1")
   @SuppressWarnings("removal")
   public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException {
-    this(
-        scheme == BinaryDictionary.ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(path + FILENAME_SUFFIX))
-            : ConnectionCosts::getClassResource);
+    this(() -> BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX));
   }
 
   /**
@@ -63,6 +61,17 @@ public final class ConnectionCosts {
     this(() -> Files.newInputStream(connectionCostsFile));
   }
 
+  /**
+   * Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link
+   * ClassLoader#getResource(String)}).
+   *
+   * @param connectionCostsUrl where to load connection costs resource
+   * @throws IOException if resource was not found or broken
+   */
+  public ConnectionCosts(URL connectionCostsUrl) throws IOException {
+    this(() -> connectionCostsUrl.openStream());
+  }
+
   private ConnectionCosts() throws IOException {
     this(ConnectionCosts::getClassResource);
   }
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
index 55b4633b448..42fa603d11c 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ja.dict;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.IOSupplier;
@@ -41,27 +41,20 @@ public final class TokenInfoDictionary extends BinaryDictionary {
 
   /**
    * @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
-   * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH
-   *     scheme only, use this class's name as the path.
-   * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)}
+   * @param resourcePath - where to load resources (dictionaries) from.
+   * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} for files and
+   *     {@link #TokenInfoDictionary(URL, URL, URL, URL)} for classpath/module resources
    */
   @Deprecated(forRemoval = true, since = "9.1")
   @SuppressWarnings("removal")
   public TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath)
       throws IOException {
     this(
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX))
-            : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX))
-            : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX))
-            : () -> getClassResource(DICT_FILENAME_SUFFIX),
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + FST_FILENAME_SUFFIX))
-            : () -> getClassResource(FST_FILENAME_SUFFIX));
+        () ->
+            BinaryDictionary.getResource(resourceScheme, resourcePath + TARGETMAP_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(resourceScheme, resourcePath + POSDICT_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(resourceScheme, resourcePath + DICT_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(resourceScheme, resourcePath + FST_FILENAME_SUFFIX));
   }
 
   /**
@@ -82,6 +75,25 @@ public final class TokenInfoDictionary extends BinaryDictionary {
         () -> Files.newInputStream(fstFile));
   }
 
+  /**
+   * Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with
+   * {@link ClassLoader#getResource(String)}).
+   *
+   * @param targetMapUrl where to load target map resource
+   * @param posDictUrl where to load POS dictionary resource
+   * @param dictUrl where to load dictionary entries resource
+   * @param fstUrl where to load encoded FST data resource
+   * @throws IOException if resource was not found or broken
+   */
+  public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
+      throws IOException {
+    this(
+        () -> targetMapUrl.openStream(),
+        () -> posDictUrl.openStream(),
+        () -> dictUrl.openStream(),
+        () -> fstUrl.openStream());
+  }
+
   private TokenInfoDictionary() throws IOException {
     this(
         () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
index e5cff9209e3..bb7c3b5b4b5 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java
@@ -18,9 +18,9 @@ package org.apache.lucene.analysis.ja.dict;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.util.IOUtils;
 
 /** Dictionary for unknown-word handling. */
@@ -32,21 +32,16 @@ public final class UnknownDictionary extends BinaryDictionary {
    * @param scheme scheme for loading resources (FILE or CLASSPATH).
    * @param path where to load resources from; a path, including the file base name without
    *     extension; this is used to match multiple files with the same base name.
-   * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)}
+   * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} for files and {@link
+   *     #UnknownDictionary(URL, URL, URL)} for classpath/module resources
    */
   @Deprecated(forRemoval = true, since = "9.1")
   @SuppressWarnings("removal")
   public UnknownDictionary(ResourceScheme scheme, String path) throws IOException {
     super(
-        scheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(path + TARGETMAP_FILENAME_SUFFIX))
-            : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
-        scheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(path + POSDICT_FILENAME_SUFFIX))
-            : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
-        scheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(path + DICT_FILENAME_SUFFIX))
-            : () -> getClassResource(DICT_FILENAME_SUFFIX));
+        () -> BinaryDictionary.getResource(scheme, path + TARGETMAP_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(scheme, path + POSDICT_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(scheme, path + DICT_FILENAME_SUFFIX));
   }
 
   /**
@@ -64,6 +59,20 @@ public final class UnknownDictionary extends BinaryDictionary {
         () -> Files.newInputStream(dictFile));
   }
 
+  /**
+   * Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with
+   * {@link ClassLoader#getResource(String)}).
+   *
+   * @param targetMapUrl where to load target map resource
+   * @param posDictUrl where to load POS dictionary resource
+   * @param dictUrl where to load dictionary entries resource
+   * @throws IOException if resource was not found or broken
+   */
+  public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException {
+    super(
+        () -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream());
+  }
+
   private UnknownDictionary() throws IOException {
     super(
         () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java
index bc44723996a..0209154d09d 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java
@@ -32,6 +32,7 @@ import org.junit.Before;
 public class TestExternalDictionary extends LuceneTestCase {
 
   private Path dir;
+  private ClassLoader loader = getClass().getClassLoader();
 
   @Override
   @Before
@@ -98,4 +99,59 @@ public class TestExternalDictionary extends LuceneTestCase {
         new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
     assertEquals(1, cc.get(0, 1));
   }
+
+  public void testLoadExternalUrlTokenInfoDictionary() throws Exception {
+    String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+    TokenInfoDictionary dict =
+        new TokenInfoDictionary(
+            loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX));
+    assertNotNull(dict.getFST());
+  }
+
+  public void testLoadExternalUrlUnknownDictionary() throws Exception {
+    String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+    UnknownDictionary dict =
+        new UnknownDictionary(
+            loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX));
+    assertNotNull(dict.getCharacterDefinition());
+  }
+
+  public void testLoadExternalUrlConnectionCosts() throws Exception {
+    String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+    ConnectionCosts cc =
+        new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
+    assertEquals(1, cc.get(0, 1));
+  }
+
+  @Deprecated(forRemoval = true, since = "9.1")
+  @SuppressWarnings("removal")
+  public void testDeprecatedLoadExternalTokenInfoDictionary() throws Exception {
+    String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+    TokenInfoDictionary dict =
+        new TokenInfoDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+    assertNotNull(dict.getFST());
+  }
+
+  @Deprecated(forRemoval = true, since = "9.1")
+  @SuppressWarnings("removal")
+  public void testDeprecatedLoadExternalUnknownDictionary() throws Exception {
+    String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+    UnknownDictionary dict =
+        new UnknownDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+    assertNotNull(dict.getCharacterDefinition());
+  }
+
+  @Deprecated(forRemoval = true, since = "9.1")
+  @SuppressWarnings("removal")
+  public void testDeprecatedLoadExternalConnectionCosts() throws Exception {
+    String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+    ConnectionCosts cc =
+        new ConnectionCosts(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+    assertEquals(1, cc.get(0, 1));
+  }
 }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
index aba6782182b..767eba2e6fd 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
@@ -23,11 +23,15 @@ import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.Channels;
 import java.nio.channels.ReadableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Objects;
 import org.apache.lucene.analysis.ko.POS;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.IOSupplier;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 
 /** Base class for a binary-encoded in-memory dictionary. */
@@ -114,6 +118,23 @@ public abstract class BinaryDictionary implements Dictionary {
     targetMapOffsets[sourceId] = targetMap.length;
   }
 
+  @Deprecated(forRemoval = true, since = "9.1")
+  public static final InputStream getResource(ResourceScheme scheme, String path)
+      throws IOException {
+    switch (scheme) {
+      case CLASSPATH:
+        Objects.requireNonNull(
+            path,
+            "Deprecated API no longer works with null paths, to load default resources use default ctors.");
+        return IOUtils.requireResourceNonNull(
+            BinaryDictionary.class.getClassLoader().getResourceAsStream(path), path);
+      case FILE:
+        return Files.newInputStream(Paths.get(path));
+      default:
+        throw new IllegalStateException("unknown resource scheme " + scheme);
+    }
+  }
+
   public void lookupWordIds(int sourceId, IntsRef ref) {
     ref.ints = targetMap;
     ref.offset = targetMapOffsets[sourceId];
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
index 41dc4aa713a..6e68963a286 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.ko.dict;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
@@ -41,16 +41,14 @@ public final class ConnectionCosts {
 
   /**
    * @param scheme - scheme for loading resources (FILE or CLASSPATH).
-   * @param resourcePath - where to load resources from, without the ".dat" suffix
+   * @param path - where to load resources from, without the ".dat" suffix
+   * @deprecated replaced by {@link #ConnectionCosts(Path)} for files and {@link
+   *     #ConnectionCosts(URL)} for classpath/module resources.
    */
   @Deprecated(forRemoval = true, since = "9.1")
   @SuppressWarnings("removal")
-  public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String resourcePath)
-      throws IOException {
-    this(
-        scheme == BinaryDictionary.ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + FILENAME_SUFFIX))
-            : ConnectionCosts::getClassResource);
+  public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException {
+    this(() -> BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX));
   }
 
   /**
@@ -63,6 +61,17 @@ public final class ConnectionCosts {
     this(() -> Files.newInputStream(connectionCostsFile));
   }
 
+  /**
+   * Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link
+   * ClassLoader#getResource(String)}).
+   *
+   * @param connectionCostsUrl where to load connection costs resource
+   * @throws IOException if resource was not found or broken
+   */
+  public ConnectionCosts(URL connectionCostsUrl) throws IOException {
+    this(() -> connectionCostsUrl.openStream());
+  }
+
   private ConnectionCosts() throws IOException {
     this(ConnectionCosts::getClassResource);
   }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
index c5182a5123b..910ad3f4f0a 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ko.dict;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.IOSupplier;
@@ -49,26 +49,20 @@ public final class TokenInfoDictionary extends BinaryDictionary {
 
   /**
    * @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
-   * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH
-   *     scheme only, use this class's name as the path.
+   * @param resourcePath - where to load resources (dictionaries) from.
+   * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} for files and
+   *     {@link #TokenInfoDictionary(URL, URL, URL, URL)} for classpath/module resources
    */
   @Deprecated(forRemoval = true, since = "9.1")
   @SuppressWarnings("removal")
   public TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath)
       throws IOException {
     this(
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX))
-            : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX))
-            : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX))
-            : () -> getClassResource(DICT_FILENAME_SUFFIX),
-        resourceScheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + FST_FILENAME_SUFFIX))
-            : () -> getClassResource(FST_FILENAME_SUFFIX));
+        () ->
+            BinaryDictionary.getResource(resourceScheme, resourcePath + TARGETMAP_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(resourceScheme, resourcePath + POSDICT_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(resourceScheme, resourcePath + DICT_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(resourceScheme, resourcePath + FST_FILENAME_SUFFIX));
   }
 
   /**
@@ -89,6 +83,25 @@ public final class TokenInfoDictionary extends BinaryDictionary {
         () -> Files.newInputStream(fstFile));
   }
 
+  /**
+   * Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with
+   * {@link ClassLoader#getResource(String)}).
+   *
+   * @param targetMapUrl where to load target map resource
+   * @param posDictUrl where to load POS dictionary resource
+   * @param dictUrl where to load dictionary entries resource
+   * @param fstUrl where to load encoded FST data resource
+   * @throws IOException if resource was not found or broken
+   */
+  public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
+      throws IOException {
+    this(
+        () -> targetMapUrl.openStream(),
+        () -> posDictUrl.openStream(),
+        () -> dictUrl.openStream(),
+        () -> fstUrl.openStream());
+  }
+
   private TokenInfoDictionary(
       IOSupplier<InputStream> targetMapResource,
       IOSupplier<InputStream> posResource,
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
index 10eb1611b82..d7ad14de2cd 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
@@ -18,9 +18,9 @@ package org.apache.lucene.analysis.ko.dict;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.util.IOUtils;
 
 /** Dictionary for unknown-word handling. */
@@ -31,20 +31,16 @@ public final class UnknownDictionary extends BinaryDictionary {
    * @param scheme scheme for loading resources (FILE or CLASSPATH).
    * @param resourcePath where to load resources from; a path, including the file base name without
    *     extension; this is used to match multiple files with the same base name.
+   * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} for files and {@link
+   *     #UnknownDictionary(URL, URL, URL)} for classpath/module resources
    */
   @Deprecated(forRemoval = true, since = "9.1")
   @SuppressWarnings("removal")
   public UnknownDictionary(ResourceScheme scheme, String resourcePath) throws IOException {
     super(
-        scheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX))
-            : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
-        scheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX))
-            : () -> getClassResource(POSDICT_FILENAME_SUFFIX),
-        scheme == ResourceScheme.FILE
-            ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX))
-            : () -> getClassResource(DICT_FILENAME_SUFFIX));
+        () -> BinaryDictionary.getResource(scheme, resourcePath + TARGETMAP_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(scheme, resourcePath + POSDICT_FILENAME_SUFFIX),
+        () -> BinaryDictionary.getResource(scheme, resourcePath + DICT_FILENAME_SUFFIX));
   }
 
   /**
@@ -62,6 +58,20 @@ public final class UnknownDictionary extends BinaryDictionary {
         () -> Files.newInputStream(dictFile));
   }
 
+  /**
+   * Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with
+   * {@link ClassLoader#getResource(String)}).
+   *
+   * @param targetMapUrl where to load target map resource
+   * @param posDictUrl where to load POS dictionary resource
+   * @param dictUrl where to load dictionary entries resource
+   * @throws IOException if resource was not found or broken
+   */
+  public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException {
+    super(
+        () -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream());
+  }
+
   private UnknownDictionary() throws IOException {
     super(
         () -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java
index 5f8edab8934..ba749e92461 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java
@@ -32,6 +32,7 @@ import org.junit.Before;
 public class TestExternalDictionary extends LuceneTestCase {
 
   private Path dir;
+  private ClassLoader loader = getClass().getClassLoader();
 
   @Override
   @Before
@@ -98,4 +99,59 @@ public class TestExternalDictionary extends LuceneTestCase {
         new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
     assertEquals(0, cc.get(1, 1));
   }
+
+  public void testLoadExternalUrlTokenInfoDictionary() throws Exception {
+    String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+    TokenInfoDictionary dict =
+        new TokenInfoDictionary(
+            loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX));
+    assertNotNull(dict.getFST());
+  }
+
+  public void testLoadExternalUrlUnknownDictionary() throws Exception {
+    String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+    UnknownDictionary dict =
+        new UnknownDictionary(
+            loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
+            loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX));
+    assertNotNull(dict.getCharacterDefinition());
+  }
+
+  public void testLoadExternalUrlConnectionCosts() throws Exception {
+    String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+    ConnectionCosts cc =
+        new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
+    assertEquals(0, cc.get(1, 1));
+  }
+
+  @Deprecated(forRemoval = true, since = "9.1")
+  @SuppressWarnings("removal")
+  public void testDeprecatedLoadExternalTokenInfoDictionary() throws Exception {
+    String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
+    TokenInfoDictionary dict =
+        new TokenInfoDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+    assertNotNull(dict.getFST());
+  }
+
+  @Deprecated(forRemoval = true, since = "9.1")
+  @SuppressWarnings("removal")
+  public void testDeprecatedLoadExternalUnknownDictionary() throws Exception {
+    String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
+    UnknownDictionary dict =
+        new UnknownDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+    assertNotNull(dict.getCharacterDefinition());
+  }
+
+  @Deprecated(forRemoval = true, since = "9.1")
+  @SuppressWarnings("removal")
+  public void testDeprecatedLoadExternalConnectionCosts() throws Exception {
+    String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
+    ConnectionCosts cc =
+        new ConnectionCosts(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath);
+    assertEquals(0, cc.get(1, 1));
+  }
 }