Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/02/17 14:20:45 UTC

[opennlp] branch master updated: OPENNLP-1351: Add ONNX model support for doccat and namefinder (#400)

This is an automated email from the ASF dual-hosted git repository.

jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new ba2fdcc  OPENNLP-1351: Add ONNX model support for doccat and namefinder (#400)
ba2fdcc is described below

commit ba2fdccfaddc8c5dba8b28ca28bc23108fa30982
Author: Jeff Zemerick <jz...@apache.org>
AuthorDate: Thu Feb 17 09:20:37 2022 -0500

    OPENNLP-1351: Add ONNX model support for doccat and namefinder (#400)
    
    * OPENNLP-1351: Initial commit of working on ONNX support.
    
    * OPENNLP-1351: Moving the tokenize function.
    
    * OPENNLP-1351: Only making the tokenizer once.
    
    * OPENNLP-1351: Changing the tokenizer.
    
    * Adding wordpiece tokenizer.
    
    * Adding a readme.
    
    * Updating readme.
    
    * Updating readme.
    
    * Updating readme.
    
    * Adding test notes.
    
    * Adding license to NOTICE.
    
    * OPENNLP-1351: Returning spans for NER.
    
    * Working on tests.
    
    * Clean up.
    
    * Clean up.
    
    * Adding javadocs.
    
    * Adding javadocs.
    
    * Fixing casing in tests.
    
    * Updating readme and test.
    
    * Implementing document categorizer.
    
    * Adding doccat tests.
    
    * Adding scoreMap implementations.
    
    * Adding doccat tests.
    
    * Adding doccat models for testing.
    
    * Refactoring variable names.
    
    * Updating license.
    
    * Adding onnxruntime to the license.
    
    * Adding onnxruntime to the NOTICE file.
    
    * Making onnx tests be eval tests.
    
    * OPENNLP-1351: Addressing PR comments.
    
    * Addressing checkstyle.
    
    * Addressing checkstyle.
    
    * OPENNLP-1351: Moving test models to OpenNLP eval data.
    
    * Fixing pom file project line.
    
    * Adding unit tests.
    
    * OPENNLP-1351: Adding wordpiece tokenizer test.
    
    * OPENNLP-1351: Fixing checkstyle.
---
 .gitignore                                         |   2 +
 LICENSE                                            |  48 +++++
 NOTICE                                             |  56 +++++-
 opennlp-dl/README.md                               |  47 +++++
 opennlp-dl/pom.xml                                 |  51 ++++++
 opennlp-dl/src/main/java/opennlp/dl/Inference.java | 191 +++++++++++++++++++
 opennlp-dl/src/main/java/opennlp/dl/Tokens.java    |  62 +++++++
 .../opennlp/dl/doccat/DocumentCategorizerDL.java   | 156 ++++++++++++++++
 .../dl/doccat/DocumentCategorizerInference.java    |  61 +++++++
 .../java/opennlp/dl/namefinder/NameFinderDL.java   | 162 +++++++++++++++++
 .../dl/namefinder/TokenNameFinderInference.java    |  68 +++++++
 .../src/test/java/opennlp/dl/AbstactDLTest.java    |  39 ++++
 .../dl/doccat/DocumentCategorizerDLEval.java       | 145 +++++++++++++++
 .../opennlp/dl/namefinder/NameFinderDLEval.java    | 202 +++++++++++++++++++++
 .../opennlp/tools/tokenize/WordpieceTokenizer.java | 152 ++++++++++++++++
 .../tools/tokenize/WordpieceTokenizerTest.java     |  72 ++++++++
 pom.xml                                            |   6 +-
 17 files changed, 1517 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 81ef51f..965000f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@ nbactions.xml
 nb-configuration.xml
 *.DS_Store
 .checkstyle
+*.onnx
+vocab.txt
diff --git a/LICENSE b/LICENSE
index c275974..e7ebf3c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -229,3 +229,51 @@ The following license applies to the Snowball stemmers:
 	CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 	OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The following license applies to the Wordpiece tokenizer implementation:
+
+    The MIT License (MIT)
+
+    Copyright (c) 2019 Rob Rua
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
+
+The following license applies to the ONNX Runtime:
+
+    MIT License
+
+    Copyright (c) Microsoft Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
\ No newline at end of file
diff --git a/NOTICE b/NOTICE
index c0b8394..2fb767e 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,12 +1,66 @@
 Apache OpenNLP
-Copyright 2017 The Apache Software Foundation
+Copyright 2021 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 
+============================================================================
 
 The snowball stemmers in
 opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball
 were developed by Martin Porter and Richard Boulton.
 The full snowball package is available from
 http://snowball.tartarus.org/
+
+============================================================================
+
+Wordpiece tokenizer
+https://github.com/robrua/easy-bert
+
+The MIT License (MIT)
+
+Copyright (c) 2019 Rob Rua
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+============================================================================
+
+ONNX Runtime
+
+MIT License
+
+Copyright (c) Microsoft Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/opennlp-dl/README.md b/opennlp-dl/README.md
new file mode 100644
index 0000000..1c64a76
--- /dev/null
+++ b/opennlp-dl/README.md
@@ -0,0 +1,47 @@
+# OpenNLP DL
+
+This module provides OpenNLP interface implementations for ONNX models using the `onnxruntime` dependency.
+
+**Important**: This module does not provide the ability to train models. Model training is done outside of OpenNLP; this module only performs inference with already-exported ONNX models.
+
+To run the evaluation tests, download the example models (these are the exported models described below) into the OpenNLP data directory:
+
+```
+
+export OPENNLP_DATA=/tmp/
+mkdir /tmp/dl-doccat /tmp/dl-namefinder
+
+# Document categorizer model
+wget https://www.dropbox.com/s/n9uzs8r4xm9rhxb/model.onnx?dl=0 -O $OPENNLP_DATA/dl-doccat/model.onnx
+wget https://www.dropbox.com/s/aw6yjc68jw0jts6/vocab.txt?dl=0 -O $OPENNLP_DATA/dl-doccat/vocab.txt
+
+# Namefinder model
+wget https://www.dropbox.com/s/zgogq65gs9tyfm1/model.onnx?dl=0 -O $OPENNLP_DATA/dl-namefinder/model.onnx
+wget https://www.dropbox.com/s/3byt1jggly1dg98/vocab.txt?dl=0 -O $OPENNLP_DATA/dl-namefinder/vocab.txt
+```
+
+## TokenNameFinder
+
+* Export a Hugging Face NER model to ONNX, e.g.:
+
+```
+python -m transformers.onnx --model=dslim/bert-base-NER --feature token-classification exported
+```
+
+* Copy the exported model to `src/test/resources/namefinder/model.onnx`.
+* Copy the model's [vocab.txt](https://huggingface.co/dslim/bert-base-NER/tree/main) to `src/test/resources/namefinder/vocab.txt`.
+
+Now you can run the tests in `NameFinderDLEval`.
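+
+For reference, a minimal usage sketch (the class name and file paths are illustrative and assume the copy steps above; the label map mirrors `NameFinderDLEval` and must match the labels the model was trained with):
+
+```java
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.dl.namefinder.NameFinderDL;
+import opennlp.tools.util.Span;
+
+public class NameFinderExample {
+
+  public static void main(String[] args) throws Exception {
+
+    // Label map used to train dslim/bert-base-NER; adjust it for other models.
+    final Map<Integer, String> ids2Labels = new HashMap<>();
+    ids2Labels.put(0, "O");
+    ids2Labels.put(1, "B-MISC");
+    ids2Labels.put(2, "I-MISC");
+    ids2Labels.put(3, "B-PER");
+    ids2Labels.put(4, "I-PER");
+    ids2Labels.put(5, "B-ORG");
+    ids2Labels.put(6, "I-ORG");
+    ids2Labels.put(7, "B-LOC");
+    ids2Labels.put(8, "I-LOC");
+
+    final NameFinderDL nameFinder = new NameFinderDL(
+        new File("src/test/resources/namefinder/model.onnx"),
+        new File("src/test/resources/namefinder/vocab.txt"),
+        false, ids2Labels);
+
+    // find() expects whitespace-split tokens and returns entity spans over them.
+    final Span[] spans = nameFinder.find(new String[]
+        {"George", "Washington", "was", "president", "of", "the", "United", "States", "."});
+
+    for (Span span : spans) {
+      System.out.println(span);
+    }
+  }
+}
+```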
+
+## DocumentCategorizer
+
+* Export a Hugging Face classification (e.g. sentiment) model to ONNX, e.g.:
+
+```
+python -m transformers.onnx --model=nlptown/bert-base-multilingual-uncased-sentiment --feature sequence-classification exported
+```
+
+* Copy the exported model to `src/test/resources/doccat/model.onnx`.
+* Copy the model's [vocab.txt](https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment/tree/main) to `src/test/resources/doccat/vocab.txt`.
+
+Now you can run the tests in `DocumentCategorizerDLEval`.
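+
+As above, a minimal usage sketch (the class name and file paths are illustrative; the category map mirrors `DocumentCategorizerDLEval` and corresponds to the five output labels of the sentiment model):
+
+```java
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.dl.doccat.DocumentCategorizerDL;
+
+public class DoccatExample {
+
+  public static void main(String[] args) {
+
+    // Output labels of nlptown/bert-base-multilingual-uncased-sentiment; adjust for other models.
+    final Map<Integer, String> categories = new HashMap<>();
+    categories.put(0, "very bad");
+    categories.put(1, "bad");
+    categories.put(2, "neutral");
+    categories.put(3, "good");
+    categories.put(4, "very good");
+
+    final DocumentCategorizerDL categorizer = new DocumentCategorizerDL(
+        new File("src/test/resources/doccat/model.onnx"),
+        new File("src/test/resources/doccat/vocab.txt"),
+        categories);
+
+    // categorize() returns one score per category; getBestCategory() picks the highest.
+    final double[] scores = categorizer.categorize(new String[]{"I am happy"});
+    System.out.println(categorizer.getBestCategory(scores));
+  }
+}
+```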
\ No newline at end of file
diff --git a/opennlp-dl/pom.xml b/opennlp-dl/pom.xml
new file mode 100644
index 0000000..24615b0
--- /dev/null
+++ b/opennlp-dl/pom.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.opennlp</groupId>
+    <artifactId>opennlp</artifactId>
+    <version>1.9.5-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+  <groupId>org.apache.opennlp</groupId>
+  <artifactId>opennlp-dl</artifactId>
+  <name>opennlp-dl</name>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.opennlp</groupId>
+      <artifactId>opennlp-tools</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.microsoft.onnxruntime</groupId>
+      <artifactId>onnxruntime</artifactId>
+      <version>${onnxruntime.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/opennlp-dl/src/main/java/opennlp/dl/Inference.java b/opennlp-dl/src/main/java/opennlp/dl/Inference.java
new file mode 100644
index 0000000..f936dd3
--- /dev/null
+++ b/opennlp-dl/src/main/java/opennlp/dl/Inference.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.IntStream;
+
+import ai.onnxruntime.OrtEnvironment;
+import ai.onnxruntime.OrtException;
+import ai.onnxruntime.OrtSession;
+
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.WordpieceTokenizer;
+
+/**
+ * An abstract base class for OpenNLP inference implementations that use ONNX models.
+ */
+public abstract class Inference {
+
+  public static final String INPUT_IDS = "input_ids";
+  public static final String ATTENTION_MASK = "attention_mask";
+  public static final String TOKEN_TYPE_IDS = "token_type_ids";
+
+  protected final OrtEnvironment env;
+  protected final OrtSession session;
+
+  private final Tokenizer tokenizer;
+  private final Map<String, Integer> vocabulary;
+
+  public abstract double[][] infer(String input) throws Exception;
+
+  /**
+   * Instantiates a new inference class.
+   * @param model The ONNX model file.
+   * @param vocab The model's vocabulary file.
+   * @throws OrtException Thrown if the ONNX model cannot be loaded.
+   * @throws IOException Thrown if the ONNX model or vocabulary files cannot be opened or read.
+   */
+  public Inference(File model, File vocab) throws OrtException, IOException {
+
+    this.env = OrtEnvironment.getEnvironment();
+    this.session = env.createSession(model.getPath(), new OrtSession.SessionOptions());
+    this.vocabulary = loadVocab(vocab);
+    this.tokenizer = new WordpieceTokenizer(vocabulary.keySet());
+
+  }
+
+  /**
+   * Tokenize the input text using the {@link WordpieceTokenizer}.
+   * @param text The input text.
+   * @return The input text's {@link Tokens}.
+   */
+  public Tokens tokenize(String text) {
+
+    final String[] tokens = tokenizer.tokenize(text);
+
+    final int[] ids = new int[tokens.length];
+
+    for (int x = 0; x < tokens.length; x++) {
+      ids[x] = vocabulary.get(tokens[x]);
+    }
+
+    final long[] lids = Arrays.stream(ids).mapToLong(i -> i).toArray();
+
+    final long[] mask = new long[ids.length];
+    Arrays.fill(mask, 1);
+
+    final long[] types = new long[ids.length];
+    Arrays.fill(types, 0);
+
+    return new Tokens(tokens, lids, mask, types);
+
+  }
+
+  /**
+   * Loads a vocabulary file from disk.
+   * @param vocab The vocabulary file.
+   * @return A map of vocabulary words to integer IDs.
+   * @throws IOException Thrown if the vocabulary file cannot be opened and read.
+   */
+  public Map<String, Integer> loadVocab(File vocab) throws IOException {
+
+    final Map<String, Integer> v = new HashMap<>();
+
+    try (BufferedReader br = new BufferedReader(new FileReader(vocab.getPath()))) {
+
+      String line;
+      int x = 0;
+
+      // The vocabulary file contains one token per line; a token's ID
+      // is its zero-based line number.
+      while ((line = br.readLine()) != null) {
+        v.put(line, x++);
+      }
+
+    }
+
+    return v;
+
+  }
+
+  public static int maxIndex(double[] arr) {
+    return IntStream.range(0, arr.length)
+        .reduce((i, j) -> arr[i] > arr[j] ? i : j)
+        .orElse(-1);
+  }
+
+  /**
+   * Applies softmax to an array of values.
+   * @param input An array of values.
+   * @return The output array.
+   */
+  public double[] softmax(final double[] input) {
+
+    final double[] t = new double[input.length];
+    double sum = 0.0;
+
+    for (int x = 0; x < input.length; x++) {
+      double val = Math.exp(input[x]);
+      sum += val;
+      t[x] = val;
+    }
+
+    final double[] output = new double[input.length];
+
+    for (int x = 0; x < output.length; x++) {
+      output[x] = t[x] / sum;
+    }
+
+    return output;
+
+  }
+
+  /**
+   * Converts a two-dimensional float array to doubles.
+   * @param input The input array.
+   * @return The converted array.
+   */
+  public double[][] convertFloatsToDoubles(float[][] input) {
+
+    final double[][] outputs = new double[input.length][input[0].length];
+
+    for (int i = 0; i < input.length; i++) {
+      for (int j = 0; j < input[0].length; j++) {
+        outputs[i][j] = (double) input[i][j];
+      }
+    }
+
+    return outputs;
+
+  }
+
+  /**
+   * Converts a one-dimensional float array to doubles.
+   * @param input The input array.
+   * @return The converted array.
+   */
+  public double[] convertFloatsToDoubles(float[] input) {
+
+    final double[] output = new double[input.length];
+
+    for (int i = 0; i < input.length; i++) {
+      output[i] = input[i];
+    }
+
+    return output;
+
+  }
+
+}
diff --git a/opennlp-dl/src/main/java/opennlp/dl/Tokens.java b/opennlp-dl/src/main/java/opennlp/dl/Tokens.java
new file mode 100644
index 0000000..a5d57d9
--- /dev/null
+++ b/opennlp-dl/src/main/java/opennlp/dl/Tokens.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl;
+
+/**
+ * Holds the tokens for input to an ONNX model.
+ */
+public class Tokens {
+
+  private String[] tokens;
+  private long[] ids;
+  private long[] mask;
+  private long[] types;
+
+  /**
+   * Creates a new instance to hold the tokens for input to an ONNX model.
+   * @param tokens The tokens themselves.
+   * @param ids The token IDs as retrieved from the vocabulary.
+   * @param mask The token mask. (Typically all 1.)
+   * @param types The token types. (Typically all 0.)
+   */
+  public Tokens(String[] tokens, long[] ids, long[] mask, long[] types) {
+
+    this.tokens = tokens;
+    this.ids = ids;
+    this.mask = mask;
+    this.types = types;
+
+  }
+
+  public String[] getTokens() {
+    return tokens;
+  }
+
+  public long[] getIds() {
+    return ids;
+  }
+
+  public long[] getMask() {
+    return mask;
+  }
+
+  public long[] getTypes() {
+    return types;
+  }
+
+}
diff --git a/opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerDL.java b/opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerDL.java
new file mode 100644
index 0000000..3d777fc
--- /dev/null
+++ b/opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerDL.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl.doccat;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import opennlp.dl.Inference;
+import opennlp.tools.doccat.DocumentCategorizer;
+
+/**
+ * An implementation of {@link DocumentCategorizer} that performs document classification
+ * using ONNX models.
+ */
+public class DocumentCategorizerDL implements DocumentCategorizer {
+
+  private final File model;
+  private final File vocab;
+  private final Map<Integer, String> categories;
+
+  /**
+   * Creates a new document categorizer using ONNX models.
+   * @param model The ONNX model file.
+   * @param vocab The model's vocabulary file.
+   * @param categories The categories.
+   */
+  public DocumentCategorizerDL(File model, File vocab, Map<Integer, String> categories) {
+
+    this.model = model;
+    this.vocab = vocab;
+    this.categories = categories;
+
+  }
+
+  @Override
+  public double[] categorize(String[] strings) {
+
+    try {
+
+      final DocumentCategorizerInference inference = new DocumentCategorizerInference(model, vocab);
+
+      final double[][] vectors = inference.infer(strings[0]);
+      final double[] results = inference.softmax(vectors[0]);
+
+      return results;
+
+    } catch (Exception ex) {
+      System.err.println("Unload to perform document classification inference: " + ex.getMessage());
+    }
+
+    return new double[]{};
+
+  }
+
+  @Override
+  public double[] categorize(String[] strings, Map<String, Object> map) {
+    return categorize(strings);
+  }
+
+  @Override
+  public String getBestCategory(double[] doubles) {
+    return categories.get(Inference.maxIndex(doubles));
+  }
+
+  @Override
+  public int getIndex(String s) {
+    return getKey(s);
+  }
+
+  @Override
+  public String getCategory(int i) {
+    return categories.get(i);
+  }
+
+  @Override
+  public int getNumberOfCategories() {
+    return categories.size();
+  }
+
+  @Override
+  public String getAllResults(double[] doubles) {
+    return null;
+  }
+
+  @Override
+  public Map<String, Double> scoreMap(String[] strings) {
+
+    final double[] scores = categorize(strings);
+
+    final Map<String, Double> scoreMap = new HashMap<>();
+
+    for (int x : categories.keySet()) {
+      scoreMap.put(categories.get(x), scores[x]);
+    }
+
+    return scoreMap;
+
+  }
+
+  @Override
+  public SortedMap<Double, Set<String>> sortedScoreMap(String[] strings) {
+
+    final double[] scores = categorize(strings);
+
+    final SortedMap<Double, Set<String>> scoreMap = new TreeMap<>();
+
+    for (int x : categories.keySet()) {
+
+      if (scoreMap.get(scores[x]) == null) {
+        scoreMap.put(scores[x], new HashSet<>());
+      }
+
+      scoreMap.get(scores[x]).add(categories.get(x));
+
+    }
+
+    return scoreMap;
+
+  }
+
+  private int getKey(String value) {
+
+    for (Map.Entry<Integer, String> entry : categories.entrySet()) {
+
+      if (entry.getValue().equals(value)) {
+        return entry.getKey();
+      }
+
+    }
+
+    // The String wasn't found as a value in the map.
+    return -1;
+
+  }
+
+}
diff --git a/opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerInference.java b/opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerInference.java
new file mode 100644
index 0000000..3160cb4
--- /dev/null
+++ b/opennlp-dl/src/main/java/opennlp/dl/doccat/DocumentCategorizerInference.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl.doccat;
+
+import java.io.File;
+import java.nio.LongBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+import ai.onnxruntime.OnnxTensor;
+
+import opennlp.dl.Inference;
+import opennlp.dl.Tokens;
+
+public class DocumentCategorizerInference extends Inference {
+
+  private final Map<String, Integer> vocabulary;
+
+  public DocumentCategorizerInference(File model, File vocab) throws Exception {
+
+    super(model, vocab);
+
+    this.vocabulary = loadVocab(vocab);
+
+  }
+
+  @Override
+  public double[][] infer(String text) throws Exception {
+
+    final Tokens tokens = tokenize(text);
+
+    final Map<String, OnnxTensor> inputs = new HashMap<>();
+    inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
+            LongBuffer.wrap(tokens.getIds()), new long[]{1, tokens.getIds().length}));
+
+    inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
+            LongBuffer.wrap(tokens.getMask()), new long[]{1, tokens.getMask().length}));
+
+    inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
+            LongBuffer.wrap(tokens.getTypes()), new long[]{1, tokens.getTypes().length}));
+
+    return convertFloatsToDoubles((float[][]) session.run(inputs).get(0).getValue());
+
+  }
+
+}
diff --git a/opennlp-dl/src/main/java/opennlp/dl/namefinder/NameFinderDL.java b/opennlp-dl/src/main/java/opennlp/dl/namefinder/NameFinderDL.java
new file mode 100644
index 0000000..663671a
--- /dev/null
+++ b/opennlp-dl/src/main/java/opennlp/dl/namefinder/NameFinderDL.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl.namefinder;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.dl.Inference;
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.util.Span;
+
+/**
+ * An implementation of {@link TokenNameFinder} that uses ONNX models.
+ */
+public class NameFinderDL implements TokenNameFinder {
+
+  public static final String I_PER = "I-PER";
+  public static final String B_PER = "B-PER";
+
+  private final TokenNameFinderInference inference;
+  private final Map<Integer, String> ids2Labels;
+
+  /**
+   * Creates a new NameFinderDL for entity recognition using ONNX models.
+   *
+   * @param model     The ONNX model file.
+   * @param vocab     The model's vocabulary file.
+   * @param doLowerCase Whether or not to lowercase the text prior to inference.
+   * @param ids2Labels  A map of values and their assigned labels used to train the model.
+   * @throws Exception Thrown if the models cannot be loaded.
+   */
+  public NameFinderDL(File model, File vocab, boolean doLowerCase, Map<Integer, String> ids2Labels)
+          throws Exception {
+
+    this.ids2Labels = ids2Labels;
+    this.inference = new TokenNameFinderInference(model, vocab, doLowerCase);
+
+  }
+
+  @Override
+  public Span[] find(String[] tokens) {
+
+    final List<Span> spans = new LinkedList<>();
+    final String text = String.join(" ", tokens);
+
+    try {
+
+      final double[][] v = inference.infer(text);
+
+      // Find consecutive B-PER and I-PER labels and combine the spans where necessary.
+      // There are also B-LOC and I-LOC tags for locations that might be useful at some point.
+
+      // Keep track of where the last span was so when there are multiple/duplicate
+      // spans we can get the next one instead of the first one each time.
+      int characterStart = 0;
+
+      // We are looping over the vector for each word,
+      // finding the index of the array that has the maximum value,
+      // and then finding the token classification that corresponds to that index.
+      for (int x = 0; x < v.length; x++) {
+
+        final double[] arr = v[x];
+        final int maxIndex = Inference.maxIndex(arr);
+        final String label = ids2Labels.get(maxIndex);
+
+        final double probability = arr[maxIndex];
+
+        if (B_PER.equalsIgnoreCase(label)) {
+
+          // This is the start of a person entity.
+          final String spanText;
+
+          // Find the end index of the span in the array (where the label is not I-PER).
+          final int endIndex = findSpanEnd(v, x, ids2Labels);
+
+          // If the end is -1 it means this is a single-token span.
+          // If the end is != -1 it means this is a multi-token span.
+          if (endIndex != -1) {
+
+            // Subtract one for the beginning token not part of the text.
+            spanText = String.join(" ", Arrays.copyOfRange(tokens, x - 1, endIndex));
+
+            spans.add(new Span(x - 1, endIndex, spanText, probability));
+
+            x = endIndex;
+
+          } else {
+
+            // This is a single-token span so there is nothing else to do except grab the token.
+            spanText = tokens[x - 1];
+
+            // Subtract one for the beginning token not part of the text.
+            spans.add(new Span(x - 1, x, spanText, probability));
+
+          }
+
+        }
+
+      }
+
+    } catch (Exception ex) {
+      System.err.println("Error performing namefinder inference: " + ex.getMessage());
+    }
+
+    return spans.toArray(new Span[0]);
+
+  }
+
+  @Override
+  public void clearAdaptiveData() {
+    // No use for this in this implementation.
+  }
+
+  private int findSpanEnd(double[][] v, int startIndex, Map<Integer, String> id2Labels) {
+
+    // This will be the index of the last token in the span.
+    // -1 means there is no follow-up token, so it is a single-token span.
+    int index = -1;
+
+    // Starts at the span start in the vector.
+    // Looks at the next token to see if it is an I-PER.
+    // Go until the next token is something other than I-PER.
+    // When the next token is not I-PER, return the previous index.
+
+    for (int x = startIndex + 1; x < v.length; x++) {
+
+      // Get the next item.
+      final double[] arr = v[x];
+
+      // See if the next token has an I-PER label.
+      final String nextTokenClassification = id2Labels.get(Inference.maxIndex(arr));
+
+      if (!I_PER.equalsIgnoreCase(nextTokenClassification)) {
+        index = x - 1;
+        break;
+      }
+
+    }
+
+    return index;
+
+  }
+
+}
diff --git a/opennlp-dl/src/main/java/opennlp/dl/namefinder/TokenNameFinderInference.java b/opennlp-dl/src/main/java/opennlp/dl/namefinder/TokenNameFinderInference.java
new file mode 100644
index 0000000..8e7273f
--- /dev/null
+++ b/opennlp-dl/src/main/java/opennlp/dl/namefinder/TokenNameFinderInference.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl.namefinder;
+
+import java.io.File;
+import java.nio.LongBuffer;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import ai.onnxruntime.OnnxTensor;
+
+import opennlp.dl.Inference;
+import opennlp.dl.Tokens;
+
+public class TokenNameFinderInference extends Inference {
+
+  private final boolean doLowerCase;
+
+  public TokenNameFinderInference(File model, File vocab, boolean doLowerCase) throws Exception {
+
+    super(model, vocab);
+
+    this.doLowerCase = doLowerCase;
+
+  }
+
+  @Override
+  public double[][] infer(String text) throws Exception {
+
+    if (doLowerCase) {
+      text = text.toLowerCase(Locale.ROOT);
+    }
+
+    final Tokens tokens = tokenize(text);
+
+    final Map<String, OnnxTensor> inputs = new HashMap<>();
+    inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
+            LongBuffer.wrap(tokens.getIds()), new long[]{1, tokens.getIds().length}));
+
+    inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
+            LongBuffer.wrap(tokens.getMask()), new long[]{1, tokens.getMask().length}));
+
+    inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
+            LongBuffer.wrap(tokens.getTypes()), new long[]{1, tokens.getTypes().length}));
+
+    final float[][][] v = (float[][][]) session.run(inputs).get(0).getValue();
+
+    return convertFloatsToDoubles(v[0]);
+
+  }
+
+}
diff --git a/opennlp-dl/src/test/java/opennlp/dl/AbstactDLTest.java b/opennlp-dl/src/test/java/opennlp/dl/AbstactDLTest.java
new file mode 100644
index 0000000..09e0c59
--- /dev/null
+++ b/opennlp-dl/src/test/java/opennlp/dl/AbstactDLTest.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+
+import opennlp.tools.util.StringUtil;
+
+public abstract class AbstactDLTest {
+
+  public static File getOpennlpDataDir() throws FileNotFoundException {
+    final String dataDirectory = System.getProperty("OPENNLP_DATA_DIR");
+    if (StringUtil.isEmpty(dataDirectory)) {
+      throw new IllegalArgumentException("The OPENNLP_DATA_DIR is not set.");
+    }
+    final File file = new File(System.getProperty("OPENNLP_DATA_DIR"));
+    if (!file.exists()) {
+      throw new FileNotFoundException("The OPENNLP_DATA_DIR path of " + dataDirectory + " was not found.");
+    }
+    return file;
+  }
+
+}
diff --git a/opennlp-dl/src/test/java/opennlp/dl/doccat/DocumentCategorizerDLEval.java b/opennlp-dl/src/test/java/opennlp/dl/doccat/DocumentCategorizerDLEval.java
new file mode 100644
index 0000000..d82373a
--- /dev/null
+++ b/opennlp-dl/src/test/java/opennlp/dl/doccat/DocumentCategorizerDLEval.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl.doccat;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.dl.AbstactDLTest;
+
+public class DocumentCategorizerDLEval extends AbstactDLTest {
+
+  @Test
+  public void categorize() throws FileNotFoundException {
+
+    // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
+    // You will need to update the assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "dl-doccat/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "dl-doccat/vocab.txt");
+
+    final DocumentCategorizerDL documentCategorizerDL =
+            new DocumentCategorizerDL(model, vocab, getCategories());
+
+    final double[] result = documentCategorizerDL.categorize(new String[]{"I am happy"});
+    System.out.println(Arrays.toString(result));
+
+    final double[] expected = new double[]
+        {0.007819971069693565,
+        0.006593209225684404,
+        0.04995147883892059,
+        0.3003573715686798,
+        0.6352779865264893};
+
+    Assert.assertTrue(Arrays.equals(expected, result));
+    Assert.assertEquals(5, result.length);
+
+    final String category = documentCategorizerDL.getBestCategory(result);
+    Assert.assertEquals("very good", category);
+
+  }
+
+  @Test
+  public void scoreMap() throws FileNotFoundException {
+
+    // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
+    // You will need to update the assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "dl-doccat/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "dl-doccat/vocab.txt");
+
+    final DocumentCategorizerDL documentCategorizerDL =
+            new DocumentCategorizerDL(model, vocab, getCategories());
+
+    final Map<String, Double> result = documentCategorizerDL.scoreMap(new String[]{"I am happy"});
+
+    Assert.assertEquals(0.6352779865264893, result.get("very good").doubleValue(), 0);
+    Assert.assertEquals(0.3003573715686798, result.get("good").doubleValue(), 0);
+    Assert.assertEquals(0.04995147883892059, result.get("neutral").doubleValue(), 0);
+    Assert.assertEquals(0.006593209225684404, result.get("bad").doubleValue(), 0);
+    Assert.assertEquals(0.007819971069693565, result.get("very bad").doubleValue(), 0);
+
+  }
+
+  @Test
+  public void sortedScoreMap() throws FileNotFoundException {
+
+    // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
+    // You will need to update the assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "dl-doccat/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "dl-doccat/vocab.txt");
+
+    final DocumentCategorizerDL documentCategorizerDL =
+            new DocumentCategorizerDL(model, vocab, getCategories());
+
+    final Map<Double, Set<String>> result = documentCategorizerDL.sortedScoreMap(new String[]{"I am happy"});
+
+    Assert.assertEquals(result.get(0.6352779865264893).size(), 1);
+    Assert.assertEquals(result.get(0.3003573715686798).size(), 1);
+    Assert.assertEquals(result.get(0.04995147883892059).size(), 1);
+    Assert.assertEquals(result.get(0.006593209225684404).size(), 1);
+    Assert.assertEquals(result.get(0.007819971069693565).size(), 1);
+
+  }
+
+  @Test
+  public void doccat() throws FileNotFoundException {
+
+    // This test was written using the nlptown/bert-base-multilingual-uncased-sentiment model.
+    // You will need to update the assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "dl-doccat/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "dl-doccat/vocab.txt");
+
+    final DocumentCategorizerDL documentCategorizerDL =
+            new DocumentCategorizerDL(model, vocab, getCategories());
+
+    final int index = documentCategorizerDL.getIndex("bad");
+    Assert.assertEquals(1, index);
+
+    final String category = documentCategorizerDL.getCategory(3);
+    Assert.assertEquals("good", category);
+
+    final int number = documentCategorizerDL.getNumberOfCategories();
+    Assert.assertEquals(5, number);
+
+  }
+
+  private Map<Integer, String> getCategories() {
+
+    final Map<Integer, String> categories = new HashMap<>();
+
+    categories.put(0, "very bad");
+    categories.put(1, "bad");
+    categories.put(2, "neutral");
+    categories.put(3, "good");
+    categories.put(4, "very good");
+
+    return categories;
+
+  }
+
+}
diff --git a/opennlp-dl/src/test/java/opennlp/dl/namefinder/NameFinderDLEval.java b/opennlp-dl/src/test/java/opennlp/dl/namefinder/NameFinderDLEval.java
new file mode 100644
index 0000000..f5eef93
--- /dev/null
+++ b/opennlp-dl/src/test/java/opennlp/dl/namefinder/NameFinderDLEval.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.dl.namefinder;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import ai.onnxruntime.OrtException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.dl.AbstactDLTest;
+
+import opennlp.tools.util.Span;
+
+public class NameFinderDLEval extends AbstactDLTest {
+
+  @Test
+  public void tokenNameFinder1Test() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "namefinder/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "namefinder/vocab.txt");
+
+    final String[] tokens = new String[]
+        {"George", "Washington", "was", "president", "of", "the", "United", "States", "."};
+
+    final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
+    final Span[] spans = nameFinderDL.find(tokens);
+
+    for (Span span : spans) {
+      System.out.println(span.toString());
+    }
+
+    Assert.assertEquals(1, spans.length);
+    Assert.assertEquals(0, spans[0].getStart());
+    Assert.assertEquals(2, spans[0].getEnd());
+    Assert.assertEquals(0.8251646041870118, spans[0].getProb(), 0.0);
+
+  }
+
+  @Test
+  public void tokenNameFinder2Test() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "namefinder/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "namefinder/vocab.txt");
+
+    final String[] tokens = new String[]{"His", "name", "was", "George", "Washington"};
+
+    final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
+    final Span[] spans = nameFinderDL.find(tokens);
+
+    for (Span span : spans) {
+      System.out.println(span.toString());
+    }
+
+    Assert.assertEquals(1, spans.length);
+    Assert.assertEquals(3, spans[0].getStart());
+    Assert.assertEquals(5, spans[0].getEnd());
+
+  }
+
+  @Test
+  public void tokenNameFinder3Test() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "namefinder/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "namefinder/vocab.txt");
+
+    final String[] tokens = new String[]{"His", "name", "was", "George"};
+
+    final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
+    final Span[] spans = nameFinderDL.find(tokens);
+
+    for (Span span : spans) {
+      System.out.println(span.toString());
+    }
+
+    Assert.assertEquals(1, spans.length);
+    Assert.assertEquals(3, spans[0].getStart());
+    Assert.assertEquals(4, spans[0].getEnd());
+
+  }
+
+  @Test
+  public void tokenNameFinderNoInputTest() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "namefinder/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "namefinder/vocab.txt");
+
+    final String[] tokens = new String[]{};
+
+    final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
+    final Span[] spans = nameFinderDL.find(tokens);
+
+    Assert.assertEquals(0, spans.length);
+
+  }
+
+  @Test
+  public void tokenNameFinderNoEntitiesTest() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "namefinder/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "namefinder/vocab.txt");
+
+    final String[] tokens = new String[]{"I", "went", "to", "the", "park"};
+
+    final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
+    final Span[] spans = nameFinderDL.find(tokens);
+
+    Assert.assertEquals(0, spans.length);
+
+  }
+
+  @Test
+  public void tokenNameFinderMultipleEntitiesTest() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File(getOpennlpDataDir(), "namefinder/model.onnx");
+    final File vocab = new File(getOpennlpDataDir(), "namefinder/vocab.txt");
+
+    final String[] tokens = new String[]{"George", "Washington", "and", "Abraham", "Lincoln",
+        "were", "presidents"};
+
+    final NameFinderDL nameFinderDL = new NameFinderDL(model, vocab, false, getIds2Labels());
+    final Span[] spans = nameFinderDL.find(tokens);
+
+    for (Span span : spans) {
+      System.out.println(span.toString());
+    }
+
+    Assert.assertEquals(2, spans.length);
+    Assert.assertEquals(0, spans[0].getStart());
+    Assert.assertEquals(2, spans[0].getEnd());
+    Assert.assertEquals(3, spans[1].getStart());
+    Assert.assertEquals(5, spans[1].getEnd());
+
+  }
+
+  @Test(expected = OrtException.class)
+  public void invalidModel() throws Exception {
+
+    // This test was written using the dslim/bert-base-NER model.
+    // You will need to update the ids2Labels and assertions if you use a different model.
+
+    final File model = new File("invalid.onnx");
+    final File vocab = new File("vocab.txt");
+
+    new NameFinderDL(model, vocab, true, getIds2Labels());
+
+  }
+
+  private Map<Integer, String> getIds2Labels() {
+
+    final Map<Integer, String> ids2Labels = new HashMap<>();
+    ids2Labels.put(0, "O");
+    ids2Labels.put(1, "B-MISC");
+    ids2Labels.put(2, "I-MISC");
+    ids2Labels.put(3, "B-PER");
+    ids2Labels.put(4, "I-PER");
+    ids2Labels.put(5, "B-ORG");
+    ids2Labels.put(6, "I-ORG");
+    ids2Labels.put(7, "B-LOC");
+    ids2Labels.put(8, "I-LOC");
+
+    return ids2Labels;
+
+  }
+
+}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WordpieceTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WordpieceTokenizer.java
new file mode 100644
index 0000000..bbd3145
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WordpieceTokenizer.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.tokenize;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.util.Span;
+
+/**
+ * A WordPiece tokenizer.
+ *
+ * Adapted from https://github.com/robrua/easy-bert under the MIT license.
+ *
+ * For reference see:
+ *  - https://www.tensorflow.org/text/guide/subwords_tokenizer#applying_wordpiece
+ *  - https://cran.r-project.org/web/packages/wordpiece/vignettes/basic_usage.html
+ */
+public class WordpieceTokenizer implements Tokenizer {
+
+  private static final String CLASSIFICATION_TOKEN = "[CLS]";
+  private static final String SEPARATOR_TOKEN = "[SEP]";
+  private static final String UNKNOWN_TOKEN = "[UNK]";
+
+  private Set<String> vocabulary;
+  private int maxTokenLength = 50;
+
+  public WordpieceTokenizer(Set<String> vocabulary) {
+    this.vocabulary = vocabulary;
+  }
+
+  public WordpieceTokenizer(Set<String> vocabulary, int maxTokenLength) {
+    this.vocabulary = vocabulary;
+    this.maxTokenLength = maxTokenLength;
+  }
+
+  @Override
+  public Span[] tokenizePos(final String text) {
+    // TODO: Implement this.
+    return null;
+  }
+
+  @Override
+  public String[] tokenize(final String text) {
+
+    final List<String> tokens = new LinkedList<>();
+    tokens.add(CLASSIFICATION_TOKEN);
+
+    // Put spaces around punctuation.
+    final String spacedPunctuation = text.replaceAll("\\p{Punct}+", " $0 ");
+
+    // Split based on whitespace.
+    final String[] split = WhitespaceTokenizer.INSTANCE.tokenize(spacedPunctuation);
+
+    // For each resulting word, if the word is found in the WordPiece vocabulary, keep it as-is.
+    // If not, starting from the beginning, pull off the biggest piece that is in the vocabulary,
+    // and prefix "##" to the remaining piece. Repeat until the entire word is represented by
+    // pieces from the vocabulary, if possible.
+    for (final String token : split) {
+
+      final char[] characters = token.toCharArray();
+
+      if (characters.length <= maxTokenLength) {
+
+        // To start, the substring is the whole token.
+        int start = 0;
+        int end;
+
+        // Look at the token from the start.
+        while (start < characters.length) {
+
+          end = characters.length;
+          boolean found = false;
+
+          // Look at the token from the end until the end is equal to the start.
+          while (start < end) {
+
+            // The substring is the part of the token we are looking at now.
+            String substring = String.valueOf(characters, start, end - start);
+
+            // This is a substring so prefix it with ##.
+            if (start > 0) {
+              substring = "##" + substring;
+            }
+
+            // See if the substring is in the vocabulary.
+            if (vocabulary.contains(substring)) {
+
+              // It is in the vocabulary so add it to the list of tokens.
+              tokens.add(substring);
+
+              // Next time we can pick up where we left off.
+              start = end;
+              found = true;
+
+              break;
+
+            }
+
+            // Subtract 1 from the end to find the next longest piece in the vocabulary.
+            end--;
+
+          }
+
+          // If the word can't be represented by vocabulary pieces replace
+          // it with a specified "unknown" token.
+          if (!found) {
+            tokens.add(UNKNOWN_TOKEN);
+            break;
+          }
+
+          // Start the next characters where we just left off.
+          start = end;
+
+        }
+
+      } else {
+
+        // If the token's length is greater than the max length just add [UNK] instead.
+        tokens.add(UNKNOWN_TOKEN);
+
+      }
+
+    }
+
+    tokens.add(SEPARATOR_TOKEN);
+
+    return tokens.toArray(new String[0]);
+
+  }
+
+  public int getMaxTokenLength() {
+    return maxTokenLength;
+  }
+
+}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/WordpieceTokenizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/WordpieceTokenizerTest.java
new file mode 100644
index 0000000..6708616
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/WordpieceTokenizerTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.tokenize;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class WordpieceTokenizerTest {
+
+  @Test
+  public void testSentence() {
+
+    final Tokenizer tokenizer = new WordpieceTokenizer(getVocabulary());
+    final String[] tokens = tokenizer.tokenize("the quick brown fox jumps over the very lazy dog");
+
+    final String[] expected = {"[CLS]", "the", "quick", "brown", "fox", "jumps", "over", "the",
+      "[UNK]", "lazy", "dog", "[SEP]"};
+
+    Assert.assertArrayEquals(expected, tokens);
+
+  }
+
+  @Test
+  public void testSentenceWithPunctuation() {
+
+    final Tokenizer tokenizer = new WordpieceTokenizer(getVocabulary());
+    final String[] tokens = tokenizer.tokenize("The quick brown fox jumps over the very lazy dog.");
+
+    final String[] expected = {"[CLS]", "[UNK]", "quick", "brown", "fox", "jumps", "over", "the",
+      "[UNK]", "lazy", "dog", "[UNK]", "[SEP]"};
+
+    Assert.assertArrayEquals(expected, tokens);
+
+  }
+
+  private Set<String> getVocabulary() {
+
+    final Set<String> vocabulary = new HashSet<>();
+
+    vocabulary.add("the");
+    vocabulary.add("quick");
+    vocabulary.add("brown");
+    vocabulary.add("fox");
+    vocabulary.add("jumps");
+    vocabulary.add("over");
+    vocabulary.add("the");
+    vocabulary.add("lazy");
+    vocabulary.add("dog");
+
+    return vocabulary;
+
+  }
+
+}
diff --git a/pom.xml b/pom.xml
index 254400d..815642a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
 		<groupId>org.apache</groupId>
 		<artifactId>apache</artifactId>
 		<version>18</version>
-		<relativePath />
+		<relativePath/>
 	</parent>
 
 	<groupId>org.apache.opennlp</groupId>
@@ -146,6 +146,7 @@
 		<morfologik.version>2.1.7</morfologik.version>
 		<osgi.version>4.2.0</osgi.version>
 		<checkstyle.plugin.version>2.17</checkstyle.plugin.version>
+		<onnxruntime.version>1.10.0</onnxruntime.version>
 		<opennlp.forkCount>1.0C</opennlp.forkCount>
 		<coveralls.maven.plugin>4.3.0</coveralls.maven.plugin>
 		<jacoco.maven.plugin>0.7.9</jacoco.maven.plugin>
@@ -505,6 +506,7 @@
 		<module>opennlp-morfologik-addon</module>
 		<module>opennlp-docs</module>
 		<module>opennlp-distr</module>
-	</modules>
+		<module>opennlp-dl</module>
+	</modules>
 
 </project>