You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2018/05/31 13:35:16 UTC

[opennlp-sandbox] branch master updated: Add constructor to load all resources from Input Streams

This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e9fc9b  Add constructor to load all resources from Input Streams
5e9fc9b is described below

commit 5e9fc9b78910b8afee3643c638d911368e65947b
Author: Jörn Kottmann <jo...@apache.org>
AuthorDate: Thu May 31 15:34:54 2018 +0200

    Add constructor to load all resources from Input Streams
---
 .../apache/opennlp/namefinder/SequenceTagging.java | 39 ++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java b/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java
index 3713555..69a1748 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java
@@ -19,15 +19,22 @@ package org.apache.opennlp.namefinder;
 
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.List;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
 
 import org.tensorflow.SavedModelBundle;
 import org.tensorflow.Session;
 import org.tensorflow.Tensor;
 
 import opennlp.tools.namefind.BioCodec;
+import opennlp.tools.namefind.NameSample;
 import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.util.Span;
 
 public class SequenceTagging implements TokenNameFinder, AutoCloseable {
@@ -46,6 +53,38 @@ public class SequenceTagging implements TokenNameFinder, AutoCloseable {
     this.indexTagger = new IndexTagger((new FileInputStream(config.getVocabTags())));
   }
 
+  /**
+   * Initializes the name finder with all resources loaded from input streams.
+   * <p>
+   * The zipped model package is extracted into a newly created temporary
+   * directory, which is then passed to {@link SavedModelBundle#load} together
+   * with the "serve" tag.
+   *
+   * @param vocabWords stream passed to the {@link WordIndexer} as first argument
+   * @param vocabChars stream passed to the {@link WordIndexer} as second argument
+   * @param vocabTags stream passed to the {@link IndexTagger}
+   * @param modelZipPackage zip archive containing the model, it is closed by this constructor
+   *
+   * @throws IOException if the extraction fails or if a zip entry resolves
+   *     to a location outside of the temporary directory
+   */
+  public SequenceTagging(InputStream vocabWords, InputStream vocabChars,
+                         InputStream vocabTags, InputStream modelZipPackage) throws IOException {
+
+    wordIndexer = new WordIndexer(vocabWords, vocabChars);
+    indexTagger = new IndexTagger(vocabTags);
+
+    // Absolute and normalized, so that the containment check below is reliable
+    Path tmpDir = Files.createTempDirectory("opennlp2_namefinder").toAbsolutePath().normalize();
+    // Registered first: deleteOnExit processes entries in reverse registration order,
+    // so the directory is removed (best effort) after its content
+    tmpDir.toFile().deleteOnExit();
+
+    // Unzip the model to a temp directory, the stream is closed even if the extraction fails
+    try (ZipInputStream zis = new ZipInputStream(modelZipPackage)) {
+      ZipEntry zipEntry;
+      while ((zipEntry = zis.getNextEntry()) != null) {
+        Path newFile = tmpDir.resolve(zipEntry.getName()).normalize();
+
+        // Reject entry names (e.g. containing "..") which point outside of the temp directory
+        if (!newFile.startsWith(tmpDir)) {
+          throw new IOException("Zip entry is outside of the target directory: "
+              + zipEntry.getName());
+        }
+
+        if (zipEntry.isDirectory()) {
+          Files.createDirectories(newFile);
+        }
+        else {
+          // Archives do not always contain explicit entries for parent directories
+          Files.createDirectories(newFile.getParent());
+          Files.copy(zis, newFile);
+        }
+
+        // This is a bit of a hack, but should work fine for now ...
+        newFile.toFile().deleteOnExit();
+      }
+    }
+
+    model = SavedModelBundle.load(tmpDir.toString(), "serve");
+    session = model.session();
+  }
+
   @Override
   public Span[] find(String[] sentence) {
     TokenIds tokenIds = wordIndexer.toTokenIds(sentence);

-- 
To stop receiving notification emails like this one, please contact
joern@apache.org.