Posted to commits@opennlp.apache.org by jo...@apache.org on 2018/11/29 12:25:50 UTC

[opennlp-sandbox] branch master updated: Write model and dictionaries into zip package

This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/master by this push:
     new 00a8fdf  Write model and dictionaries into zip package
00a8fdf is described below

commit 00a8fdf5cc64d6341218b077427281665f4f6e16
Author: Jörn Kottmann <jo...@apache.org>
AuthorDate: Thu Nov 29 13:25:28 2018 +0100

    Write model and dictionaries into zip package
---
 .../main/java/org/apache/opennlp/ModelUtil.java    | 14 ++++------
 .../org/apache/opennlp/normalizer/Normalizer.java  | 32 ++++++++++++++++------
 .../src/main/python/normalizer/normalizer.py       | 28 ++++++++++++++-----
 3 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
index fa80241..76e5c8a 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
@@ -29,20 +29,16 @@ public class ModelUtil {
   public static Path writeModelToTmpDir(InputStream modelIn) throws IOException {
     Path tmpDir = Files.createTempDirectory("opennlp2");
 
-    // Unzip the model to a temp directory
     ZipInputStream zis = new ZipInputStream(modelIn);
     ZipEntry zipEntry = zis.getNextEntry();
     while(zipEntry != null){
       Path newFile = tmpDir.resolve(zipEntry.getName());
 
-      if (zipEntry.isDirectory()) {
-        Files.createDirectories(newFile);
-      }
-      else {
-        Files.copy(zis, newFile);
-        // This is a bit of hack, but should work fine for now ...
-        newFile.toFile().deleteOnExit();
-      }
+      Files.createDirectories(newFile.getParent());
+      Files.copy(zis, newFile);
+
+      // TODO: How to delete the tmp directory after we are done loading from it ?!
+      newFile.toFile().deleteOnExit();
 
       zipEntry = zis.getNextEntry();
     }
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
index 9c9f27b..faaf678 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
@@ -18,6 +18,7 @@
 package org.apache.opennlp.normalizer;
 
 import java.io.BufferedReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -44,19 +45,23 @@ public class Normalizer {
   private final Map<Character, Integer> sourceCharMap;
   private final Map<Integer, Character> targetCharMap;
 
-  Normalizer(InputStream sourceCharMapIn, InputStream targetCharMapIn,
-             InputStream modelZipPackage) throws IOException {
+  public Normalizer(InputStream modelZipPackage) throws IOException {
 
     Path tmpModelPath = ModelUtil.writeModelToTmpDir(modelZipPackage);
+    try(InputStream sourceCharMapIn = new FileInputStream(
+        tmpModelPath.resolve("source_char_dict.txt").toFile())) {
+      sourceCharMap = loadCharMap(sourceCharMapIn).entrySet()
+          .stream()
+          .collect(Collectors.toMap(Map.Entry::getValue, c -> c.getKey()));
+    }
+
+    try(InputStream targetCharMapIn = new FileInputStream(
+        tmpModelPath.resolve("target_char_dict.txt").toFile())) {
+      targetCharMap = loadCharMap(targetCharMapIn);
+    }
 
     SavedModelBundle model = SavedModelBundle.load(tmpModelPath.toString(), "serve");
     session = model.session();
-
-    sourceCharMap = loadCharMap(sourceCharMapIn).entrySet()
-        .stream()
-        .collect(Collectors.toMap(Map.Entry::getValue, c -> c.getKey()));
-
-    targetCharMap = loadCharMap(targetCharMapIn);
   }
 
   private static Map<Integer, Character> loadCharMap(InputStream in) throws IOException {
@@ -124,4 +129,15 @@ public class Normalizer {
       }
     }
   }
+
+  public static void main(String[] args) throws Exception {
+    Normalizer normalizer = new Normalizer(new FileInputStream(
+            "/home/blue/dev/opennlp-sandbox/tf-ner-poc/src/main/python/normalizer/normalizer.zip"));
+
+    String[] result = normalizer.normalize(new String[] {
+        "18 Mars 2012"
+    });
+
+    System.out.println(result[0]);
+  }
 }
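
With this change the constructor takes only the zip package and reads source_char_dict.txt and target_char_dict.txt from the extracted model directory itself, so callers no longer need to pass the dictionaries separately. A minimal usage sketch (the relative path "normalizer.zip" is an assumption; any readable copy of the package produced by normalizer.py works):

    import java.io.FileInputStream;
    import java.io.InputStream;

    import org.apache.opennlp.normalizer.Normalizer;

    public class NormalizerDemo {

      public static void main(String[] args) throws Exception {
        // "normalizer.zip" is assumed to be the package written by normalizer.py,
        // located in the working directory.
        try (InputStream modelZip = new FileInputStream("normalizer.zip")) {
          Normalizer normalizer = new Normalizer(modelZip);
          String[] normalized = normalizer.normalize(new String[] {"18 Mars 2012"});
          System.out.println(normalized[0]);
        }
      }
    }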
diff --git a/tf-ner-poc/src/main/python/normalizer/normalizer.py b/tf-ner-poc/src/main/python/normalizer/normalizer.py
index f721bb0..b4cc674 100644
--- a/tf-ner-poc/src/main/python/normalizer/normalizer.py
+++ b/tf-ner-poc/src/main/python/normalizer/normalizer.py
@@ -16,8 +16,10 @@
 #  specific language governing permissions and limitations
 #  under the License.
 #
-
+import os
 import re
+import zipfile
+from tempfile import TemporaryDirectory
 
 import tensorflow as tf
 import numpy as np
@@ -200,16 +202,12 @@ def main():
 
     source_char_dict = encode_chars(source_train + source_dev + source_test)
 
-    write_mapping(source_char_dict, 'source_char_dict.txt')
-
     target_char_dict = encode_chars(target_train + target_dev + target_test)
 
     # TODO: Find better chars for begin and end markers
     target_char_dict['S'] = len(target_char_dict)
     target_char_dict['E'] = len(target_char_dict)
 
-    write_mapping(target_char_dict, 'target_char_dict.txt')
-
     target_dict_rev = {v: k for k, v in target_char_dict.items()}
 
     batch_size = 20
@@ -234,7 +232,7 @@ def main():
 
         eval_sess = tf.Session(graph=eval_graph)
 
-    for epoch in range(30):
+    for epoch in range(1):
         print("Epoch " + str(epoch))
 
         with train_graph.as_default():
@@ -284,9 +282,25 @@ def main():
 
             print("Dev: " + str(count_correct / len(target_dev)))
 
-            builder = tf.saved_model.builder.SavedModelBuilder("./normalizer_model" + str(epoch))
+    with TemporaryDirectory() as temp_dir:
+
+        temp_model_dir = temp_dir + "/model"
+
+
+        with eval_graph.as_default():
+            builder = tf.saved_model.builder.SavedModelBuilder(temp_model_dir)
             builder.add_meta_graph_and_variables(eval_sess, [tf.saved_model.tag_constants.SERVING])
             builder.save()
 
+        write_mapping(source_char_dict, temp_model_dir + '/source_char_dict.txt')
+        write_mapping(target_char_dict, temp_model_dir + '/target_char_dict.txt')
+
+        zipf = zipfile.ZipFile("normalizer.zip", 'w', zipfile.ZIP_DEFLATED)
+
+        for root, dirs, files in os.walk(temp_model_dir):
+            for file in files:
+                modelFile = os.path.join(root, file)
+                zipf.write(modelFile, arcname=os.path.relpath(modelFile, temp_model_dir))
+
 if __name__ == "__main__":
     main()
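
The training script now stages the SavedModel and both character dictionaries in a temporary directory and zips them with arcname taken relative to that directory, so the dictionaries and the SavedModel files end up at the top level of normalizer.zip, which is the layout the Java Normalizer reads. A small sketch for sanity-checking the produced package (ListNormalizerZip is a hypothetical helper, not part of this commit):

    import java.io.FileInputStream;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipInputStream;

    // Hypothetical check, not part of this commit: lists the entries of the package
    // written by normalizer.py; source_char_dict.txt, target_char_dict.txt and the
    // SavedModel files should appear without a leading model/ prefix.
    public class ListNormalizerZip {

      public static void main(String[] args) throws Exception {
        try (ZipInputStream zis = new ZipInputStream(new FileInputStream("normalizer.zip"))) {
          for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) {
            System.out.println(entry.getName());
          }
        }
      }
    }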