You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2018/11/29 12:25:50 UTC
[opennlp-sandbox] branch master updated: Write model and dictionaries into zip package
This is an automated email from the ASF dual-hosted git repository.
joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 00a8fdf Write model and dictionaries into zip package
00a8fdf is described below
commit 00a8fdf5cc64d6341218b077427281665f4f6e16
Author: Jörn Kottmann <jo...@apache.org>
AuthorDate: Thu Nov 29 13:25:28 2018 +0100
Write model and dictionaries into zip package
---
.../main/java/org/apache/opennlp/ModelUtil.java | 14 ++++------
.../org/apache/opennlp/normalizer/Normalizer.java | 32 ++++++++++++++++------
.../src/main/python/normalizer/normalizer.py | 28 ++++++++++++++-----
3 files changed, 50 insertions(+), 24 deletions(-)
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
index fa80241..76e5c8a 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
@@ -29,20 +29,16 @@ public class ModelUtil {
public static Path writeModelToTmpDir(InputStream modelIn) throws IOException {
Path tmpDir = Files.createTempDirectory("opennlp2");
- // Unzip the model to a temp directory
ZipInputStream zis = new ZipInputStream(modelIn);
ZipEntry zipEntry = zis.getNextEntry();
while(zipEntry != null){
Path newFile = tmpDir.resolve(zipEntry.getName());
- if (zipEntry.isDirectory()) {
- Files.createDirectories(newFile);
- }
- else {
- Files.copy(zis, newFile);
- // This is a bit of hack, but should work fine for now ...
- newFile.toFile().deleteOnExit();
- }
+ Files.createDirectories(newFile.getParent());
+ Files.copy(zis, newFile);
+
+ // TODO: How to delete the tmp directory after we are done loading from it ?!
+ newFile.toFile().deleteOnExit();
zipEntry = zis.getNextEntry();
}
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
index 9c9f27b..faaf678 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
@@ -18,6 +18,7 @@
package org.apache.opennlp.normalizer;
import java.io.BufferedReader;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -44,19 +45,23 @@ public class Normalizer {
private final Map<Character, Integer> sourceCharMap;
private final Map<Integer, Character> targetCharMap;
- Normalizer(InputStream sourceCharMapIn, InputStream targetCharMapIn,
- InputStream modelZipPackage) throws IOException {
+ public Normalizer(InputStream modelZipPackage) throws IOException {
Path tmpModelPath = ModelUtil.writeModelToTmpDir(modelZipPackage);
+ try(InputStream sourceCharMapIn = new FileInputStream(
+ tmpModelPath.resolve("source_char_dict.txt").toFile())) {
+ sourceCharMap = loadCharMap(sourceCharMapIn).entrySet()
+ .stream()
+ .collect(Collectors.toMap(Map.Entry::getValue, c -> c.getKey()));
+ }
+
+ try(InputStream targetCharMapIn = new FileInputStream(
+ tmpModelPath.resolve("target_char_dict.txt").toFile())) {
+ targetCharMap = loadCharMap(targetCharMapIn);
+ }
SavedModelBundle model = SavedModelBundle.load(tmpModelPath.toString(), "serve");
session = model.session();
-
- sourceCharMap = loadCharMap(sourceCharMapIn).entrySet()
- .stream()
- .collect(Collectors.toMap(Map.Entry::getValue, c -> c.getKey()));
-
- targetCharMap = loadCharMap(targetCharMapIn);
}
private static Map<Integer, Character> loadCharMap(InputStream in) throws IOException {
@@ -124,4 +129,15 @@ public class Normalizer {
}
}
}
+
+ public static void main(String[] args) throws Exception {
+ Normalizer normalizer = new Normalizer(new FileInputStream(
+ "/home/blue/dev/opennlp-sandbox/tf-ner-poc/src/main/python/normalizer/normalizer.zip"));
+
+ String[] result = normalizer.normalize(new String[] {
+ "18 Mars 2012"
+ });
+
+ System.out.println(result[0]);
+ }
}
diff --git a/tf-ner-poc/src/main/python/normalizer/normalizer.py b/tf-ner-poc/src/main/python/normalizer/normalizer.py
index f721bb0..b4cc674 100644
--- a/tf-ner-poc/src/main/python/normalizer/normalizer.py
+++ b/tf-ner-poc/src/main/python/normalizer/normalizer.py
@@ -16,8 +16,10 @@
# specific language governing permissions and limitations
# under the License.
#
-
+import os
import re
+import zipfile
+from tempfile import TemporaryDirectory
import tensorflow as tf
import numpy as np
@@ -200,16 +202,12 @@ def main():
source_char_dict = encode_chars(source_train + source_dev + source_test)
- write_mapping(source_char_dict, 'source_char_dict.txt')
-
target_char_dict = encode_chars(target_train + target_dev + target_test)
# TODO: Find better chars for begin and end markers
target_char_dict['S'] = len(target_char_dict)
target_char_dict['E'] = len(target_char_dict)
- write_mapping(target_char_dict, 'target_char_dict.txt')
-
target_dict_rev = {v: k for k, v in target_char_dict.items()}
batch_size = 20
@@ -234,7 +232,7 @@ def main():
eval_sess = tf.Session(graph=eval_graph)
- for epoch in range(30):
+ for epoch in range(1):
print("Epoch " + str(epoch))
with train_graph.as_default():
@@ -284,9 +282,25 @@ def main():
print("Dev: " + str(count_correct / len(target_dev)))
- builder = tf.saved_model.builder.SavedModelBuilder("./normalizer_model" + str(epoch))
+ with TemporaryDirectory() as temp_dir:
+
+ temp_model_dir = temp_dir + "/model"
+
+
+ with eval_graph.as_default():
+ builder = tf.saved_model.builder.SavedModelBuilder(temp_model_dir)
builder.add_meta_graph_and_variables(eval_sess, [tf.saved_model.tag_constants.SERVING])
builder.save()
+ write_mapping(source_char_dict, temp_model_dir + '/source_char_dict.txt')
+ write_mapping(target_char_dict, temp_model_dir + '/target_char_dict.txt')
+
+ zipf = zipfile.ZipFile("normalizer.zip", 'w', zipfile.ZIP_DEFLATED)
+
+ for root, dirs, files in os.walk(temp_model_dir):
+ for file in files:
+ modelFile = os.path.join(root, file)
+ zipf.write(modelFile, arcname=os.path.relpath(modelFile, temp_model_dir))
+
if __name__ == "__main__":
main()