You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@opennlp.apache.org by jo...@apache.org on 2018/05/30 09:54:27 UTC

[opennlp-sandbox] branch master updated (788e73a -> f8db193)

This is an automated email from the ASF dual-hosted git repository.

joern pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git.


    from 788e73a  Map chars to indices 0..n instead of using ord(c)
     new 6294dfa  Write mapping dicts to disk
     new f8db193  Name placeholders and variables for use from Java API

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tf-ner-poc/src/main/python/namefinder.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
joern@apache.org.

[opennlp-sandbox] 02/02: Name placeholders and variables for use from Java API

Posted by jo...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit f8db1938765eca2f55e61c5ffe1d625dbbe9ce7a
Author: Jörn Kottmann <jo...@apache.org>
AuthorDate: Wed May 30 11:49:09 2018 +0200

    Name placeholders and variables for use from Java API
---
 tf-ner-poc/src/main/python/namefinder.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tf-ner-poc/src/main/python/namefinder.py b/tf-ner-poc/src/main/python/namefinder.py
index e757491..c1220dd 100644
--- a/tf-ner-poc/src/main/python/namefinder.py
+++ b/tf-ner-poc/src/main/python/namefinder.py
@@ -174,10 +174,10 @@ class NameFinder:
 
         with tf.variable_scope("chars"):
             # shape = (batch size, max length of sentence, max length of word)
-            char_ids = tf.placeholder(tf.int32, shape=[None, None, None])
+            char_ids = tf.placeholder(tf.int32, shape=[None, None, None], name="char_ids")
 
             # shape = (batch_size, max_length of sentence)
-            word_lengths_ph = tf.placeholder(tf.int32, shape=[None, None])
+            word_lengths_ph = tf.placeholder(tf.int32, shape=[None, None], name="word_lengths")
 
             dim_char = 100
 
@@ -211,8 +211,8 @@ class NameFinder:
             char_rep = tf.reshape(output, shape=[-1, s[1], 2*char_hidden_size])
 
         with tf.variable_scope("words"):
-            token_ids = tf.placeholder(tf.int32, shape=[None, None])
-            sequence_lengths = tf.placeholder(tf.int32, shape=[None])
+            token_ids = tf.placeholder(tf.int32, shape=[None, None], name="word_ids")
+            sequence_lengths = tf.placeholder(tf.int32, shape=[None], name="sequence_lengths")
 
             # This is a hack to make it load an embedding matrix larger than 2GB
             # Don't hardcode this 300
@@ -252,12 +252,12 @@ class NameFinder:
         ntime_steps = tf.shape(context_rep)[1]
         context_rep_flat = tf.reshape(context_rep, [-1, 2*hidden_size])
         pred = tf.matmul(context_rep_flat, W) + b
-        self.logits = tf.reshape(pred, [-1, ntime_steps, ntags])
+        self.logits = tf.reshape(pred, [-1, ntime_steps, ntags], name="logits")
 
         log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
             self.logits, labels, sequence_lengths)
 
-        self.transition_params = transition_params
+        self.transition_params = tf.identity(transition_params, name="trans_params")
 
         loss = tf.reduce_mean(-log_likelihood)
 

-- 
To stop receiving notification emails like this one, please contact
joern@apache.org.

[opennlp-sandbox] 01/02: Write mapping dicts to disk

Posted by jo...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit 6294dfa21b259ef0d3e9e7bea3a35c8457e72a79
Author: Jörn Kottmann <jo...@apache.org>
AuthorDate: Wed May 30 11:36:56 2018 +0200

    Write mapping dicts to disk
---
 tf-ner-poc/src/main/python/namefinder.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tf-ner-poc/src/main/python/namefinder.py b/tf-ner-poc/src/main/python/namefinder.py
index 727e9a4..e757491 100644
--- a/tf-ner-poc/src/main/python/namefinder.py
+++ b/tf-ner-poc/src/main/python/namefinder.py
@@ -323,6 +323,11 @@ def get_chunks(seq, tags):
 
     return chunks
 
+def write_mapping(tags, output_filename):
+    with open(output_filename, 'w', encoding='utf-8') as f:
+        for i, tag in enumerate(tags):
+            f.write('{}\n'.format(tag))
+
 def main():
 
     if len(sys.argv) != 5:
@@ -340,6 +345,10 @@ def main():
     embedding_ph, token_ids_ph, char_ids_ph, word_lengths_ph, sequence_lengths_ph, labels_ph, train_op \
         = name_finder.create_graph(len(char_set | char_set_dev), embeddings)
 
+    write_mapping(word_dict, 'word_dict.txt')
+    write_mapping(name_finder.label_dict, "label_dict.txt")
+    write_mapping(name_finder.label_dict, "char_dict.txt")
+
     sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                             log_device_placement=True))
 

-- 
To stop receiving notification emails like this one, please contact
joern@apache.org.