You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by if...@apache.org on 2020/06/05 04:45:06 UTC

[incubator-nlpcraft] branch NLPCRAFT-67 updated: NLPCRAFT-67: After review fixes

This is an automated email from the ASF dual-hosted git repository.

ifropc pushed a commit to branch NLPCRAFT-67
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-67 by this push:
     new 4a26831  NLPCRAFT-67: After review fixes
4a26831 is described below

commit 4a268313ba3a16afd1223dbf366255a5828f217f
Author: Ifropc <if...@apache.org>
AuthorDate: Thu Jun 4 21:44:57 2020 -0700

    NLPCRAFT-67: After review fixes
---
 src/main/python/ctxword/README.md        |  9 +++----
 src/main/python/ctxword/bertft/bertft.py | 41 +++++++++++++-------------------
 src/main/python/ctxword/bin/predict.sh   |  2 +-
 src/main/python/ctxword/server.py        | 12 ++++------
 4 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/src/main/python/ctxword/README.md b/src/main/python/ctxword/README.md
index 977482e..078a2ef 100644
--- a/src/main/python/ctxword/README.md
+++ b/src/main/python/ctxword/README.md
@@ -20,13 +20,14 @@ To install dependencies:
 To start server:  
 `$ bin/start_server.sh`  
 
-Server has single route in root which accepts POST json requests with parameters: 
+### Routes
+##### /suggestions
+Returns word replacement suggestions for the word at the given position in the sentence.  
 * "sentence": Target sentence. Number of word to find synonyms for must be passed as argument
-* "lower", "upper": Positions in the sentence of start and end of collocation to find synonyms for.  
-Note: sentence is split via whitespaces, upper bound is inclusive. 
+* "index": Position in the sentence of the word to generate suggestions for.  
 * "simple" (Optional): If set to true omits verbose data.  
 * "limit": Sets limit of result words number.  
 
 Simple request could be made with a script, e.g.  
 `$ bin/predict.sh "what is the chance of rain tomorrow?" 5`  
-Would find synonym for word "rain" in this sentence.
+This would find suggestions for the word "rain" (zero-based index 5 after splitting on whitespace) in this sentence.
diff --git a/src/main/python/ctxword/bertft/bertft.py b/src/main/python/ctxword/bertft/bertft.py
index d315b49..6d27d7a 100644
--- a/src/main/python/ctxword/bertft/bertft.py
+++ b/src/main/python/ctxword/bertft/bertft.py
@@ -81,36 +81,25 @@ class Pipeline:
 
         self.log.info("Server started in %s seconds", ('{0:.4f}'.format(time.time() - start_time)))
 
-    def find_top(self, sentence, positions, k, top_bert, bert_norm, min_ftext, weights):
+    def find_top(self, sentence, index, k, top_bert, bert_norm, min_ftext, weights, min_score):
         tokenizer = self.tokenizer
         model = self.model
         ft = self.ft
 
+        k = 10 if k is None else k
+        min_score = 0 if min_score is None else min_score
+
         self.log.debug("Input: %s", sentence)
         start_time = time.time()
 
         lst = sentence.split()
-        lower = positions[0]
-        upper = positions[1] + 1
-        target = "-".join(lst[lower:upper])
-        if lower == positions[1] or target in self.ft_dict:
-            seqlst = lst[:lower]
-            seqlst.append(tokenizer.mask_token)
-            seqlst.extend(lst[upper:])
-            sequence = " ".join(seqlst)
-        else:
-            rec = list()
 
-            for i in range(lower, upper):
-                seqlst = lst[:lower]
-                seqlst.append(lst[i])
-                seqlst.extend(lst[upper:])
-                rec.append(
-                    self.find_top(" ".join(seqlst), [lower, lower], k, top_bert, bert_norm, min_ftext, weights))
+        target = lst[index]
 
-            rec = sorted(rec, key=lambda x: x.score.mean(), reverse=True)
-
-            return rec[0]
+        seqlst = lst[:index]
+        seqlst.append(tokenizer.mask_token)
+        seqlst.extend(lst[(index + 1):])
+        sequence = " ".join(seqlst)
 
         self.log.debug("Target word: %s; sequence: %s", target, sequence)
 
@@ -149,10 +138,12 @@ class Pipeline:
             if self.on_run is None and word == target:
                 continue
 
-            if sim >= min_ftext:
-                filtered.append((word, value, norm_value, sim, sentence_sim, calc_w(norm_value, sim, weights)))
+            score = calc_w(norm_value, sim, weights)
+
+            if sim >= min_ftext and score > min_score:
+                filtered.append((word, value, norm_value, sim, sentence_sim, score))
 
-            unfiltered.append((word, value, norm_value, sim, sentence_sim, calc_w(norm_value, sim, weights)))
+            unfiltered.append((word, value, norm_value, sim, sentence_sim, score))
 
         done = (time.time() - start_time)
 
@@ -178,8 +169,8 @@ class Pipeline:
 
         return filtered_top
 
-    def do_find(self, s, positions, limit):
-        return self.find_top(s, positions, limit, 200, 200, 0.25, [1, 1])
+    def do_find(self, s, index, limit, min_score):
+        return self.find_top(s, index, limit, 200, 200, 0.25, [1, 1], min_score)
 
     def dget(self, lst, pos):
         return list(map(lambda x: '{0:.2f}'.format(x[pos]), lst)) if self.on_run is not None else lget(lst, pos)
diff --git a/src/main/python/ctxword/bin/predict.sh b/src/main/python/ctxword/bin/predict.sh
index ef9d551..0fd5f22 100755
--- a/src/main/python/ctxword/bin/predict.sh
+++ b/src/main/python/ctxword/bin/predict.sh
@@ -16,4 +16,4 @@
 # limitations under the License.
 #
 
-curl -d "{\"sentence\": \"$1\",\"simple\": true, \"lower\": $2, \"upper\": $2, \"limit\": 10}" -H 'Content-Type: application/json' http://localhost:5000/synonyms
+curl -d "{\"sentence\": \"$1\",\"simple\": true, \"index\": $2, \"limit\": 10}" -H 'Content-Type: application/json' http://localhost:5000/suggestions
diff --git a/src/main/python/ctxword/server.py b/src/main/python/ctxword/server.py
index bad42cb..a5899a0 100644
--- a/src/main/python/ctxword/server.py
+++ b/src/main/python/ctxword/server.py
@@ -48,7 +48,7 @@ def present(json, name):
                            "Required '" + name + "' argument is not present")
 
 
-@app.route('/synonyms', methods=['POST'])
+@app.route('/suggestions', methods=['POST'])
 def main():
     if not request.is_json:
         raise ValidationException("Json expected")
@@ -56,13 +56,11 @@ def main():
     json = request.json
 
     sentence = present(json, 'sentence')
-    upper = present(json, 'upper')
-    lower = present(json, 'lower')
-    positions = check_condition(lower <= upper, lambda: [lower, upper],
-                                "Lower bound must be less or equal upper bound")
-    limit = present(json, 'limit')
+    index = present(json, 'index')
+    limit = json['limit'] if 'limit' in json else None
+    min_score = json['min_score'] if 'min_score' in json else None
 
-    data = pipeline.do_find(sentence, positions, limit)
+    data = pipeline.do_find(sentence, index, limit, min_score)
     if 'simple' not in json or not json['simple']:
         json_data = data.to_json(orient='table', index=False)
     else: