You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by if...@apache.org on 2020/06/05 04:45:06 UTC
[incubator-nlpcraft] branch NLPCRAFT-67 updated: NLPCRAFT-67: After review fixes
This is an automated email from the ASF dual-hosted git repository.
ifropc pushed a commit to branch NLPCRAFT-67
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-67 by this push:
new 4a26831 NLPCRAFT-67: After review fixes
4a26831 is described below
commit 4a268313ba3a16afd1223dbf366255a5828f217f
Author: Ifropc <if...@apache.org>
AuthorDate: Thu Jun 4 21:44:57 2020 -0700
NLPCRAFT-67: After review fixes
---
src/main/python/ctxword/README.md | 9 +++----
src/main/python/ctxword/bertft/bertft.py | 41 +++++++++++++-------------------
src/main/python/ctxword/bin/predict.sh | 2 +-
src/main/python/ctxword/server.py | 12 ++++------
4 files changed, 27 insertions(+), 37 deletions(-)
diff --git a/src/main/python/ctxword/README.md b/src/main/python/ctxword/README.md
index 977482e..078a2ef 100644
--- a/src/main/python/ctxword/README.md
+++ b/src/main/python/ctxword/README.md
@@ -20,13 +20,14 @@ To install dependencies:
To start server:
`$ bin/start_server.sh`
-Server has single route in root which accepts POST json requests with parameters:
+### Routes
+##### /suggestions
+Returns word replacement suggestions for following word in the sentence
* "sentence": Target sentence. Number of word to find synonyms for must be passed as argument
-* "lower", "upper": Positions in the sentence of start and end of collocation to find synonyms for.
-Note: sentence is split via whitespaces, upper bound is inclusive.
+* "index": Position in the sentence of the word to generate suggestions for.
* "simple" (Optional): If set to true omits verbose data.
* "limit": Sets limit of result words number.
Simple request could be made with a script, e.g.
`$ bin/predict.sh "what is the chance of rain tomorrow?" 5`
-Would find synonym for word "rain" in this sentence.
+Would find suggestions for word "rain" in this sentence.
diff --git a/src/main/python/ctxword/bertft/bertft.py b/src/main/python/ctxword/bertft/bertft.py
index d315b49..6d27d7a 100644
--- a/src/main/python/ctxword/bertft/bertft.py
+++ b/src/main/python/ctxword/bertft/bertft.py
@@ -81,36 +81,25 @@ class Pipeline:
self.log.info("Server started in %s seconds", ('{0:.4f}'.format(time.time() - start_time)))
- def find_top(self, sentence, positions, k, top_bert, bert_norm, min_ftext, weights):
+ def find_top(self, sentence, index, k, top_bert, bert_norm, min_ftext, weights, min_score):
tokenizer = self.tokenizer
model = self.model
ft = self.ft
+ k = 10 if k is None else k
+ min_score = 0 if min_score is None else min_score
+
self.log.debug("Input: %s", sentence)
start_time = time.time()
lst = sentence.split()
- lower = positions[0]
- upper = positions[1] + 1
- target = "-".join(lst[lower:upper])
- if lower == positions[1] or target in self.ft_dict:
- seqlst = lst[:lower]
- seqlst.append(tokenizer.mask_token)
- seqlst.extend(lst[upper:])
- sequence = " ".join(seqlst)
- else:
- rec = list()
- for i in range(lower, upper):
- seqlst = lst[:lower]
- seqlst.append(lst[i])
- seqlst.extend(lst[upper:])
- rec.append(
- self.find_top(" ".join(seqlst), [lower, lower], k, top_bert, bert_norm, min_ftext, weights))
+ target = lst[index]
- rec = sorted(rec, key=lambda x: x.score.mean(), reverse=True)
-
- return rec[0]
+ seqlst = lst[:index]
+ seqlst.append(tokenizer.mask_token)
+ seqlst.extend(lst[(index + 1):])
+ sequence = " ".join(seqlst)
self.log.debug("Target word: %s; sequence: %s", target, sequence)
@@ -149,10 +138,12 @@ class Pipeline:
if self.on_run is None and word == target:
continue
- if sim >= min_ftext:
- filtered.append((word, value, norm_value, sim, sentence_sim, calc_w(norm_value, sim, weights)))
+ score = calc_w(norm_value, sim, weights)
+
+ if sim >= min_ftext and score > min_score:
+ filtered.append((word, value, norm_value, sim, sentence_sim, score))
- unfiltered.append((word, value, norm_value, sim, sentence_sim, calc_w(norm_value, sim, weights)))
+ unfiltered.append((word, value, norm_value, sim, sentence_sim, score))
done = (time.time() - start_time)
@@ -178,8 +169,8 @@ class Pipeline:
return filtered_top
- def do_find(self, s, positions, limit):
- return self.find_top(s, positions, limit, 200, 200, 0.25, [1, 1])
+ def do_find(self, s, index, limit, min_score):
+ return self.find_top(s, index, limit, 200, 200, 0.25, [1, 1], min_score)
def dget(self, lst, pos):
return list(map(lambda x: '{0:.2f}'.format(x[pos]), lst)) if self.on_run is not None else lget(lst, pos)
diff --git a/src/main/python/ctxword/bin/predict.sh b/src/main/python/ctxword/bin/predict.sh
index ef9d551..0fd5f22 100755
--- a/src/main/python/ctxword/bin/predict.sh
+++ b/src/main/python/ctxword/bin/predict.sh
@@ -16,4 +16,4 @@
# limitations under the License.
#
-curl -d "{\"sentence\": \"$1\",\"simple\": true, \"lower\": $2, \"upper\": $2, \"limit\": 10}" -H 'Content-Type: application/json' http://localhost:5000/synonyms
+curl -d "{\"sentence\": \"$1\",\"simple\": true, \"index\": $2, \"limit\": 10}" -H 'Content-Type: application/json' http://localhost:5000/suggestions
diff --git a/src/main/python/ctxword/server.py b/src/main/python/ctxword/server.py
index bad42cb..a5899a0 100644
--- a/src/main/python/ctxword/server.py
+++ b/src/main/python/ctxword/server.py
@@ -48,7 +48,7 @@ def present(json, name):
"Required '" + name + "' argument is not present")
-@app.route('/synonyms', methods=['POST'])
+@app.route('/suggestions', methods=['POST'])
def main():
if not request.is_json:
raise ValidationException("Json expected")
@@ -56,13 +56,11 @@ def main():
json = request.json
sentence = present(json, 'sentence')
- upper = present(json, 'upper')
- lower = present(json, 'lower')
- positions = check_condition(lower <= upper, lambda: [lower, upper],
- "Lower bound must be less or equal upper bound")
- limit = present(json, 'limit')
+ index = present(json, 'index')
+ limit = json['limit'] if 'limit' in json else None
+ min_score = json['min_score'] if 'min_score' in json else None
- data = pipeline.do_find(sentence, positions, limit)
+ data = pipeline.do_find(sentence, index, limit, min_score)
if 'simple' not in json or not json['simple']:
json_data = data.to_json(orient='table', index=False)
else: