Posted to commits@nlpcraft.apache.org by if...@apache.org on 2020/06/07 22:38:01 UTC
[incubator-nlpcraft] branch NLPCRAFT-67 updated: NLPCRAFT-67: Add CUDA support
This is an automated email from the ASF dual-hosted git repository.
ifropc pushed a commit to branch NLPCRAFT-67
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-67 by this push:
new 3187790 NLPCRAFT-67: Add CUDA support
3187790 is described below
commit 3187790fcc4179b194d9bc5dd8e8b7692c61c0c5
Author: Ifropc <if...@apache.org>
AuthorDate: Sun Jun 7 15:37:49 2020 -0700
NLPCRAFT-67: Add CUDA support
---
src/main/python/ctxword/bertft/bertft.py | 126 +++++++++++++++++--------------
1 file changed, 71 insertions(+), 55 deletions(-)
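In short, the patch gates all GPU work on availability: the pipeline selects torch.device('cuda') only when torch.cuda.is_available() returns True and otherwise falls back to the CPU, so the same code runs on both kinds of machines. A minimal sketch of that device-selection pattern (illustrative only, not part of the commit; the tensor name is hypothetical):

    import torch

    # Use the GPU when the driver and a device are present, else stay on CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Tensors created with device=... are allocated on the chosen device
    # directly, avoiding a separate host-to-device copy afterwards.
    scores = torch.zeros(10, device=device)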
diff --git a/src/main/python/ctxword/bertft/bertft.py b/src/main/python/ctxword/bertft/bertft.py
index adb37a9..a2067ac 100644
--- a/src/main/python/ctxword/bertft/bertft.py
+++ b/src/main/python/ctxword/bertft/bertft.py
@@ -36,9 +36,18 @@ def lget(lst, pos):
 # TODO: make Model configurable
 # TODO: add type check
 class Pipeline:
-    def __init__(self, on_run=None):
+    def __init__(self, use_cuda=True):
         self.log = logging.getLogger("bertft")
 
+        self.use_cuda = use_cuda and torch.cuda.is_available()
+
+        if self.use_cuda:
+            self.log.debug("CUDA is available")
+            self.device = torch.device('cuda')
+        else:
+            self.log.warning("CUDA is not available")
+            self.device = torch.device('cpu')
+
         start_time = time.time()
         # ft_size = 100 # ~2.6 GB
         ft_size = 200 # ~4.5 GB
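The constructor change above keeps CUDA optional for callers: even on a GPU machine, CPU execution can be forced explicitly. A hypothetical usage sketch (the import path is assumed from the file layout above, and the model downloads are taken as already in place):

    from bertft.bertft import Pipeline  # import path assumed from src/main/python/ctxword

    pipeline = Pipeline()                 # uses CUDA when torch.cuda.is_available()
    cpu_only = Pipeline(use_cuda=False)   # forces the CPU fallback, e.g. for debugging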
@@ -73,84 +82,91 @@ class Pipeline:
         self.tokenizer = AutoTokenizer.from_pretrained("roberta-large")
         self.model = AutoModelWithLMHead.from_pretrained("roberta-large")
 
+        if self.use_cuda:
+            self.model.cuda()
+
         self.log.info("Server started in %s seconds", ('{0:.4f}'.format(time.time() - start_time)))
 
     def find_top(self, input_data, k, top_bert, min_ftext, weights, min_score):
-        tokenizer = self.tokenizer
-        model = self.model
-        ft = self.ft
+        with torch.no_grad():
+            tokenizer = self.tokenizer
+            model = self.model
+            ft = self.ft
 
-        k = 10 if k is None else k
-        min_score = 0 if min_score is None else min_score
+            k = 10 if k is None else k
+            min_score = 0 if min_score is None else min_score
 
-        start_time = time.time()
-        req_start_time = start_time
+            start_time = time.time()
+            req_start_time = start_time
+
+            sentences = list(map(lambda x: self.replace_with_mask(x[0], x[1]), input_data))
 
-        sentences = list(map(lambda x: self.replace_with_mask(x[0], x[1]), input_data))
+            encoded = tokenizer.batch_encode_plus(list(map(lambda x: x[1], sentences)), pad_to_max_length=True)
+            input_ids = torch.tensor(encoded['input_ids'], device=self.device)
+            attention_mask = torch.tensor(encoded['attention_mask'], device=self.device)
 
-        encoded = tokenizer.batch_encode_plus(list(map(lambda x: x[1], sentences)), pad_to_max_length=True)
-        input_ids = torch.tensor(encoded['input_ids'])
-        attention_mask = torch.tensor(encoded['attention_mask'])
+            start_time = self.print_time(start_time, "Tokenizing finished")
+            forward = model(input_ids=input_ids, attention_mask=attention_mask)
 
-        start_time = self.print_time(start_time, "Tokenizing finished")
-        forward = model(input_ids=input_ids, attention_mask=attention_mask)
+            start_time = self.print_time(start_time, "Batch finished (Bert)")
 
-        start_time = self.print_time(start_time, "Batch finished (Bert)")
+            mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
+            token_logits = forward[0]
+            mask_token_logits = token_logits[0, mask_token_index, :]
 
-        mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
-        token_logits = forward[0]
-        mask_token_logits = token_logits[0, mask_token_index, :]
+            # Filter top <top_bert> results of bert output
+            topk = torch.topk(mask_token_logits, top_bert, dim=1)
 
-        # Filter top <top_bert> results of bert output
-        topk = torch.topk(mask_token_logits, top_bert, dim=1)
+            nvl = []
 
-        nvl = []
+            for d in topk.values:
+                nmin = torch.min(d)
+                nmax = torch.max(d)
+                nvl.append((d - nmin) / (nmax - nmin))
 
-        for d in topk.values:
-            nmin = torch.min(d)
-            nmax = torch.max(d)
-            nvl.append((d - nmin) / (nmax - nmin))
+            start_time = self.print_time(start_time, "Bert post-processing")
 
-        start_time = self.print_time(start_time, "Bert post-processing")
+            suggestions = []
+            for index in topk.indices:
+                lst = list(index)
+                tmp = []
+                for single in lst:
+                    tmp.append(tokenizer.decode([single]).strip())
+                suggestions.append(tuple(tmp))
 
-        suggestions = []
-        for index in topk.indices:
-            lst = list(index)
-            tmp = []
-            for single in lst:
-                tmp.append(tokenizer.decode([single]).strip())
-            suggestions.append(tuple(tmp))
+            start_time = self.print_time(start_time, "Bert decoding")
 
-        start_time = self.print_time(start_time, "Bert decoding")
+            cos = torch.nn.CosineSimilarity()
 
-        cos = torch.nn.CosineSimilarity()
+            result = []
 
-        result = []
+            for i in range(0, len(sentences)):
+                target = sentences[i][0]
+                suggest_embeddings = torch.tensor(list(map(lambda x: ft[x], suggestions[i])), device=self.device)
+                targ_tenzsor = torch.tensor(ft[target], device=self.device).expand(suggest_embeddings.shape)
+                similarities = cos(targ_tenzsor, suggest_embeddings)
 
-        for i in range(0, len(sentences)):
-            target = sentences[i][0]
-            suggest_embeddings = torch.tensor(list(map(lambda x: ft[x], suggestions[i])))
-            targ_tenzsor = torch.tensor(ft[target]).expand(suggest_embeddings.shape)
-            similarities = cos(targ_tenzsor, suggest_embeddings)
+                scores = nvl[i] * weights[0] + similarities * weights[1]
 
-            scores = nvl[i] * weights[0] + similarities * weights[1]
+                result.append(
+                    sorted(
+                        filter(
+                            lambda x: x[0] > min_score and x[1] > min_ftext,
+                            zip(scores.tolist(), similarities.tolist(), suggestions[i], nvl[i].tolist())
+                        ),
+                        key=lambda x: x[0],
+                        reverse=True
+                    )[:k]
+                )
 
-            result.append(
-                sorted(
-                    filter(
-                        lambda x: x[0] > min_score and x[1] > min_ftext,
-                        zip(scores.tolist(), similarities.tolist(), suggestions[i], nvl[i].tolist())
-                    ),
-                    key=lambda x: x[0],
-                    reverse=True
-                )[:k]
-            )
+            self.print_time(start_time, "Fast text similarities found")
 
-        self.print_time(start_time, "Fast text similarities found")
+            self.print_time(req_start_time, "Request processed")
 
-        self.print_time(req_start_time, "Request processed")
+            if (self.use_cuda):
+                torch.cuda.empty_cache()
 
-        return result
+            return result
 
     def replace_with_mask(self, sentence, index):
         lst = sentence.split()
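For readers new to the pattern this diff applies in find_top(): inference runs under torch.no_grad() so no autograd graph is kept, input tensors are built directly on the target device, results come back to the host via .tolist(), and cached GPU memory is released afterwards. Below is a self-contained sketch of the same idea; all names are hypothetical and only the torch calls mirror the commit:

    import torch

    def masked_scores(model, input_ids, attention_mask, device):
        # No gradients are needed at inference time; skipping the autograd
        # graph lowers GPU memory use, exactly as find_top() does above.
        with torch.no_grad():
            logits = model(input_ids=input_ids.to(device),
                           attention_mask=attention_mask.to(device))[0]
            scores = logits.softmax(dim=-1).tolist()  # copies results back to host memory
        if device.type == 'cuda':
            # Return cached allocator blocks to the driver so other processes
            # can use the memory; mirrors the empty_cache() call in the diff.
            torch.cuda.empty_cache()
        return scores

Note that empty_cache() is not required for correctness: PyTorch reuses its cached blocks on the next request. The diff trades a little allocation speed for a smaller resident GPU footprint between requests.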