Posted to commits@nlpcraft.apache.org by if...@apache.org on 2020/06/07 22:38:01 UTC
[incubator-nlpcraft] branch NLPCRAFT-67 updated: NLPCRAFT-67: Add CUDA support
This is an automated email from the ASF dual-hosted git repository.
ifropc pushed a commit to branch NLPCRAFT-67
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-67 by this push:
new 3187790 NLPCRAFT-67: Add CUDA support
3187790 is described below
commit 3187790fcc4179b194d9bc5dd8e8b7692c61c0c5
Author: Ifropc <if...@apache.org>
AuthorDate: Sun Jun 7 15:37:49 2020 -0700
NLPCRAFT-67: Add CUDA support
---
src/main/python/ctxword/bertft/bertft.py | 126 +++++++++++++++++--------------
1 file changed, 71 insertions(+), 55 deletions(-)
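In short, the patch gates all GPU work on availability: the pipeline selects torch.device('cuda') only when torch.cuda.is_available() returns True and otherwise falls back to the CPU, so the same code runs on both kinds of machines. A minimal sketch of that device-selection pattern (illustrative only, not part of the commit; the tensor name is hypothetical):

    import torch

    # Use the GPU when the driver and a device are present, else stay on CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Tensors created with device=... are allocated on the chosen device
    # directly, avoiding a separate host-to-device copy afterwards.
    scores = torch.zeros(10, device=device)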
diff --git a/src/main/python/ctxword/bertft/bertft.py b/src/main/python/ctxword/bertft/bertft.py
index adb37a9..a2067ac 100644
--- a/src/main/python/ctxword/bertft/bertft.py
+++ b/src/main/python/ctxword/bertft/bertft.py
@@ -36,9 +36,18 @@ def lget(lst, pos):
 # TODO: make Model configurable
 # TODO: add type check
 class Pipeline:
-    def __init__(self, on_run=None):
+    def __init__(self, use_cuda=True):
         self.log = logging.getLogger("bertft")
 
+        self.use_cuda = use_cuda and torch.cuda.is_available()
+
+        if self.use_cuda:
+            self.log.debug("CUDA is available")
+            self.device = torch.device('cuda')
+        else:
+            self.log.warning("CUDA is not available")
+            self.device = torch.device('cpu')
+
         start_time = time.time()
         # ft_size = 100 # ~2.6 GB
         ft_size = 200 # ~4.5 GB
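The constructor change above keeps CUDA optional for callers: even on a GPU machine, CPU execution can be forced explicitly. A hypothetical usage sketch (the import path is assumed from the file layout above, and the model downloads are taken as already in place):

    from bertft.bertft import Pipeline  # import path assumed from src/main/python/ctxword

    pipeline = Pipeline()                 # uses CUDA when torch.cuda.is_available()
    cpu_only = Pipeline(use_cuda=False)   # forces the CPU fallback, e.g. for debugging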
@@ -73,84 +82,91 @@ class Pipeline:
         self.tokenizer = AutoTokenizer.from_pretrained("roberta-large")
         self.model = AutoModelWithLMHead.from_pretrained("roberta-large")
 
+        if self.use_cuda:
+            self.model.cuda()
+
         self.log.info("Server started in %s seconds", ('{0:.4f}'.format(time.time() - start_time)))
 
     def find_top(self, input_data, k, top_bert, min_ftext, weights, min_score):
-        tokenizer = self.tokenizer
-        model = self.model
-        ft = self.ft
+        with torch.no_grad():
+            tokenizer = self.tokenizer
+            model = self.model
+            ft = self.ft
 
-        k = 10 if k is None else k
-        min_score = 0 if min_score is None else min_score
+            k = 10 if k is None else k
+            min_score = 0 if min_score is None else min_score
 
-        start_time = time.time()
-        req_start_time = start_time
+            start_time = time.time()
+            req_start_time = start_time
+
+            sentences = list(map(lambda x: self.replace_with_mask(x[0], x[1]), input_data))
 
-        sentences = list(map(lambda x: self.replace_with_mask(x[0], x[1]), input_data))
+            encoded = tokenizer.batch_encode_plus(list(map(lambda x: x[1], sentences)), pad_to_max_length=True)
+            input_ids = torch.tensor(encoded['input_ids'], device=self.device)
+            attention_mask = torch.tensor(encoded['attention_mask'], device=self.device)
 
-        encoded = tokenizer.batch_encode_plus(list(map(lambda x: x[1], sentences)), pad_to_max_length=True)
-        input_ids = torch.tensor(encoded['input_ids'])
-        attention_mask = torch.tensor(encoded['attention_mask'])
+            start_time = self.print_time(start_time, "Tokenizing finished")
+            forward = model(input_ids=input_ids, attention_mask=attention_mask)
 
-        start_time = self.print_time(start_time, "Tokenizing finished")
-        forward = model(input_ids=input_ids, attention_mask=attention_mask)
+            start_time = self.print_time(start_time, "Batch finished (Bert)")
 
-        start_time = self.print_time(start_time, "Batch finished (Bert)")
+            mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
+            token_logits = forward[0]
+            mask_token_logits = token_logits[0, mask_token_index, :]
 
-        mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
-        token_logits = forward[0]
-        mask_token_logits = token_logits[0, mask_token_index, :]
+            # Filter top <top_bert> results of bert output
+            topk = torch.topk(mask_token_logits, top_bert, dim=1)
 
-        # Filter top <top_bert> results of bert output
-        topk = torch.topk(mask_token_logits, top_bert, dim=1)
+            nvl = []
 
-        nvl = []
+            for d in topk.values:
+                nmin = torch.min(d)
+                nmax = torch.max(d)
+                nvl.append((d - nmin) / (nmax - nmin))
 
-        for d in topk.values:
-            nmin = torch.min(d)
-            nmax = torch.max(d)
-            nvl.append((d - nmin) / (nmax - nmin))
+            start_time = self.print_time(start_time, "Bert post-processing")
 
-        start_time = self.print_time(start_time, "Bert post-processing")
+            suggestions = []
+            for index in topk.indices:
+                lst = list(index)
+                tmp = []
+                for single in lst:
+                    tmp.append(tokenizer.decode([single]).strip())
+                suggestions.append(tuple(tmp))
 
-        suggestions = []
-        for index in topk.indices:
-            lst = list(index)
-            tmp = []
-            for single in lst:
-                tmp.append(tokenizer.decode([single]).strip())
-            suggestions.append(tuple(tmp))
+            start_time = self.print_time(start_time, "Bert decoding")
 
-        start_time = self.print_time(start_time, "Bert decoding")
+            cos = torch.nn.CosineSimilarity()
 
-        cos = torch.nn.CosineSimilarity()
+            result = []
 
-        result = []
+            for i in range(0, len(sentences)):
+                target = sentences[i][0]
+                suggest_embeddings = torch.tensor(list(map(lambda x: ft[x], suggestions[i])), device=self.device)
+                targ_tenzsor = torch.tensor(ft[target], device=self.device).expand(suggest_embeddings.shape)
+                similarities = cos(targ_tenzsor, suggest_embeddings)
 
-        for i in range(0, len(sentences)):
-            target = sentences[i][0]
-            suggest_embeddings = torch.tensor(list(map(lambda x: ft[x], suggestions[i])))
-            targ_tenzsor = torch.tensor(ft[target]).expand(suggest_embeddings.shape)
-            similarities = cos(targ_tenzsor, suggest_embeddings)
+                scores = nvl[i] * weights[0] + similarities * weights[1]
 
-            scores = nvl[i] * weights[0] + similarities * weights[1]
+                result.append(
+                    sorted(
+                        filter(
+                            lambda x: x[0] > min_score and x[1] > min_ftext,
+                            zip(scores.tolist(), similarities.tolist(), suggestions[i], nvl[i].tolist())
+                        ),
+                        key=lambda x: x[0],
+                        reverse=True
+                    )[:k]
+                )
 
-            result.append(
-                sorted(
-                    filter(
-                        lambda x: x[0] > min_score and x[1] > min_ftext,
-                        zip(scores.tolist(), similarities.tolist(), suggestions[i], nvl[i].tolist())
-                    ),
-                    key=lambda x: x[0],
-                    reverse=True
-                )[:k]
-            )
+            self.print_time(start_time, "Fast text similarities found")
 
-        self.print_time(start_time, "Fast text similarities found")
+            self.print_time(req_start_time, "Request processed")
 
-        self.print_time(req_start_time, "Request processed")
+            if (self.use_cuda):
+                torch.cuda.empty_cache()
 
-        return result
+            return result
 
     def replace_with_mask(self, sentence, index):
         lst = sentence.split()
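For readers new to the pattern this diff applies in find_top(): inference runs under torch.no_grad() so no autograd graph is kept, input tensors are built directly on the target device, results come back to the host via .tolist(), and cached GPU memory is released afterwards. Below is a self-contained sketch of the same idea; all names are hypothetical and only the torch calls mirror the commit:

    import torch

    def masked_scores(model, input_ids, attention_mask, device):
        # No gradients are needed at inference time; skipping the autograd
        # graph lowers GPU memory use, exactly as find_top() does above.
        with torch.no_grad():
            logits = model(input_ids=input_ids.to(device),
                           attention_mask=attention_mask.to(device))[0]
            scores = logits.softmax(dim=-1).tolist()  # copies results back to host memory
        if device.type == 'cuda':
            # Return cached allocator blocks to the driver so other processes
            # can use the memory; mirrors the empty_cache() call in the diff.
            torch.cuda.empty_cache()
        return scores

Note that empty_cache() is not required for correctness: PyTorch reuses its cached blocks on the next request. The diff trades a little allocation speed for a smaller resident GPU footprint between requests.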