You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/08/09 18:09:48 UTC
[GitHub] CathyZhang0822 closed pull request #11935: [MXNET-691]Add LabelBot prediction module

CathyZhang0822 closed pull request #11935: [MXNET-691]Add LabelBot prediction module
URL: https://github.com/apache/incubator-mxnet/pull/11935
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/mxnet-bot/LabelBotPredict/README.md b/mxnet-bot/LabelBotPredict/README.md
new file mode 100644
index 00000000000..62f2fc43a73
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/README.md
@@ -0,0 +1,16 @@
+# label_bot_predict_labels
+This bot will use ML models to predict labels and send daily [GitHub issue](https://github.com/apache/incubator-mxnet/issues) reports.
+It contains 2 parts:
+* Machine Learning part:
+  A web server built on [AWS Elastic Beanstalk](https://aws.amazon.com/elasticbeanstalk/) that responds to GET/POST requests and maintains itself. It has 2 main features:
+  * Train models: it retrains the Machine Learning models automatically every 24 hours using the latest data.
+  * Predict labels: once it receives a GET/POST request with issue IDs, it sends the predictions back.
+* Send Daily Emails: 
+  An AWS Lambda function which is triggered every day. 
+  Once this lambda function is executed, it sends POST requests to the Elastic Beanstalk web server asking for predictions. 
+  Then it generates the email content and sends the email.
+
+## Architecture
+ <div align="center">
+        <img src="https://s3-us-west-2.amazonaws.com/email-boy-images/Email+bot+-+Page+1+(1).png" ><br>
+ </div>
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/DataFetcher.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/DataFetcher.py
new file mode 100644
index 00000000000..0af27bbcd34
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/DataFetcher.py
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script fetches GitHub issues and stores them in a json file
+from __future__ import print_function
+import os
+import requests
+import json
+import re
+import pandas as pd
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+class DataFetcher:
+
+    def __init__(self,
+                 github_user = os.environ.get("github_user"),
+                 github_oauth_token = os.environ.get("github_oauth_token"),
+                 repo = os.environ.get("repo")):
+        """
+        Store the GitHub credentials and repo used by the fetch methods; the defaults are read from the environment once, when the class is defined
+        Args:
+            github_user(str): the github id. ie: "CathyZhang0822"
+            github_oauth_token(str): the github oauth token, paired with github_user to realize authorization
+            repo(str): the repo name as interpolated into the API URL -- presumably "owner/name", TODO confirm
+        """
+        self.github_user = github_user
+        self.github_oauth_token = github_oauth_token
+        self.repo = repo
+        self.auth = (self.github_user, self.github_oauth_token)  # passed as auth= to every requests.get call
+        self.json_data = None  # filled by data2json with the list of fetched issue records
+
+    def cleanstr(self, raw_string, sub_string):
+        """
+        Replace every non-alphanumeric character of raw_string with
+        sub_string and return the result in lower case
+        """
+        # lower() runs after the substitution, so the replacement text is lower-cased as well
+        return re.sub("[^0-9a-zA-Z]", sub_string, raw_string).lower()
+
+    def count_pages(self, state):
+        """
+        Return the number of result pages of the repo's issues listing for state ("open"/"closed"/"all"), or 1 when the response has no "link" header
+        """
+        url = 'https://api.github.com/repos/%s/issues' % self.repo
+        response = requests.get(url, {'state': state},
+                                auth=self.auth)
+        assert response.status_code == 200, "Authorization failed"
+        if "link" not in response.headers:
+            return 1
+        # punctuation is blanked out; the 3rd token from the end is taken as the last page number (presumably the header ends in 'page=N>; rel="last"' -- TODO confirm)
+        return int(self.cleanstr(response.headers['link'], " ").split()[-3])
+    
+    def fetch_issues(self, issue_nums):
+        """
+        This method is to fetch issues data
+        issue_nums: a non-empty list of issue ids
+        return issues' data in pandas dataframe format; columns 'id', 'title' and 'body' hold strings
+        """
+        assert issue_nums != [], "Empty Input!"
+        logging.info("Reading issues:{}".format(", ".join([str(num) for num in issue_nums])))
+        data = []
+        for number in issue_nums:
+            url = 'https://api.github.com/repos/' + self.repo + '/issues/' + str(number)
+            item = requests.get(url, auth=self.auth).json()
+            assert 'title' in item, "{} issues doesn't exist!".format(str(number))
+            # an issue without description has body=null; store '' so later string operations on the body do not fail on None
+            data += [{'id': str(number),'title': item['title'], 'body': item['body'] or ''}]
+        return pd.DataFrame(data)
+
+    def data2json(self,state,labels=None, other_labels = False):
+        """
+        Fetch the repo's issues (pull requests are skipped), store them into a json file and in self.json_data
+        state can be either "open"/"closed"/"all"
+        labels is a list of target labels we are interested in; None fetches every issue with all its labels.
+            Matching is case-sensitive and only the first target label (in list order) an issue carries counts
+        other_labels: if True, record all labels of a matching issue instead of the single target label
+        return the json file's name
+        """
+        assert state in set(['all', 'open', 'closed']), "Invalid State!"
+        logging.info("Reading {} issues..".format(state))
+        pages = self.count_pages(state)
+        # labels that are merged into the single class "Feature" when only target labels are recorded
+        feature_labels = set(["Feature", "Call for Contribution", "Feature request"])
+        url = 'https://api.github.com/repos/' + self.repo + '/issues'
+        data = []
+        for page in range(1, pages+1):
+            # page/per_page are passed through params like the other query arguments
+            response = requests.get(url,
+                                    {'state':state, 'base':'master', 'sort':'created',
+                                     'page': page, 'per_page': 30},
+                                    auth=self.auth)
+            for item in response.json():
+                # the listing also contains pull requests; skip them and anything without labels
+                if "pull_request" in item or "labels" not in item:
+                    continue
+                issue_labels = list(set(label['name'] for label in item['labels']))
+                # a null body (issue without description) is stored as '' to keep the column all-string
+                record = {'id': item['number'], 'title': item['title'],
+                          'body': item['body'] or '', 'labels': issue_labels}
+                if labels is None:
+                    # fetch all issues
+                    data.append(record)
+                    continue
+                # fetch issue which has at least one target label; only the first match is used
+                target = next((label for label in labels if label in issue_labels), None)
+                if target is None:
+                    continue
+                if not other_labels:
+                    # only record the target label
+                    record['labels'] = "Feature" if target in feature_labels else target
+                data.append(record)
+        self.json_data = data
+        # the file name embeds the state and the target labels
+        s_labels = "_".join(labels) if labels is not None else "all_labels"
+        filename = "{}_data.json_{}".format(state,s_labels)
+        logging.info("Writing json file..")
+        with open(filename,'w') as write_file:
+            json.dump(data, write_file)
+        logging.info("{} json file is ready!".format(filename))
+        return filename
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Dockerfile b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Dockerfile
new file mode 100644
index 00000000000..b8d740de214
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.6.6
+
+# Install prerequisite for matplotlib.
+# 'update' and 'install' share one RUN so the install never uses a stale cached package index
+RUN apt-get update && \
+    apt-get -y install libxft-dev libfreetype6 libfreetype6-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Bundle app source
+COPY . /src
+
+EXPOSE 8000
+WORKDIR /src
+
+#install Python modules
+RUN pip install -r requirements.txt
+
+# Run it
+ENTRYPOINT ["python", "application.py"]
+
+# Environment Variables
+ENV github_user your_github_id
+ENV github_oauth_token your_github_read_only_token
+ENV repo repo_name
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Predictor.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Predictor.py
new file mode 100644
index 00000000000..9c56dd435ab
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Predictor.py
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sklearn.preprocessing           import LabelEncoder
+from SentenceParser                  import SentenceParser
+from DataFetcher                     import DataFetcher
+import numpy  as np
+import pickle
+import re
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+class Predictor:
+	# label -> keywords that trigger it; rule_based compares each keyword with the whole lower-cased tokens of a title
+	keywords = {"ci":["ci", "ccache", "jenkins"],
+				"flaky":["flaky"],
+				"gluon":["gluon"],
+				"cuda":["cuda", "cudnn"],
+				"scala":["scala"],
+				"mkldnn":["mkldnn", "mkl"],
+				"onnx":["onnx"]}
+
+
+	def __init__(self):
+		"""
+		Predictor serves to apply rule-based and ML algorithms to predict labels; the pickled models are loaded on construction
+		"""
+		self.tv = None      # vectorizer, unpickled from /tmp/Vectorizer.p by reload()
+		self.labels = None  # training labels, unpickled from /tmp/Labels.p by reload()
+		self.clf = None     # classifier, unpickled from /tmp/Classifier.p by reload()
+		self.reload()
+
+
+	def reload(self):
+		"""(Re)load the pickled vectorizer, labels and classifier from /tmp"""
+		def load(path):
+			with open(path, "rb") as model_file:  # the with-block closes the file handle
+				return pickle.load(model_file)
+		# assign only after all three files were read, so a failed reload keeps the previous, consistent models
+		self.tv, self.labels, self.clf = load("/tmp/Vectorizer.p"), load("/tmp/Labels.p"), load("/tmp/Classifier.p")
+
+
+	def tokenize(self, row):
+		"""
+		This method is to tokenize a sentence into a set of lower-case alphanumeric words
+		Args:
+			row(string): a sentence
+		Return:
+			words(set): the distinct words; every non-alphanumeric character acts as a separator
+		"""
+		row = re.sub('[^a-zA-Z0-9]', ' ', row).lower()
+		words = set(row.split())
+		return words
+
+
+	def rule_based(self, issues):
+		"""
+		This method applies rule_based algorithms to the issues' titles to predict labels
+		Args:
+			issues(list): a list of issue numbers
+		Return:
+			rule_based_predictions(list of lists): per issue, each label whose rule matched, listed once
+		"""
+		DF = DataFetcher()
+		df_test = DF.fetch_issues(issues)
+		rule_based_predictions = []
+		for title in df_test['title']:
+			single_issue_predictions = []
+			lowered = title.lower()
+			if "feature request" in lowered:
+				single_issue_predictions.append("Feature")
+			if "c++" in lowered:
+				single_issue_predictions.append("C++")
+			# keyword rules match whole tokens; any() adds a label once even if several of its keywords hit
+			tokens = self.tokenize(title)
+			for label, triggers in self.keywords.items():
+				if any(trigger in tokens for trigger in triggers):
+					single_issue_predictions.append(label)
+			rule_based_predictions.append(single_issue_predictions)
+		return rule_based_predictions
+
+
+	def ml_predict(self, issues, threshold=0.3):
+		"""
+		This method applies machine learning algorithms to predict labels
+		Args:
+			issues(list): a list of issue numbers
+			threshold(float): a label is only predicted if its probability exceeds this value
+		Return:
+			ml_predictions(list of lists): per issue, up to 2 labels, most probable first
+		"""
+		# step1: fetch data
+		DF = DataFetcher()
+		df_test = DF.fetch_issues(issues)
+		# step2: data cleaning (the title is merged in three times, the body once)
+		SP = SentenceParser()
+		SP.data = df_test
+		SP.clean_body('body', True, True)
+		SP.merge_column(['title', 'title', 'title', 'body'], 'train')
+		test_text=SP.process_text('train', True, False, True)
+		# step3: vectorize the cleaned text with the loaded vectorizer
+		test_data_tfidf = self.tv.transform(test_text).toarray()
+		le = LabelEncoder()
+		le.fit_transform(self.labels)  # only fitted: le.classes_ is used below to name the probability columns
+		# step4: classification
+		probs = self.clf.predict_proba(test_data_tfidf)
+		# pick up top 2 predictions which exceed threshold (argsort is ascending, so the last column is the best)
+		best_n = np.argsort(probs, axis=1)[:, -2:]
+		ml_predictions=[]
+		for i in range(len(best_n)):
+			# example log line: INFO:Predictor:issue:11919,Performance:0.47353076240017744,Question:0.2440056213336274
+			logging.info("issue:{}, {}:{}, {}:{}".format(str(issues[i]), str(le.classes_[best_n[i][-1]]), str(probs[i][best_n[i][-1]]),
+						str(le.classes_[best_n[i][-2]]), str(probs[i][best_n[i][-2]])))
+			single_issue_predictions = [le.classes_[best_n[i][j]]  for j in range(-1, -3, -1) if probs[i][best_n[i][j]] > threshold]
+			ml_predictions.append(single_issue_predictions)
+		return ml_predictions
+
+
+	def predict(self, issues):
+		# merge, per issue, the rule-based and the machine learning predictions into one duplicate-free list
+		from_rules = self.rule_based(issues)
+		from_model = self.ml_predict(issues)
+		merged = [list(set(rules + model)) for rules, model in zip(from_rules, from_model)]
+		return merged
+
+		
+
+
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/README.md b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/README.md
new file mode 100644
index 00000000000..b3d4b5f161f
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/README.md
@@ -0,0 +1,25 @@
+# Elastic Beanstalk Web Server
+
+A web server built on [AWS Elastic Beanstalk](https://aws.amazon.com/elasticbeanstalk/) that responds to GET/POST requests and maintains itself. It has 2 main features:
+  * Train models: it retrains the Machine Learning models automatically every 24 hours using the latest data.
+  * Predict labels: once it receives a GET/POST request with issue IDs, it sends the predictions back.
+
+## Set up
+*Make sure you are in current directory.*
+* Configure Dockerfile: In `Dockerfile`. Set environment variables (last 3 lines) with real `github_user`, `github_oauth_token` and `repo`.
+* Open terminal, run:
+```bash
+zip eb.zip application.py cron.yaml DataFetcher.py \
+Dockerfile Dockerrun.aws.json plot_piechart.py Predictor.py SentenceParser.py Trainer.py \
+requirements.txt stopwords.txt
+```
+It will zip all needed files into `eb.zip`
+* Manually create a new Elastic Beanstalk application.
+    1. Go to AWS Elastic Beanstalk console, click ***Create New Application***. Fill in *Application Name* and *Description*, click ***Create***.
+    2. Under ***Select environment tier***, select ***Web server environment***, click ***Select***.
+    3. Under **Base configuration**, select **Preconfigured platform**. In its dropdown, select **Docker**. Then select ***Upload your code***, upload `eb.zip`.
+    4. Click ***Configure more options***. Modify Instances: in the dropdown of Instance type, select t2.large. Click ***Create Environment*** (No need to select a security group, EB will create one.)
+    5. It will take about 2 minutes to setup the environment. 
+    6. Once the environment is setup, it will take 5-10 minutes to generate models. 
+    7. Write down URL. (ie: http://labelbot-env.pgc55xzpte.us-east-1.elasticbeanstalk.com)
+    
\ No newline at end of file
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/SentenceParser.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/SentenceParser.py
new file mode 100644
index 00000000000..e96147f0dc2
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/SentenceParser.py
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script serves to do data cleaning
+from bs4 import BeautifulSoup
+import logging
+import nltk
+# fix ssl certificate errors
+import ssl
+try:
+    _create_unverified_https_context = ssl._create_unverified_context
+except AttributeError:
+    pass
+else:
+    ssl._create_default_https_context = _create_unverified_https_context
+import os.path
+import pandas as pd
+import re
+import sys
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+# English Stopwords, separated by whitespace; the path is relative to the current working directory.
+# The with-block closes the file, so no explicit close() is needed.
+with open('stopwords.txt') as stopwords_file:
+    stopwords = stopwords_file.read().split()
+
+
+class SentenceParser:
+
+    regex_str = [  # only entry 3 (URLs) is referenced inside this class, by process_text
+        r'<[^>]+>',                                                                     # HTML tags
+        r'(?:@[\w_]+)',                                                                 # @-mentions
+        r"(?:\#+[\w_]+[\w\'_\-]*[\w_]+)",                                               # hash-tags
+        r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+',   # URLs; NOTE(review): "&amp;" looks like an HTML-escaped "&" -- confirm
+        r'(?:(?:\d+,?)+(?:\.?\d+)?)',                                                   # numbers
+        r"(?:[a-z][a-z'\-_]+[a-z])",                                                    # words with - and '
+        r'(?:[\w_]+)',                                                                  # other words
+        r'(?:\S)'                                                                       # anything else
+    ]
+
+    def __init__(self):
+        """
+        SentenceParser serves to clean the text content held in the pandas dataframe self.data
+        """
+        self.data = None  # set by read_file; callers may also assign a dataframe directly
+        # extract words stem
+        self.porter = nltk.PorterStemmer()
+        # a set of stopwords
+        self.stops = set(stopwords)
+
+    def read_file(self, filepath, filetype, encod='ISO-8859-1', header=None):
+        """
+        Read a csv/json/xlsx file into self.data; `header` is not used for json. Calls sys.exit() on a missing file or an unknown filetype
+        """
+        logging.info('Start reading File')
+        if not os.path.isfile(filepath):
+            logging.error("File Not Exist!")
+            sys.exit()
+        if filetype == 'csv':
+            df = pd.read_csv(filepath, encoding=encod, header=header)
+        elif filetype == 'json':
+            df = pd.read_json(filepath, encoding=encod, lines=False)
+        elif filetype == 'xlsx':
+            df = pd.read_excel(filepath, encoding=encod, header=header)
+        else:
+            logging.error("Extension Type not Accepted!")
+            sys.exit()
+
+        logging.debug(df)
+        self.data = df
+
+    def merge_column(self, columns, name):
+        """
+        Concatenate the given columns, each preceded by a blank, into the new column `name`; a column listed several times is appended several times
+        """
+        logging.info('Merge headers %s to %s', str(columns), name)
+        self.data[name] = ''
+        for header in columns:
+            self.data[name] += ' ' + self.data[header]
+  
+    def clean_body(self, column, remove_template=True, remove_code=True):
+        """
+        Remove the issue template and code from every cell of `column`, in place (null cells become '')
+        remove_template: cut the text at "## Environment info"
+        remove_code: drop every piece enclosed in ``` fences and join the remaining pieces with " "
+        """
+        logging.info("Start Removing Templates..")
+        for i in range(len(self.data)):
+            text = self.data.loc[i, column]
+            text = '' if pd.isnull(text) else text  # e.g. the null body of an issue without description
+            if remove_template and "## Environment info" in text:
+                text = text[:text.find("## Environment info")]
+            if remove_code and "```" in text:
+                text = " ".join(text.split("```")[::2])  # the even pieces lie outside the fences
+            self.data.loc[i, column] = text
+
+    def process_text(self, column, remove_symbol=True, remove_stopwords=False, stemming=False):
+        """
+        Clean the text of `column` and return it as a list of lower-case strings, one per row. Line breaks/tabs and URLs are always blanked
+        (written back to self.data[column]) and HTML markup is stripped; optionally keep letters only / drop stopwords and numbers / stem
+        """
+        logging.info("Start Data Cleaning...")
+        # both patterns are regular expressions; regex=True says so explicitly because the default of str.replace depends on the pandas version
+        self.data[column] = self.data[column].str.replace(r'[\n\r\t]+', ' ', regex=True)
+        # remove URLs
+        self.data[column] = self.data[column].str.replace(self.regex_str[3], ' ', regex=True)
+        tempcol = self.data[column].values.tolist()
+        for i in range(len(tempcol)):
+            row = BeautifulSoup(tempcol[i], 'html.parser').get_text().lower()
+            # remove symbols
+            if remove_symbol:
+                row = re.sub('[^a-zA-Z]', ' ', row)
+            words = row.split()
+            # remove stopwords
+            if remove_stopwords:
+                words = [w for w in words if w not in self.stops and not w.replace('.', '', 1).isdigit()]
+            # extract words stem
+            if stemming:
+                words = [self.porter.stem(w) for w in words]
+            row = ' '.join(words)
+            tempcol[i] = row.lower()
+        return tempcol
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Trainer.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Trainer.py
new file mode 100644
index 00000000000..2f8c9409091
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/Trainer.py
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script trains the Machine Learning models
+from DataFetcher import DataFetcher
+from SentenceParser import SentenceParser
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import SVC
+from sklearn.naive_bayes import GaussianNB
+from sklearn.preprocessing import LabelEncoder
+import pickle
+import logging
+logging.basicConfig(level=logging.INFO)
+
+
+class Trainer:
+
+    def __init__(self):
+        """
+        Trainer is to train issues using Machine Learning methods.
+        self.labels(list): a list of target labels, handed to DataFetcher.data2json by train()
+        self.tv: TFIDF model (1- to 3-grams, max_features = 10000)
+        self.clf: Classifier (SVC, kernel = 'rbf', with probability estimates)
+        """
+        self.labels = ["Performance", "Test", "Question",
+                       "Feature request", "Call for contribution",  # NOTE(review): DataFetcher.data2json spells it "Call for Contribution" -- confirm the label's exact case
+                       "Feature", "Example", "Doc",
+                       "Installation", "Build", "Bug"]
+        self.tv = TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), max_features=10000)
+        self.clf = SVC(gamma=0.5, C=100, probability=True)
+
+    def train(self):
+        """
+        This method is to train and save models: it fetches the labelled issues, fits the
+        vectorizer and the classifier on them and pickles vectorizer, classifier and labels to /tmp
+        """
+        logging.info("Start training issues of general labels")
+        # Step1: Fetch issues with general labels
+        logging.info("Fetching Data..")
+        DF = DataFetcher()
+        filename = DF.data2json('all', self.labels, False)
+        # Step2: Clean data
+        logging.info("Cleaning Data..")
+        SP = SentenceParser()
+        SP.read_file(filename, 'json')
+        SP.clean_body('body', True, True)
+        # the title is merged in three times, so its words count three times as often as the body's
+        SP.merge_column(['title', 'title', 'title', 'body'], 'train')
+        text = SP.process_text('train', True, False, True)
+        # Step3: Word Embedding
+        logging.info("Word Embedding..")
+        tv = self.tv
+        X = tv.fit_transform(text).toarray()
+        # Labels
+        labels = SP.data['labels']
+        le = LabelEncoder()
+        Y = le.fit_transform(labels)
+        # Step4: Train Classifier (SVC, kernel = 'rbf')
+        logging.info("Training Data..")
+        clf = self.clf
+        clf.fit(X, Y)
+        # Step5: save models; each with-block flushes and closes its file before the method returns
+        logging.info("Saving Models..")
+        with open("/tmp/Vectorizer.p", "wb") as model_file:
+            pickle.dump(tv, model_file)
+        with open("/tmp/Classifier.p", "wb") as model_file:
+            pickle.dump(clf, model_file)
+        with open("/tmp/Labels.p", "wb") as model_file:
+            pickle.dump(labels, model_file)
+        logging.info("Completed!")
+
+
+
+
+
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/application.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/application.py
new file mode 100644
index 00000000000..e7d69119a14
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/application.py
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This is a web server built on the Flask framework and the AWS Elastic Beanstalk service.
+# It responds to HTTP GET/POST requests
+from flask import Flask, jsonify, request, send_file
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.interval import IntervalTrigger
+from Predictor import Predictor
+from Trainer import Trainer
+import plot_piechart
+import timeit
+import atexit
+import logging
+import os.path
+
+logging.basicConfig(level=logging.INFO)
+
+application = Flask(__name__)
+
+if not all(os.path.exists('/tmp/' + name) for name in ('Vectorizer.p', 'Classifier.p', 'Labels.p')):
+    trainer = Trainer()  # first start, or an incomplete set of model files: train before serving
+    trainer.train()
+predictor = Predictor()  # loads the three pickled files from /tmp
+
+# GET '/' : answers with a fixed greeting string
+@application.route('/')
+def index():
+    return "Hello!  -Bot"
+
+
+# GET '/issues/<issue>'
+# return the predicted labels of one issue as a space-separated string
+@application.route('/issues/<issue>')
+def get_prediction(issue):
+    labels = predictor.predict([issue])[0]
+    return " ".join(labels)
+
+
+# POST '/predict'
+# return the predictions for several issues at once
+@application.route('/predict', methods=['POST'])
+def predict():
+    # the request body is json of the form {"issues":[1,2,3]}
+    issue_numbers = request.get_json()["issues"]
+    per_issue_labels = predictor.predict(issue_numbers)
+    # pair every requested number with its predictions, keeping the request order
+    payload = [
+        {"number": number, "predictions": labels}
+        for number, labels in zip(issue_numbers, per_issue_labels)
+    ]
+    return jsonify(payload)
+
+
+# POST '/draw'
+# return the rendered pie chart as a png image
+@application.route('/draw', methods=['POST'])
+def plot():
+    # expected request: requests.post(url,json={"fracs":[], "labels":[]})
+    payload = request.get_json()
+    # "fracs" is looked up first, so a request missing both keys raises the same KeyError as before
+    fracs, labels = payload["fracs"], payload["labels"]
+    image_path = plot_piechart.draw_pie(fracs, labels)
+    return send_file(image_path, mimetype='image/png')
+
+
+# helper function
+def train_models():
+    """Retrain the models, make the running predictor reload them and log the training time"""
+    started = timeit.default_timer()
+    Trainer().train()
+    finished = timeit.default_timer()
+    # reload models (the reload is not part of the reported time)
+    predictor.reload()
+    elapsed = int(finished - started)
+    minutes, seconds = divmod(elapsed, 60)
+    logging.info("Training completed! Time cost: {} min, {} seconds".format(str(minutes), str(seconds)))
+
+
+# Once the server is running, it will retrain ML models every 24 hours.
+# Deliberately not a before_first_request hook: the module calls initialize() itself at import time, and registering it as well would start a second scheduler (two trainings per interval).
+def initialize():
+    scheduler = BackgroundScheduler()
+    scheduler.start()
+    scheduler.add_job(
+        func=train_models,
+        trigger=IntervalTrigger(hours=24),
+        id='Training_Job',
+        name='Update models every 24 hours',
+        replace_existing=True)
+    # Shut down the scheduler when exiting the app
+    atexit.register(lambda: scheduler.shutdown())
+
+
+initialize()
+
+
+# run the app.
+if __name__ == "__main__":
+    # The interactive debugger lets a client execute arbitrary code, so it is off unless explicitly
+    # requested for local debugging with the environment variable FLASK_DEBUG=1.
+    application.debug = os.environ.get("FLASK_DEBUG") == "1"
+    # 'threaded' is an option of run(); setting it as an attribute on the app object has no effect
+    application.run('0.0.0.0', 8000, threaded=True)
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/cron.yaml b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/cron.yaml
new file mode 100644
index 00000000000..f47da886a99
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/cron.yaml
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+version: 1
+cron: 
+  - name: "task1"
+    url: "/scheduled"
+    schedule: "* * * * *"
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/plot_piechart.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/plot_piechart.py
new file mode 100644
index 00000000000..9ffd89842de
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/plot_piechart.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import matplotlib
+# set 'agg' as matplotlib backend
+matplotlib.use('agg', warn=False, force=True)
+from matplotlib import pyplot as plt
+import logging
+logging.basicConfig(level=logging.INFO)
+
+
+def make_autopct(values):
+    def my_autopct(pct):
+        total = sum(values)
+        val = int(round(pct * total / 100.0))
+        return '{p:.2f}% ({v:d})'.format(p=pct, v=val)
+
+    return my_autopct
+
+
+def draw_pie(fracs, labels):
+    """
+    Plot the pie chart of labels, save it into the '/tmp/' folder and return the saved file's path
+    """
+    logging.info("Drawing the pie chart..")
+    fig = plt.figure()
+    plt.pie(fracs, labels=labels, autopct=make_autopct(fracs), shadow=True)
+    plt.title("Top 10 labels for newly opened issues")
+    figname = "piechart_{}_{}.png".format(str(datetime.datetime.today().date()),
+                                          str(datetime.datetime.today().time()))
+    fig.savefig("/tmp/{}".format(figname))
+    pic_path = "/tmp/{}".format(figname)
+    return pic_path
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_datafetcher.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_datafetcher.py
new file mode 100644
index 00000000000..fff2845a5dc
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_datafetcher.py
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import unittest
+import boto3
+import pandas as pd
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+from pandas.util.testing import assert_frame_equal
+# unittest.mock is missing on older interpreters (e.g. Python 2); fall back to the external mock package
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 93%
+class TestLabelBot(unittest.TestCase):
+
+	def setUp(self):
+		self.df = DataFetcher()
+		self.df.repo = "apache/incubator-mxnet"
+		self.df.github_user = "cathy"
+		self.df.github_oauth_token = "123"
+
+	def tearDown(self):
+		pass
+
+	def test_cleanstr(self):
+		new_string = self.df.cleanstr("a_b", "")
+		self.assertEqual(new_string, "ab")
+
+	def test_count_pages(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  },
+							  				  { "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11924,
+							 					"labels":[],
+							  					"state":"closed",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  }]
+			page = self.df.count_pages('all')
+			self.assertEqual(page,1)
+
+	def test_fetch_issues(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = { "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Feature'}],
+							  					"state":"open",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  }
+			data = self.df.fetch_issues([11925])
+			expected_data = [{'id':"11925", 'title':"issue's title",'body':"issue's body"}]
+			assert_frame_equal(data, pd.DataFrame(expected_data))
+
+	def test_data2json(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Feature'}],
+							  					"state":"open",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  },
+							  				  { "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11924,
+							 					"labels":[],
+							  					"state":"closed",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  }]
+			self.df.data2json('all', labels=["Feature"], other_labels=False)
+			expected_data = [{'id': 11925, 'title': "issue's title", 'body': "issue's body", 'labels': 'Feature'}] 						 
+			self.assertEqual(expected_data, self.df.json_data)
+
+if __name__ == "__main__":
+	unittest.main()
\ No newline at end of file
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_predictor.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_predictor.py
new file mode 100644
index 00000000000..ec2e59304ed
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_predictor.py
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import unittest
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from Predictor import Predictor
+from DataFetcher import DataFetcher
+from SentenceParser import SentenceParser
+# unittest.mock is missing on older interpreters (e.g. Python 2); fall back to the external mock package
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 100%
+class TestLabelBot(unittest.TestCase):
+
+	def setUp(self):
+		self.pr = Predictor()
+
+	def tearDown(self):
+		pass
+
+	def test_tokenize(self):
+		words = self.pr.tokenize("hello_world")
+		self.assertEqual(words, set(['hello','world']))
+
+	def test_rule_based(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = { "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"a feature requests for scala package",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  }
+			predictions = self.pr.rule_based([11925])
+			self.assertEqual([['Feature','scala']], predictions)
+
+	def test_ml_predict(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = { "body":"test",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"a feature requests for scala package",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  }
+			predictions=self.pr.ml_predict([11925])
+			self.assertEqual([['Feature']], predictions)
+
+	def test_predict(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = { "body":"test",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"a feature requests for scala package",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  }
+			predictions = self.pr.predict([11925])
+			self.assertEqual([['Feature','scala']], predictions)
+
+if __name__ == "__main__":
+	unittest.main()
\ No newline at end of file
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_sentenceparse.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_sentenceparse.py
new file mode 100644
index 00000000000..4a8d21ff9b3
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_sentenceparse.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import unittest
+import boto3
+import pandas as pd
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from SentenceParser import SentenceParser
+from pandas.util.testing import assert_frame_equal
+# unittest.mock is missing on older interpreters (e.g. Python 2); fall back to the external mock package
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 88%
+class TestSentenceParser(unittest.TestCase):
+
+	def setUp(self):
+		self.sp = SentenceParser()
+		self.sp.data = pd.DataFrame([{'id': 11925, 'title': "issue's title", 
+									  'body': " bug ``` import pandas``` ## Environment info", 
+									  'labels': ['Doc']}])
+
+	def test_read_file(self):
+		self.sp.read_file('all_data.json_Feature', 'json')
+		expected_data = [{'id': 11925, 'title': "issue's title", 'body': "issue's body", 'labels': ['Doc']}, 
+						 {'id': 11924, 'title': "issue's title", 'body': "issue's body", 'labels': []}]
+		assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+	def test_merge_column(self):						 
+		self.sp.merge_column(['title', 'body'], 'train')
+		expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug ``` import pandas``` ## Environment info", 
+						  'labels': ['Doc'],
+						  'train': " issue's title  bug ``` import pandas``` ## Environment info"}]
+		assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+	def test_clean_body(self):
+		self.sp.clean_body('body', True, True)
+		expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug   ", 'labels': ['Doc']}]
+		assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+	def test_process_text(self):
+		data = self.sp.process_text('body', True, True, True)
+		expected_data = ['bug import panda environ info']
+		self.assertEqual(data, expected_data)
+
+
+if __name__ == "__main__":
+	unittest.main()
\ No newline at end of file
diff --git a/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_trainer.py b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_trainer.py
new file mode 100644
index 00000000000..4fd10e85422
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/elastic_beanstalk_server/test_trainer.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import unittest
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+from Trainer import Trainer
+# unittest.mock is missing on older interpreters (e.g. Python 2); fall back to the external mock package
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 100%
+class TestTrainer(unittest.TestCase):
+
+	def setUp(self):
+		self.trainer = Trainer()
+
+	def test_train(self):
+		with patch('DataFetcher.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = [{ "body":"I was looking at the mxnet.\
+												metric source code and documentation",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"Confusion in documentation/implementation of F1, MCC metrics",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  },
+							  				  { "body":"I train a CNN with python under mxnet gluon mys C++ code crash when i call MXPredsetInput.",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11924,
+							 					"labels":[{'name':'Bug'}],
+							  					"state":"closed",
+							  					"title":"Issue in exporting gluon model",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11924",
+							  				  }]
+			self.trainer.train()
+
+if __name__ == "__main__":
+	unittest.main()
\ No newline at end of file
diff --git a/mxnet-bot/LabelBotPredict/label_bot_send_predictions/LabelBot.py b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/LabelBot.py
new file mode 100644
index 00000000000..341d4599b90
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/LabelBot.py
@@ -0,0 +1,334 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+from collections import defaultdict
+from botocore.vendored import requests
+from botocore.exceptions import ClientError
+from email.mime.multipart import MIMEMultipart
+from email.mime.image import MIMEImage
+from email.mime.text import MIMEText
+import datetime
+import operator
+import os
+import boto3
+import re
+import logging
+logging.basicConfig(level=logging.INFO)
+
+
+class LabelBot:
+
+    def __init__(self, img_file="/tmp/img_file.png",
+                 github_user = os.environ.get("github_user"),
+                 github_oauth_token = os.environ.get("github_oauth_token"),
+                 repo = os.environ.get("repo"),
+                 sender = os.environ.get("sender"),
+                 recipients = os.environ.get("recipients"),
+                 aws_region = os.environ.get('aws_region'),
+                 elastic_beanstalk_url = os.environ.get("eb_url")):
+        """
+        This LabelBot serves to send github issue reports to recipients.
+        Args:
+            img_file(str): the path of image file which will be attached in email content
+            github_user(str): the github id. ie: "CathyZhang0822"
+            github_oauth_token(str): the github oauth token, paired with github_user to realize authorization
+            repo(str): the repo name
+            sender(str): sender's email address must be verified in AWS SES. ie:"a@email.com"
+            recipients(str): recipients' email address must be verified in AWS SES. ie:"a@email.com, b@email.com"
+            aws_region(str): aws region. ie:"us-east-1"
+            elastic_beanstalk_url: the url of EB web server
+        """
+        self.github_user = github_user
+        self.github_oauth_token = github_oauth_token
+        self.repo = repo
+        self.auth = (self.github_user, self.github_oauth_token)
+        self.sender = sender
+        self.recipients = [s.strip() for s in recipients.split(",")] if recipients else None
+        self.aws_region = aws_region
+        self.elastic_beanstalk_url = elastic_beanstalk_url if elastic_beanstalk_url[-1]!="/" else elastic_beanstalk_url[:-1]
+        self.img_file = img_file
+        self.opendata = None
+        self.closeddata = None
+        self.sorted_open_issues = None
+        self.start = datetime.datetime.strptime("2015-01-01", "%Y-%m-%d")
+        self.end = datetime.datetime.today()
+    
+
+    def __clean_string(self, raw_string, sub_string):
+        """
+        This method is to convert all non-alphanumeric characters in raw_string to sub_string and lowercase the result
+        """
+        cleans = re.sub("[^0-9a-zA-Z]", sub_string, raw_string)
+        return cleans.lower()
+
+    def __set_period(self, period):
+        """
+        This method is to set the time period. ie: set_period(8)
+        Because GitHub uses UTC time, we set self.end 2 days after today's date
+        For example:
+        self.today = "2018-07-10 00:00:00"
+        self.end = "2018-07-12 00:00:00"
+        self.start = "2018-07-04 00:00:00"
+        """
+        today = datetime.datetime.strptime(str(datetime.datetime.today().date()), "%Y-%m-%d")
+        self.end = today + datetime.timedelta(days=2)
+        timedelta = datetime.timedelta(days=period)
+        self.start = self.end - timedelta
+
+    def __count_pages(self, obj, state='all'):
+        """
+        This method is to count how many pages of issues/labels in total
+        obj could be "issues"/"labels"
+        state could be "open"/"closed"/"all", available to issues
+        """
+        assert obj in set(["issues", "labels"]), "Invalid Input!"
+        url = 'https://api.github.com/repos/{}/{}'.format(self.repo, obj)
+        if obj == 'issues':
+            response = requests.get(url, {'state': state},
+                                    auth=self.auth)
+        else:
+            response = requests.get(url, auth=self.auth)
+        assert response.status_code == 200, response.status_code
+        if "link" not in response.headers:
+            return 1
+        return int(self.__clean_string(response.headers['link'], " ").split()[-3])
+
+    def read_repo(self, periodically=True):
+        """
+        This method is to read issues in the repo.
+        if periodically == True, it will read issues which are created in a specific time period
+        if periodically == False, it will read all issues
+        """
+        logging.info("Start reading repo")   
+        if periodically:
+            self.__set_period(8)
+        pages = self.__count_pages('issues', 'all')
+        opendata = []
+        closeddata = []
+        stop = False
+        for page in range(1, pages + 1):
+            url = 'https://api.github.com/repos/' + self.repo + '/issues?page=' + str(page) \
+                  + '&per_page=30'.format(repo=self.repo)
+            response = requests.get(url,
+                                    {'state': 'all',
+                                     'base': 'master',
+                                     'sort': 'created',
+                                     'direction': 'desc'},
+                                    auth=self.auth)
+            response.raise_for_status()
+            for item in response.json():
+                if "pull_request" in item:
+                    continue
+                created = datetime.datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%SZ")
+                if self.start <= created <= self.end:
+                    if item['state'] == 'open':
+                        opendata.append(item)
+                    elif item['state'] == 'closed':
+                        closeddata.append(item)
+                else:
+                    stop = True
+                    break
+            if stop:
+                break
+        self.opendata = opendata
+        self.closeddata = closeddata
+
+    def sort(self):
+        """
+        This method is to sort open issues.
+        Returns a dictionary.
+        """
+        self.read_repo(True)
+        assert self.opendata, "No open issues in this time period!"
+        items = self.opendata
+        labelled = []
+        labelled_urls = ""
+        unlabelled = []
+        unlabelled_urls = ""
+        labels = {}
+        labels = defaultdict(lambda: 0, labels)
+        non_responded = []
+        non_responded_urls = ""
+        responded = []
+        responded_urls = ""
+
+        for item in items:
+            url = "<a href='" + item['html_url'] + "'>" + str(item['number']) + "</a>   "
+            if item['labels'] != []:
+                labelled += [{k: v for k, v in item.items()
+                              if k in ['number', 'html_url', 'title']}]
+                labelled_urls = labelled_urls + url
+                for label in item['labels']:
+                    labels[label['name']] += 1
+            else:
+                unlabelled += [{k: v for k, v in item.items()
+                                if k in ['number', 'html_url', 'title']}]
+                unlabelled_urls = unlabelled_urls + url
+            if item['comments'] == 0:
+                non_responded += [{k: v for k, v in item.items()
+                                   if k in ['number', 'html_url', 'title']}]
+                non_responded_urls = non_responded_urls + url
+            else:
+                responded += [{k: v for k, v in item.items()
+                               if k in ['number', 'html_url', 'title']}]
+                responded_urls = responded_urls + url
+        labels['unlabelled'] = len(unlabelled)
+        data = {"labelled": labelled,
+                "labels": labels,
+                "labelled_urls": labelled_urls,
+                "unlabelled": unlabelled,
+                "unlabelled_urls": unlabelled_urls,
+                "non_responded": non_responded,
+                "non_responded_urls": non_responded_urls,
+                "responded": responded,
+                "responded_urls": responded_urls}
+        self.sorted_open_issues = data
+        return data
+
+    def predict(self):
+        """
+        This method is to send POST requests to EB web server.
+        Then EB web server will send predictions of unlabeled issues back.
+        Returns a json:
+        ie: [{"number":11919, "predictions":["doc"]}]
+        """
+        assert self.sorted_open_issues, "Please call .sort()) first"
+        data = self.sorted_open_issues
+        unlabeled_data_number = [item['number'] for item in data["unlabelled"]]
+        logging.info("Start predicting labels for: {}".format(str(unlabeled_data_number)))
+        url = "{}/predict".format(self.elastic_beanstalk_url)
+        response = requests.post(url, json={"issues": unlabeled_data_number})
+        logging.info(response.json())
+        return response.json()
+
+    def __html_table(self, lol):
+        """
+        This method is to generate html table.
+        Args:
+            lol(list of lists): table content
+        """
+        yield '<table style="width: 500px;">'
+        for sublist in lol:
+            yield '  <tr><td style = "width:200px;">'
+            yield '    </td><td style = "width:300px;">'.join(sublist)
+            yield '  </td></tr>'
+        yield '</table>'
+
+    def __bodyhtml(self):
+        """
+        This method is to generate body html of email content
+        """
+        self.sort()
+        data = self.sorted_open_issues
+        all_labels = data['labels']
+        sorted_labels = sorted(all_labels.items(), key=operator.itemgetter(1), reverse=True)
+        labels = [item[0] for item in sorted_labels[:10]]
+        fracs = [item[1] for item in sorted_labels[:10]]
+        url = "{}/draw".format(self.elastic_beanstalk_url)
+        pic_data = {"fracs": fracs, "labels": labels}
+        response = requests.post(url, json=pic_data)
+        if response.status_code == 200:
+            with open(self.img_file, "wb") as f:
+                f.write(response.content)
+        htmltable = [["Count of issues with no response:", str(len(data['non_responded']))],
+                     ["List of issues with no response:", data['non_responded_urls']],
+                     ["Count of unlabeled issues:", str(len(data['unlabelled']))],
+                     ["List of unlabeled issues:", data['unlabelled_urls']]
+                     ]
+        htmltable2 = [[str(item['number']), ",".join(item['predictions'])] for item in self.predict()]
+        body_html = """<html>
+        <head>
+        </head>
+        <body>
+          <h4>Week: {} to {}</h4>
+          <p>{} newly issues were opened in the above period, among which {} were closed and {} are still open.</p>
+          <div>{}</div>
+          <p>Here are the recommanded labels for unlabeled issues:</p>
+          <div>{}</div>
+          <p><img src="cid:image1" width="400" height="400"></p>
+        </body>
+        </html>
+                    """.format(str(self.start.date()), str((self.end - datetime.timedelta(days=2)).date()),
+                               str(len(self.opendata) + len(self.closeddata)),
+                               str(len(self.closeddata)), str(len(self.opendata)),
+                               "\n".join(self.__html_table(htmltable)),
+                               "\n".join(self.__html_table(htmltable2)))
+        return body_html
+
+    def sendemail(self):
+        """
+        This method is to send emails.
+        The email content contains 2 html tables and an image.
+        """
+        sender = self.sender
+        recipients = self.recipients
+        aws_region = self.aws_region
+        # The subject line for the email.
+        subject = "weekly report"
+        # The email body for recipients with non-HTML email clients.
+        body_text = "weekly report"
+        # The HTML body of the email.
+        body_html = self.__bodyhtml()
+        # The character encoding for the email.
+        charset = "utf-8"
+        # Create a new SES resource and specify a region.
+        client = boto3.client('ses', region_name=aws_region)
+        # Create a multipart/mixed parent container.
+        msg = MIMEMultipart('mixed')
+        # Add subject, from and to lines
+        msg['Subject'] = subject
+        msg['From'] = sender
+        msg['To'] = ",".join(recipients)
+
+        # Create a multipart/alternative child container
+        msg_body = MIMEMultipart('alternative')
+
+        # Encode the text and HTML content and set the character encoding. This step is
+        # necessary if you're sending a message with characters outside the ASCII range.
+        textpart = MIMEText(body_text.encode(charset), 'plain', charset)
+        htmlpart = MIMEText(body_html.encode(charset), 'html', charset)
+
+        # Add the text and HTML parts to the child container
+        msg_body.attach(textpart)
+        msg_body.attach(htmlpart)
+
+        # Attach the multipart/alternative child container to the multipart/mixed parent container
+        msg.attach(msg_body)
+
+        # Attach Image
+        fg = open(self.img_file, 'rb')
+        msg_image = MIMEImage(fg.read())
+        fg.close()
+        msg_image.add_header('Content-ID', '<image1>')
+        msg.attach(msg_image)
+        try:
+            # Provide the contents of the email.
+            response = client.send_raw_email(
+                Source=sender,
+                Destinations=recipients,
+                RawMessage={
+                    'Data': msg.as_string(),
+                },
+            )
+            logging.info("Email sent! Message ID:")
+            logging.info(response['MessageId'])
+        # Display an error if something goes wrong. 
+        except ClientError as e:
+            logging.exception(e.response['Error']['Message'])
+
+
diff --git a/mxnet-bot/LabelBotPredict/label_bot_send_predictions/README.md b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/README.md
new file mode 100644
index 00000000000..0f52a86a9a3
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/README.md
@@ -0,0 +1,40 @@
+# Send Daily Reports
+
+Automatically send daily [GitHub issue](https://github.com/apache/incubator-mxnet/issues) reports for a repo using [Amazon Simple Email Service](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/quick-start.html) and [AWS Lambda](https://aws.amazon.com/lambda/).
+
+## Set up
+*Deploy the lambda function using the serverless framework*
+* Configure ***serverless.yml***
+    1. Under ***provider***, replace ***region*** with your aws region
+    2. Under ***environment***
+        1. replace ***github_user*** with your GitHub ID, e.g. "CathyZhang0822"
+        2. replace ***github_oauth_token*** with your READ ONLY access token
+        3. replace ***repo*** with the repo's name, e.g. "apache/incubator-mxnet"
+        4. replace ***sender*** with the sender's email
+        5. replace ***recipients*** with the recipients' emails, separated by commas, e.g. "a@email.com, b@email.com"
+        6. replace ***aws_region*** with the same aws region in ***provider***
+        7. replace ***eb_url*** with your Elastic Beanstalk application's URL
+* Deploy
+Open a terminal, go to the current directory, and run:
+```bash
+serverless deploy
+```
+This will set up the following AWS resources:
+* An IAM role for label bot with policies:
+```
+1.ses:SendEmail
+2.ses:SendTemplatedEmail
+3.ses:SendRawEmail
+4.logs:CreateLogStream
+5.logs:PutLogEvents
+```
+* A Lambda function with all the code needed.
+* A CloudWatch event which will trigger the lambda function every day at 14:59 UTC.
+
+## Send Test Email
+* Go to this lambda function's console and make sure the environment variables are filled in correctly. Then click **Test**.
+* You will then receive an email like this:
+    <div align="center">
+        <img src="https://s3-us-west-2.amazonaws.com/email-boy-images/Screen+Shot+2018-08-04+at+7.00.52+PM.png"><br>
+    </div>
+
diff --git a/mxnet-bot/LabelBotPredict/label_bot_send_predictions/lambda_function.py b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/lambda_function.py
new file mode 100644
index 00000000000..1266a5ac9d8
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/lambda_function.py
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The lambda_function
+from LabelBot import LabelBot 
+
+
+def lambda_handler(event, context):
+    lb = LabelBot()
+    lb.sendemail()
+    return ("Hello from Lambda")
\ No newline at end of file
diff --git a/mxnet-bot/LabelBotPredict/label_bot_send_predictions/serverless.yml b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/serverless.yml
new file mode 100644
index 00000000000..cabc07843bc
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/serverless.yml
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Configurations
+
+service: LabelBotSendEmail
+
+package:
+  exclude:
+    - ./**
+  include:
+    - LabelBot.py
+    - lambda_function.py
+
+provider:
+  name: aws
+  # replace region with your aws region
+  region: us-east-1
+  runtime: python3.6
+  timeout: 300
+  iamRoleStatements:
+    -  Effect: "Allow"
+       Action:
+         - "ses:SendEmail"
+         - "ses:SendTemplatedEmail"
+         - "ses:SendRawEmail"
+       Resource: "*"
+
+functions:
+  label:
+    handler: lambda_function.lambda_handler
+
+    events:
+      # UTC 14:59
+      - schedule: cron(59 14 * * ? *)
+    environment:
+    # replace github_user with your GitHub ID, e.g. "CathyZhang0822"
+      github_user : "github_id"
+    # replace github_oauth_token with your READ ONLY access token
+      github_oauth_token : "read_only_access_token"
+    # replace repo with "apache/incubator-mxnet"
+      repo : "repo_name"
+    # replace sender with the sender's email
+      sender : "a@email.com"
+    # replace recipients with the recipients' emails, separated by commas
+      recipients : "a@email.com, b@email.com"
+    # replace aws_region with your AWS region
+      aws_region : "us-east-1"
+    # replace eb_url with your Elastic Beanstalk application's URL
+    # e.g. "http://labelbot-env.pgc55xzpte.us-east-1.elasticbeanstalk.com/"
+      eb_url : ""
+
diff --git a/mxnet-bot/LabelBotPredict/label_bot_send_predictions/test_labelbot.py b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/test_labelbot.py
new file mode 100644
index 00000000000..085ed8000bd
--- /dev/null
+++ b/mxnet-bot/LabelBotPredict/label_bot_send_predictions/test_labelbot.py
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import unittest
+import boto3
+from botocore.exceptions import ClientError
+from LabelBot import LabelBot
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+
+class TestLabelBot(unittest.TestCase):
+	"""
+	Unittests for LabelBot.py
+	Test coverage: 92%
+
+	HTTP traffic is replaced by mocks of ``LabelBot.requests.get`` and
+	``LabelBot.requests.post``; nothing else is mocked in this file.
+	"""
+
+	def setUp(self):
+		# Build a LabelBot pointing at a local test image, then overwrite its
+		# configuration attributes with fixed placeholder values.
+		self.lb = LabelBot(img_file="./test_img.png")
+		self.lb.repo = "apache/incubator-mxnet"
+		self.lb.sender = "fake@email.com"
+		self.lb.recipients = ["fake2@email.com"]
+		self.lb.aws_region = "us-east-1"
+		self.lb.elastic_beanstalk_url = "http://fakedocker.us-west-2.elasticbeanstalk.com"
+
+
+	def test_read_repo(self):
+		# Smoke test: the mocked GET returns status 200 and two fake issues
+		# (one open and labelled, one closed and unlabelled). There are no
+		# assertions, so the test only checks that read_repo(False) does not raise.
+		# NOTE(review): the meaning of the False argument is not visible here —
+		# confirm against LabelBot.read_repo.
+		with patch('LabelBot.requests.get') as mocked_get:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  },
+							  				  { "body":"issue's body",
+											 	"created_at":"2018-07-28T18:27:17Z",
+							  					"comments":"0",
+							  					"number":11924,
+							 					"labels":[],
+							  					"state":"closed",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11924",
+							  				  }]
+			self.lb.read_repo(False)
+
+
+	def test_sendemail(self):
+		# Both GET (issue list) and POST (label predictions) are mocked; the
+		# email-sending path itself is not mocked in this test.
+		with patch('LabelBot.requests.get') as mocked_get, patch('LabelBot.requests.post') as mocked_post:
+			mocked_get.return_value.status_code = 200
+			mocked_get.return_value.json.return_value = [
+											   {"body":"issue's body",
+											 	"created_at":"2018-08-04T18:27:17Z",
+							  					"comments":0,
+							  					"number":11925,
+							 					"labels":[{'name':'Doc'}],
+							  					"state":"open",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+							  				  },
+							  				  {"body":"issue's body",
+											 	"created_at":"2018-08-04T18:27:17Z",
+							  					"comments":1,
+							  					"comments_url":"https://api.github.com/repos/apache/incubator-mxnet/issues/11918/comments",
+							  					"number":11918,
+							 					"labels":[],
+							  					"state":"open",
+							  					"title":"issue's title",
+							  					"html_url":"https://github.com/apache/incubator-mxnet/issues/11918",
+							  				  }]
+			mocked_post.return_value.json.return_value = [{'number': 11919, 'predictions': ['Performance']}, 
+														  {'number': 11924, 'predictions': ['Build']}]
+			# NOTE(review): sendemail() is invoked right here and its return
+			# value (not the method) is handed to assertRaises, so assertRaises
+			# never wraps the call. If an exception is really expected, pass
+			# self.lb.sendemail without parentheses or use the
+			# "with self.assertRaises(...)" form — confirm the intended behavior.
+			self.assertRaises(ClientError, self.lb.sendemail())
+
+
+if __name__ == "__main__":
+	unittest.main()
\ No newline at end of file


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services