You are viewing a plain text version of this content. The canonical link for it is here.
Posted to by GitBox <> on 2018/08/09 18:10:02 UTC

[GitHub] CathyZhang0822 closed pull request #11861: [MXNET-691] Add Email Bot

CathyZhang0822 closed pull request #11861: [MXNET-691] Add Email Bot

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/mxnet-bot/EmailBot/ b/mxnet-bot/EmailBot/
new file mode 100644
index 00000000000..e9eaf7c6d86
--- /dev/null
+++ b/mxnet-bot/EmailBot/
@@ -0,0 +1,324 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import print_function
+from botocore.exceptions import ClientError
+from botocore.vendored import requests
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+import boto3
+import boto3.s3
+import datetime
+import logging
+import os
+import re
+class EmailBot:
+    def __init__(self, github_user = os.environ.get("github_user"),
+                 github_oauth_token = os.environ.get("github_oauth_token"),
+                 repo = os.environ.get("repo"),
+                 sender = os.environ.get("sender"),
+                 recipients = os.environ.get("recipients"),
+                 aws_region = os.environ.get('aws_region'),
+                 ):
+        """
+        This EmailBot serves to send github issue reports to recipients.
+        Args:
+            github_user(str): the github id. ie: "CathyZhang0822"
+            github_oauth_token(str): the github oauth token, paired with github_user to realize authorization
+            repo(str): the repo name
+            sender(str): sender's email address, which must be verified in AWS SES. ie:"sender@example.com"
+            recipients(str): comma-separated recipients' email addresses, which must be verified in AWS SES. ie:"a@example.com,b@example.com"
+            aws_region(str): aws region. ie:"us-east-1"
+        """
+        self.github_user = github_user
+        self.github_oauth_token = github_oauth_token
+        self.repo = repo
+        self.auth = (self.github_user, self.github_oauth_token)
+        self.sender = sender
+        self.recipients = [s.strip() for s in recipients.split(",")] if recipients else None
+        self.aws_region = aws_region
+        self.opendata = None
+        self.closeddata = None
+        self.start = datetime.datetime.strptime("2015-01-01", "%Y-%m-%d")
+        self.end =
+        self.sla = 5
+        # 2018-5-15 is the date that 'sla' concept was used.
+        self.sla_start = datetime.datetime.strptime("2018-05-15", "%Y-%m-%d")
+    def __clean_string(self, raw_string, sub_string):
+        """
+        This method is to convert all non-alphanumeric characters from raw_string to sub_string
+        """
+        cleans = re.sub("[^0-9a-zA-Z]", sub_string, raw_string)
+        return cleans.lower()
+    def __set_period(self, period):
+        """
+        This method is to set the time period. ie: set_period(7)
+        Because GitHub uses UTC time, we set self.end 2 days after today's date
+        For example, with period = 8:
+        today = "2018-07-10 00:00:00"
+        self.end = "2018-07-12 00:00:00"
+        self.start = "2018-07-04 00:00:00"
+        """
+        today = datetime.datetime.strptime(str(, "%Y-%m-%d")
+        self.end = today + datetime.timedelta(days=2)
+        timedelta = datetime.timedelta(days=period)
+        self.start = self.end - timedelta
+    def __count_pages(self, obj, state='all'):
+        """
+        This method is to count how many pages of issues/labels in total
+        obj could be "issues"/"labels"
+        state could be "open"/"closed"/"all", available to issues
+        """
+        assert obj in set(["issues", "labels"]), "Invalid Input!"
+        url = '{}/{}'.format(self.repo, obj)
+        if obj == 'issues':
+            response = requests.get(url, {'state': state},
+                                    auth=self.auth)
+        else:
+            response = requests.get(url, auth=self.auth)
+        assert response.status_code == 200, response.status_code
+        if "link" not in response.headers:
+            # That means only 1 page exists
+            return 1
+        # response.headers['link'] will look like:
+        # <https://api.github.com/...&page=2>; rel="next", <https://api.github.com/...&page=387>; rel="last"
+        # In this case we need to extract '387' as the count of pages
+        return int(self.__clean_string(response.headers['link'], " ").split()[-3])
+    def read_repo(self, periodically=True):
+        """
+        This method is to read issues in the repo.
+        if periodically == True, it will read issues which are created in a specific time period
+        if periodically == False, it will read all issues
+        """
+"Start reading {} issues".format("periodically" if periodically else "all"))
+        if periodically:
+            self.__set_period(8)
+        else:
+            self.start = self.sla_start
+            self.end =
+        pages = self.__count_pages('issues', 'all')
+        opendata = []
+        closeddata = []
+        stop = False
+        for page in range(1, pages + 1):
+            url = '' + self.repo + '/issues?page=' + str(page) \
+                  + '&per_page=30'.format(repo=self.repo)
+            response = requests.get(url,
+                                    {'state': 'all',
+                                     'base': 'master',
+                                     'sort': 'created',
+                                     'direction': 'desc'},
+                                    auth=self.auth)
+            response.raise_for_status()
+            for item in response.json():
+                if "pull_request" in item:
+                    continue
+                created = datetime.datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%SZ")
+                if self.start <= created <= self.end:
+                    if item['state'] == 'open':
+                        opendata.append(item)
+                    elif item['state'] == 'closed':
+                        closeddata.append(item)
+                else:
+                    stop = True
+                    break
+            if stop:
+                break
+        self.opendata = opendata
+        self.closeddata = closeddata
+    def sort(self):
+        """
+        This method is to sort open issues.
+        Returns a dictionary.
+        """
+        assert self.opendata, "No open issues in this time period!"
+        items = self.opendata
+        labelled = []
+        labelled_urls = ""
+        unlabelled = []
+        unlabelled_urls = ""
+        non_responded = []
+        non_responded_urls = ""
+        outside_sla = []
+        outside_sla_urls = ""
+        responded = []
+        responded_urls = ""
+        total_deltas = []
+        for item in items:
+            url = "<a href='" + item['html_url'] + "'>" + str(item['number']) + "</a>   "
+            created = datetime.datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%SZ")
+            if item['labels']:
+                labelled += [{k: v for k, v in item.items()
+                              if k in ['number', 'html_url', 'title']}]
+                labelled_urls = labelled_urls + url
+            else:
+                unlabelled += [{k: v for k, v in item.items()
+                                if k in ['number', 'html_url', 'title']}]
+                unlabelled_urls = unlabelled_urls + url
+            if item['comments'] == 0:
+                non_responded += [{k: v for k, v in item.items()
+                                   if k in ['number', 'html_url', 'title']}]
+                non_responded_urls = non_responded_urls + url
+                if self.sla_start < created < - datetime.timedelta(days=self.sla):
+                    outside_sla += [{k: v for k, v in item.items()
+                                     if k in ['number', 'html_url', 'title']}]
+                    outside_sla_urls = outside_sla_urls + url
+            else:
+                responded += [{k: v for k, v in item.items()
+                               if k in ['number', 'html_url', 'title']}]
+                responded_urls = responded_urls + url
+                comments_url = item['comments_url']
+                comments = requests.get(comments_url, auth=self.auth)
+                first_comment_created = datetime.datetime.strptime(comments.json()[0]['created_at'],
+                                                                   "%Y-%m-%dT%H:%M:%SZ")
+                delta = first_comment_created - created
+                total_deltas.append(delta)
+        data = {"labelled": labelled,
+                "labelled_urls": labelled_urls,
+                "unlabelled": unlabelled,
+                "unlabelled_urls": unlabelled_urls,
+                "responded": responded,
+                "responded_urls": responded_urls,
+                "non_responded": non_responded,
+                "non_responded_urls": non_responded_urls,
+                "outside_sla": outside_sla,
+                "outside_sla_urls": outside_sla_urls,
+                "total_deltas": total_deltas}
+        return data
+    def __html_table(self, lol):
+        """
+        This method is to generate html table.
+        Args:
+            lol(list of lists): table content
+        """
+        yield '<table style="width: 500px;">'
+        for sublist in lol:
+            yield '  <tr><td style = "width:200px;">'
+            yield '    </td><td style = "width:300px;">'.join(sublist)
+            yield '  </td></tr>'
+        yield '</table>'
+    def __bodyhtml(self):
+        """
+        This method is to generate body html of email content
+        """
+        self.read_repo(False)
+        all_sorted_open_data = self.sort()
+        self.read_repo(True)
+        weekly_sorted_open_data = self.sort()
+        total_deltas = weekly_sorted_open_data["total_deltas"]
+        if len(total_deltas) != 0:
+            avg = sum(total_deltas, datetime.timedelta())/len(total_deltas)
+            avg_time = str(avg.days)+" days, "+str(int(avg.seconds/3600))+" hours"
+            worst_time = str(max(total_deltas).days)+" days, "+str(int(max(total_deltas).seconds/3600)) + " hours"
+        else:
+            avg_time = "N/A"
+            worst_time = "N/A"
+        htmltable = [
+                    ["Labeled issues:", str(len(weekly_sorted_open_data["labelled"]))],
+                    ["Unlabeled issues:", str(len(weekly_sorted_open_data["unlabelled"]))],
+                    ["List unlabeled issues", weekly_sorted_open_data["unlabelled_urls"]],
+                    ["Issues with response:", str(len(weekly_sorted_open_data["responded"]))],
+                    ["Issues without response:", str(len(weekly_sorted_open_data["non_responded"]))],
+                    ["The average response time is:", avg_time],
+                    ["The worst response time is:", worst_time],
+                    ["List issues without response:", weekly_sorted_open_data["non_responded_urls"]],
+                    ["Count of issues without response within 5 days:", str(len(all_sorted_open_data["outside_sla"]))],
+                    ["List issues without response with 5 days:", all_sorted_open_data["outside_sla_urls"]]]
+        body_html = """<html>
+        <head>
+        </head>
+        <body>
+          <h4>Week: {} to {}</h4>
+          <p>{} newly issues were opened in the above period, among which {} were closed and {} are still open.</p>
+          <div>{}</div>
+        </body>
+        </html>
+                    """.format(str(, str((self.end - datetime.timedelta(days=2)).date()),
+                               str(len(self.opendata) + len(self.closeddata)),
+                               str(len(self.closeddata)), str(len(self.opendata)),
+                               "\n".join(self.__html_table(htmltable)))
+        return body_html
+    def sendemail(self):
+        """
+        This method is to send emails.
+        The email content contains 2 html tables and an image.
+        """
+        sender = self.sender
+        recipients = self.recipients
+        aws_region = self.aws_region
+        # The email body for recipients with non-HTML email clients.
+        body_text = "weekly report"
+        # The HTML body of the email.
+        body_html = self.__bodyhtml()
+        # The subject line for the email.
+        subject = "GitHub Issues Daily Report {} to {}".format(str(,
+                                                               str((self.end - datetime.timedelta(days=2)).date()))
+        # The character encoding for the email.
+        charset = "utf-8"
+        # Create a new SES resource and specify a region.
+        client = boto3.client('ses', region_name=aws_region)
+        # Create a multipart/mixed parent container.
+        msg = MIMEMultipart('mixed')
+        # Add subject, from and to lines
+        msg['Subject'] = subject
+        msg['From'] = sender
+        msg['To'] = ",".join(recipients)
+        # Create a multipart/alternative child container
+        msg_body = MIMEMultipart('alternative')
+        # Encode the text and HTML content and set the character encoding. This step is
+        # necessary if you're sending a message with characters outside the ASCII range.
+        textpart = MIMEText(body_text.encode(charset), 'plain', charset)
+        htmlpart = MIMEText(body_html.encode(charset), 'html', charset)
+        # Add the text and HTML parts to the child container
+        msg_body.attach(textpart)
+        msg_body.attach(htmlpart)
+        msg.attach(msg_body)
+        try:
+            # Provide the contents of the email.
+            response = client.send_raw_email(
+                Source=sender,
+                Destinations=recipients,
+                RawMessage={
+                    'Data': msg.as_string(),
+                },
+            )
+  "Email sent! Message ID:")
+  ['MessageId'])
+        # Display an error if something goes wrong.
+        except ClientError as e:
+            logging.exception(e.response['Error']['Message'])
diff --git a/mxnet-bot/EmailBot/ b/mxnet-bot/EmailBot/
new file mode 100644
index 00000000000..5868ee2a9ce
--- /dev/null
+++ b/mxnet-bot/EmailBot/
@@ -0,0 +1,69 @@
+# EmailBot
+Automatically send daily GitHub issue reports using Amazon Simple Email Service (SES) and AWS Lambda.
+## Description
+### Architecture
+An Amazon CloudWatch event triggers the Lambda function at a set frequency (e.g. 9am every Monday). Once the Lambda function is executed, the issue report is generated and sent to the recipients.   
+<div align="center">
+  <img src=""><br>
+### Email Content
+<div align="center">
+    <img src="" width="200" height="200"><br>
+## Setup
+Setup this email bot using serverless framework / manually.
+### Deploy email bot using serverless framework
+* Configure ***serverless.yml***
+    1. Under ***provider***, replace ***region*** with your aws region
+    2. Under ***environment***
+        1. replace ***github_user*** with your github id ie:"CathyZhang0822"
+        2. replace ***github_oauth_token*** with your READ ONLY access token
+        3. replace ***repo*** with the repo's name. ie:"apache/incubator-mxnet"
+        4. replace ***sender*** with the sender's email
+        5. replace ***recipients*** with the recipients' emails, separated by commas. ie:"a@example.com,b@example.com"
+        6. replace ***aws_region*** with the same aws region in ***provider***
+* Deploy
+Open terminal, go to current directory. run
+serverless deploy
+Then it will set up those AWS services:
+	* An IAM role for the email bot with policies allowing `ses:SendEmail`, `ses:SendTemplatedEmail` and `ses:SendRawEmail`.
+	* A Lambda function with all the code needed.
+	* A CloudWatch event which will trigger the Lambda function every day at 14:59 and 18:59 UTC. 
+* Verify Email Addresses. Go to AWS console -> SES -> Email Addresses to verify the sender's and recipients' email addresses.
+* Test the Lambda Function. On the lambda function's console, click Test.
+### Setup email bot manually
+* Set an AWS Lambda Function
+    * Create an AWS Lambda Function. Go to AWS console -> Lambda -> Create function. 
+        * Runtime: select Python3.6
+        * Role: Create a new IAM role with SES permissions
+    * Upload code. Save `EmailBot.py` and `lambda_function.py`, package the two files into a .zip file. Then upload the .zip file into the lambda function.
+    * Set Environment Variables. Set your own `github_user`, `github_oauth_token`, `repo`, `sender`, `recipients` and `aws_region` as environmental variables.
+    * Add a trigger. Select `CloudWatch Events` from the list on the left. Then configure the trigger. ie. create a new rule with schedule expression `cron(30 2 * * ? *)`. Then this CloudWatch event will trigger the lambda function every day at 2:30 (UTC).
+* Verify Email Addresses. Go to AWS console -> SES -> Email Addresses to verify the sender's and recipients' email addresses.
+* Test the Lambda Function. On the lambda function's console, click Test.
diff --git a/mxnet-bot/EmailBot/ b/mxnet-bot/EmailBot/
new file mode 100644
index 00000000000..d6521c302c8
--- /dev/null
+++ b/mxnet-bot/EmailBot/
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from EmailBot import EmailBot
+def lambda_handler(event, context):
+    EB = EmailBot()
+    EB.sendemail()
+    return "Hello from Lambda"
diff --git a/mxnet-bot/EmailBot/serverless.yml b/mxnet-bot/EmailBot/serverless.yml
new file mode 100644
index 00000000000..4471e4ac075
--- /dev/null
+++ b/mxnet-bot/EmailBot/serverless.yml
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Configurations
+service: EmailBot
+  exclude:
+    - ./**
+  include:
+    -
+    -
+  name: aws
+  # replace region with your aws region
+  region: us-east-1
+  runtime: python3.6
+  timeout: 300
+  iamRoleStatements:
+    -  Effect: "Allow"
+       Action:
+         - "ses:SendEmail"
+         - "ses:SendTemplatedEmail"
+         - "ses:SendRawEmail"
+       Resource: "*"
+  label:
+    handler: lambda_function.lambda_handler
+    events:
+      # UTC 14:59, 18:59
+      - schedule: cron(59 14,18 * * ? *)
+    environment:
+    # replace github_user with your github id ie:"CathyZhang0822"
+      github_user : "github_id"
+    # replace github_oauth_token with your READ ONLY access token
+      github_oauth_token : "read_only_access_token"
+    # replace repo with "apache/incubator-mxnet"
+      repo : "repo_name"
+    # replace sender with the sender's email
+      sender : ""
+    # replace recipients with the recipients' emails, separated by commas
+      recipients : ","
+    # replace aws_region with your aws's region
+      aws_region : "us-east-1"
diff --git a/mxnet-bot/EmailBot/ b/mxnet-bot/EmailBot/
new file mode 100644
index 00000000000..0106333ddca
--- /dev/null
+++ b/mxnet-bot/EmailBot/
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import requests
+import unittest
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from unittest.mock import patch
+from EmailBot import EmailBot
+# This file contains unit tests of EmailBot.py, coverage: 91%
+class TestEmailBot(unittest.TestCase):
+    def setUp(self):
+        self.eb = EmailBot()
+        self.eb.repo = "apache/incubator-mxnet"
+        self.eb.sender = ""
+        self.eb.recipients = ["", ""]
+    def tearDown(self):
+        pass
+    def test_read_repo(self):
+        with patch('EmailBot.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{"body": "issue's body",
+                                                          "created_at": "2018-07-28T18:27:17Z",
+                                                          "comments": "0",
+                                                          "number": 11925,
+                                                          "labels": [{'name': 'Doc'}],
+                                                          "state": "open",
+                                                          "title": "issue's title",
+                                                          "html_url": "",
+                                                          },
+                                                         {"body": "issue's body",
+                                                          "created_at": "2018-07-28T18:27:17Z",
+                                                          "comments": "0",
+                                                          "number": 11924,
+                                                          "labels": [],
+                                                          "state": "closed",
+                                                          "title": "issue's title",
+                                                          "html_url":"",
+                                                          }]
+            self.eb.read_repo(True)
+    def test_sendemail(self):
+        with patch('EmailBot.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{"body": "issue's body",
+                                                          "created_at": "2018-08-04T18:27:17Z",
+                                                          "comments": 0,
+                                                          "number": 11925,
+                                                          "labels": [{'name': 'Doc'}],
+                                                          "state": "open",
+                                                          "title": "issue's title",
+                                                          "html_url": "",
+                                                          },
+                                                         {"body": "issue's body",
+                                                          "created_at": "2018-08-04T18:27:17Z",
+                                                          "comments": 1,
+                                                          "comments_url": "",
+                                                          "number": 11918,
+                                                          "labels": [],
+                                                          "state":"open",
+                                                          "title":"issue's title",
+                                                          "html_url":"",
+                                                          }]
+            self.assertRaises(ClientError, self.eb.sendemail())
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/ b/mxnet-bot/
new file mode 100644
index 00000000000..c2bb3e6291a
--- /dev/null
+++ b/mxnet-bot/
@@ -0,0 +1,25 @@
+# MXNet Bot
+This directory contains bots that serve to improve operational efficiency.
+## Email Bot
+Automatically send daily GitHub issue reports using Amazon Simple Email Service (SES) and AWS Lambda.
+## Label Bot
+This bot will help automate/simplify issue labeling process, which mainly contains 3 parts:
+* Machine Learning part:
+  A web server built on AWS Elastic Beanstalk which can respond to GET/POST requests and is self-maintaining. It mainly has 2 features:
+  * Train models: it will retrain Machine Learning models every 24 hours automatically using latest data.
+  * Predict labels: once it receives GET/POST requests with issues ID, it will send predictions back.
+* Send Daily Emails: Automatically send daily GitHub issue reports listing unlabeled issues and recommended labels.
+* Add Labels: An API built using API Gateway and Lambda. Once this API is given correct GitHub credentials, an issue ID and labels, it will add the labels to the corresponding issue.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:

With regards,
Apache Git Services