You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2021/06/30 23:51:53 UTC

[GitHub] [beam] pabloem commented on a change in pull request #15039: [BEAM-10913] - Post-commit github actions status

pabloem commented on a change in pull request #15039:
URL: https://github.com/apache/beam/pull/15039#discussion_r661878274



##########
File path: .test-infra/metrics/sync/github/sync_github_actions.py
##########
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+'''
+This module queries GitHub to collect pre-commit GitHub Actions metrics and put them in
+PostgreSQL.
+'''
+import sys
+import time
+import psycopg2
+import datetime
+
+# Keeping this as reference for localhost debug
+# Fetching docker host machine ip for testing purposes.
+# Actual host should be used for production.
+def findDockerNetworkIP():
+  '''Utilizes ip tool to find docker network IP'''
+  import subprocess
+  cmd_out = subprocess.check_output(["ip", "route", "show"]).decode("utf-8")
+  return cmd_out.split(" ")[2]
+
+
+#DB_HOST = findDockerNetworkIP()
+
+GH_PRS_TABLE_NAME = 'gh_actions_metrics'
+
+GH_PRS_CREATE_TABLE_QUERY = f"""
+  create table {GH_PRS_TABLE_NAME} (
+  ga_id serial NOT NULL PRIMARY KEY,
+  job_name varchar NOT NULL,
+  status varchar NOT NULL,
+  workflow_id varchar NOT NULL,
+  workflow_url varchar NOT NULL,
+  executed_ts timestamp NOT NULL
+  )
+  """
+
+def initDBConnection():
+  '''Opens connection to postgresql DB, as configured via global variables.'''
+
+  DB_HOST = sys.argv[4]
+  DB_PORT = sys.argv[5]
+  DB_NAME = sys.argv[6]
+  DB_USER_NAME = sys.argv[7]
+  DB_PASSWORD = sys.argv[8]
+
+  conn = None
+  while not conn:
+    try:
+      conn = psycopg2.connect(
+          f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
+          f" port='{DB_PORT}' password='{DB_PASSWORD}'")
+    except:
+      print('Failed to connect to DB; retrying in one hour')
+      sys.stdout.flush()
+      time.sleep(3600)

Review comment:
       1 hour sounds like a very long wait. Maybe something shorter? maybe even 1 or 2 minutes? WDYT?

##########
File path: .test-infra/metrics/sync/github/sync_github_actions.py
##########
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+'''
+This module queries GitHub to collect pre-commit GitHub Actions metrics and put them in
+PostgreSQL.
+'''
+import sys
+import time
+import psycopg2
+import datetime
+
+# Keeping this as reference for localhost debug
+# Fetching docker host machine ip for testing purposes.
+# Actual host should be used for production.
+def findDockerNetworkIP():
+  '''Utilizes ip tool to find docker network IP'''
+  import subprocess
+  cmd_out = subprocess.check_output(["ip", "route", "show"]).decode("utf-8")
+  return cmd_out.split(" ")[2]
+
+
+#DB_HOST = findDockerNetworkIP()
+
+GH_PRS_TABLE_NAME = 'gh_actions_metrics'
+
+GH_PRS_CREATE_TABLE_QUERY = f"""
+  create table {GH_PRS_TABLE_NAME} (
+  ga_id serial NOT NULL PRIMARY KEY,
+  job_name varchar NOT NULL,
+  status varchar NOT NULL,
+  workflow_id varchar NOT NULL,
+  workflow_url varchar NOT NULL,
+  executed_ts timestamp NOT NULL
+  )
+  """
+
+def initDBConnection():
+  '''Opens connection to postgresql DB, as configured via global variables.'''
+
+  DB_HOST = sys.argv[4]
+  DB_PORT = sys.argv[5]
+  DB_NAME = sys.argv[6]
+  DB_USER_NAME = sys.argv[7]
+  DB_PASSWORD = sys.argv[8]
+
+  conn = None
+  while not conn:
+    try:
+      conn = psycopg2.connect(
+          f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
+          f" port='{DB_PORT}' password='{DB_PASSWORD}'")
+    except:
+      print('Failed to connect to DB; retrying in one hour')
+      sys.stdout.flush()
+      time.sleep(3600)
+  return conn
+
+def tableExists(cursor, tableName):
+  '''Checks the existense of table.'''
+  cursor.execute(f"select * from information_schema.tables"
+                 f" where table_name='{tableName}';")
+  return bool(cursor.rowcount)
+
+def initDbTablesIfNeeded():
+  '''Creates and initializes DB tables required for script to work.'''
+  connection = initDBConnection()
+  cursor = connection.cursor()
+
+  buildsTableExists = tableExists(cursor, GH_PRS_TABLE_NAME)
+  print('PRs table exists', buildsTableExists)
+  if not buildsTableExists:
+    cursor.execute(GH_PRS_CREATE_TABLE_QUERY)
+    if not bool(cursor.rowcount):
+      raise Exception(f"Failed to create table {GH_PRS_TABLE_NAME}")
+  cursor.close()
+  connection.commit()
+
+  connection.close()
+
+def insertIntoTable(cursor, values):
+  insertRowQuery = f'''INSERT INTO {GH_PRS_TABLE_NAME}
+                            (job_name,
+                            status,
+                            workflow_id,
+                            workflow_url,
+                            executed_ts)
+                          VALUES
+                            (%s, %s, %s, %s, %s)
+                          '''
+  cursor.execute(insertRowQuery, values)
+
+def fetchNewData():
+  '''
+  Main workhorse method. Fetches data from GitHub and puts it in metrics table.
+  '''
+
+  job_name = sys.argv[1]
+  status = sys.argv[2]
+  workflow_id = sys.argv[3]
+  workflow_url = f'''https://github.com/fernando-wizeline/beam/actions/runs/{workflow_id}''' #TODO: remove reference to personal repository
+  executed_ts = datetime.datetime.now()
+  row_values = [job_name, status, workflow_id, workflow_url, executed_ts]

Review comment:
       is this method fetching any data from github? I think it is not , right?

##########
File path: .test-infra/metrics/sync/github/sync_github_actions.py
##########
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+'''
+This module queries GitHub to collect pre-commit GitHub Actions metrics and put them in
+PostgreSQL.
+'''
+import sys
+import time
+import psycopg2
+import datetime
+
+# Keeping this as reference for localhost debug
+# Fetching docker host machine ip for testing purposes.
+# Actual host should be used for production.
+def findDockerNetworkIP():
+  '''Utilizes ip tool to find docker network IP'''
+  import subprocess
+  cmd_out = subprocess.check_output(["ip", "route", "show"]).decode("utf-8")
+  return cmd_out.split(" ")[2]
+
+
+#DB_HOST = findDockerNetworkIP()
+
+GH_PRS_TABLE_NAME = 'gh_actions_metrics'
+
+GH_PRS_CREATE_TABLE_QUERY = f"""
+  create table {GH_PRS_TABLE_NAME} (
+  ga_id serial NOT NULL PRIMARY KEY,
+  job_name varchar NOT NULL,
+  status varchar NOT NULL,
+  workflow_id varchar NOT NULL,
+  workflow_url varchar NOT NULL,
+  executed_ts timestamp NOT NULL
+  )
+  """
+
+def initDBConnection():
+  '''Opens connection to postgresql DB, as configured via global variables.'''
+
+  DB_HOST = sys.argv[4]
+  DB_PORT = sys.argv[5]
+  DB_NAME = sys.argv[6]
+  DB_USER_NAME = sys.argv[7]
+  DB_PASSWORD = sys.argv[8]
+
+  conn = None
+  while not conn:
+    try:
+      conn = psycopg2.connect(
+          f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
+          f" port='{DB_PORT}' password='{DB_PASSWORD}'")
+    except:
+      print('Failed to connect to DB; retrying in one hour')
+      sys.stdout.flush()
+      time.sleep(3600)
+  return conn
+
+def tableExists(cursor, tableName):
+  '''Checks the existense of table.'''
+  cursor.execute(f"select * from information_schema.tables"
+                 f" where table_name='{tableName}';")
+  return bool(cursor.rowcount)
+
+def initDbTablesIfNeeded():
+  '''Creates and initializes DB tables required for script to work.'''
+  connection = initDBConnection()
+  cursor = connection.cursor()
+
+  buildsTableExists = tableExists(cursor, GH_PRS_TABLE_NAME)
+  print('PRs table exists', buildsTableExists)
+  if not buildsTableExists:
+    cursor.execute(GH_PRS_CREATE_TABLE_QUERY)
+    if not bool(cursor.rowcount):
+      raise Exception(f"Failed to create table {GH_PRS_TABLE_NAME}")
+  cursor.close()
+  connection.commit()
+
+  connection.close()
+
+def insertIntoTable(cursor, values):
+  insertRowQuery = f'''INSERT INTO {GH_PRS_TABLE_NAME}
+                            (job_name,
+                            status,
+                            workflow_id,
+                            workflow_url,
+                            executed_ts)
+                          VALUES
+                            (%s, %s, %s, %s, %s)
+                          '''
+  cursor.execute(insertRowQuery, values)
+
+def fetchNewData():
+  '''
+  Main workhorse method. Fetches data from GitHub and puts it in metrics table.
+  '''
+
+  job_name = sys.argv[1]
+  status = sys.argv[2]
+  workflow_id = sys.argv[3]
+  workflow_url = f'''https://github.com/fernando-wizeline/beam/actions/runs/{workflow_id}''' #TODO: remove reference to personal repository

Review comment:
       should this be fixed?

##########
File path: .test-infra/metrics/sync/github/sync_github_actions.py
##########
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+'''
+This module queries GitHub to collect pre-commit GitHub Actions metrics and put them in
+PostgreSQL.
+'''
+import sys
+import time
+import psycopg2
+import datetime
+
+# Keeping this as reference for localhost debug
+# Fetching docker host machine ip for testing purposes.
+# Actual host should be used for production.
+def findDockerNetworkIP():
+  '''Utilizes ip tool to find docker network IP'''
+  import subprocess
+  cmd_out = subprocess.check_output(["ip", "route", "show"]).decode("utf-8")
+  return cmd_out.split(" ")[2]
+
+
+#DB_HOST = findDockerNetworkIP()
+
+GH_PRS_TABLE_NAME = 'gh_actions_metrics'
+
+GH_PRS_CREATE_TABLE_QUERY = f"""
+  create table {GH_PRS_TABLE_NAME} (
+  ga_id serial NOT NULL PRIMARY KEY,
+  job_name varchar NOT NULL,
+  status varchar NOT NULL,
+  workflow_id varchar NOT NULL,
+  workflow_url varchar NOT NULL,
+  executed_ts timestamp NOT NULL
+  )
+  """
+
+def initDBConnection():
+  '''Opens connection to postgresql DB, as configured via global variables.'''
+
+  DB_HOST = sys.argv[4]
+  DB_PORT = sys.argv[5]
+  DB_NAME = sys.argv[6]
+  DB_USER_NAME = sys.argv[7]
+  DB_PASSWORD = sys.argv[8]
+
+  conn = None
+  while not conn:
+    try:
+      conn = psycopg2.connect(
+          f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
+          f" port='{DB_PORT}' password='{DB_PASSWORD}'")
+    except:
+      print('Failed to connect to DB; retrying in one hour')
+      sys.stdout.flush()
+      time.sleep(3600)
+  return conn
+
+def tableExists(cursor, tableName):
+  '''Checks the existense of table.'''
+  cursor.execute(f"select * from information_schema.tables"
+                 f" where table_name='{tableName}';")
+  return bool(cursor.rowcount)
+
+def initDbTablesIfNeeded():
+  '''Creates and initializes DB tables required for script to work.'''
+  connection = initDBConnection()
+  cursor = connection.cursor()
+
+  buildsTableExists = tableExists(cursor, GH_PRS_TABLE_NAME)
+  print('PRs table exists', buildsTableExists)

Review comment:
       maybe this print statement should be made more clear? Perhaps `'PR table %s exist' % 'does' if buildsTableExists else 'does not'`? WDYT?
   Any other option works, but I just want to make the message more clear

##########
File path: .test-infra/metrics/sync/github/sync_github_actions.py
##########
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+'''
+This module queries GitHub to collect pre-commit GitHub Actions metrics and put them in
+PostgreSQL.
+'''
+import sys
+import time
+import psycopg2
+import datetime
+
+# Keeping this as reference for localhost debug
+# Fetching docker host machine ip for testing purposes.
+# Actual host should be used for production.
+def findDockerNetworkIP():
+  '''Utilizes ip tool to find docker network IP'''
+  import subprocess
+  cmd_out = subprocess.check_output(["ip", "route", "show"]).decode("utf-8")
+  return cmd_out.split(" ")[2]
+
+
+#DB_HOST = findDockerNetworkIP()
+
+GH_PRS_TABLE_NAME = 'gh_actions_metrics'
+
+GH_PRS_CREATE_TABLE_QUERY = f"""
+  create table {GH_PRS_TABLE_NAME} (
+  ga_id serial NOT NULL PRIMARY KEY,
+  job_name varchar NOT NULL,
+  status varchar NOT NULL,
+  workflow_id varchar NOT NULL,
+  workflow_url varchar NOT NULL,
+  executed_ts timestamp NOT NULL
+  )
+  """
+
+def initDBConnection():
+  '''Opens connection to postgresql DB, as configured via global variables.'''
+
+  DB_HOST = sys.argv[4]
+  DB_PORT = sys.argv[5]
+  DB_NAME = sys.argv[6]
+  DB_USER_NAME = sys.argv[7]
+  DB_PASSWORD = sys.argv[8]

Review comment:
       Please document the order of arguments at the top of the file.
   
   A good idea may be to use [Python's `argparse`](https://docs.python.org/3/library/argparse.html), which helps document and manages error messages as well.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org