You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ep...@apache.org on 2022/10/18 15:08:38 UTC

[airflow] 03/13: Create a workflow to sync the list of authors (#26275)

This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch v2-4-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit e574aa78f9ef13149daa9c3096ce09d708a8a189
Author: Mark Norman Francis <no...@201created.com>
AuthorDate: Tue Sep 13 13:30:47 2022 +0100

    Create a workflow to sync the list of authors (#26275)
    
    The CI jobs caused by Pull Requests run on Airflow infrastructure only
    when an author is trusted, which currently means a core committer. The
    list is currently duplicated in the ci.yml workflow.
    
    As part of being able to open up the infra to other trusted authors,
    this list needs to be updated based on a list of trusted authors kept
    in the `airflow-ci-infra` repository.
    
    (cherry picked from commit 73257628219e09c6cfd14b38d0aeed8b6e46a83c)
---
 .github/workflows/ci.yml           | 12 +++---
 .github/workflows/sync_authors.yml | 64 ++++++++++++++++++++++++++++++++
 scripts/ci/runners/sync_authors.py | 76 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 145 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ece19a4c8f..742583271c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,13 +60,11 @@ jobs:
     # is checked again by the runner using it's own list, so a PR author cannot
     # change this and get access to our self-hosted runners
     #
-    # When changing this list, ensure that it is kept in sync with the
-    # /runners/apache/airflow/configOverlay
-    # parameter in AWS SSM ParameterStore (which is what the runner uses)
-    # and restart the self-hosted runners.
-    #
-    # This list of committers can be generated with:
-    #   https://github.com/apache/airflow-ci-infra/blob/main/scripts/list_committers
+    # This list is kept up-to-date from the list of authors found in the
+    # 'airflow-ci-infra' repository by the 'sync_authors' GitHub workflow. It
+    # uses a regexp to find the list of authors and replace them, so any changes
+    # to the formatting of the contains(fromJSON()) structure below will need to
+    # be reflected in that workflow too.
     runs-on: >-
       ${{ (
         (
diff --git a/.github/workflows/sync_authors.yml b/.github/workflows/sync_authors.yml
new file mode 100644
index 0000000000..8405292859
--- /dev/null
+++ b/.github/workflows/sync_authors.yml
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: Sync authors list
+
+# Runs on a daily schedule: regenerates the authors list and, when the working
+# tree changed as a result, pushes a branch and opens a pull request.
+on:  # yamllint disable-line rule:truthy
+  schedule:
+    #        min   hr    dom   mon   dow
+    - cron: '11    01    *     *     *'     # daily at 1.11am
+
+# Needed to push the update branch and to open the pull request.
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  sync:
+    name: Sync
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install script dependencies
+        run: |
+          pip install requests toml
+
+      - name: Sync the authors list
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          python scripts/ci/runners/sync_authors.py
+          git config user.name "GitHub Actions"
+          git config user.email "actions@users.noreply.github.com"
+          if [ -n "$(git status --porcelain)" ]; then
+            branch="update-$(date +%s)"
+            git add -A
+            git checkout -b "$branch"
+            git commit --message "Authors list automatic update"
+            # The checkout step does not persist credentials, so a plain
+            # 'git push origin' would be unauthenticated; push with the
+            # workflow token explicitly instead.
+            git push "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "$branch"
+            # The push above does not update the 'origin' tracking ref, so name
+            # the head branch explicitly.
+            gh pr create --head "$branch" --title "Authors list automatic update" --body ''
+          fi
diff --git a/scripts/ci/runners/sync_authors.py b/scripts/ci/runners/sync_authors.py
new file mode 100644
index 0000000000..6df09305a5
--- /dev/null
+++ b/scripts/ci/runners/sync_authors.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+from pathlib import Path
+
+import requests
+import toml
+
+
+# The list of users in the 'build-info' job looks like:
+#
+#       contains(fromJSON('[
+#         "BasPH",
+#         ...
+#       ]'), github.event.pull_request.user.login)
+#
+# This script should replace the contents of the array with a new list of
+# identically formatted names, such that changes to the source of truth:
+
+AUTHORS = 'https://raw.githubusercontent.com/apache/airflow-ci-infra/main/authors.toml'
+
+# end up being reflected in the ci.yml.
+
+
+req = requests.get(AUTHORS)
+author_list = toml.loads(req.text)
+
+author_set = set()
+for membership in author_list:
+    author_set.update([author for author in author_list[membership]])
+
+authors = ''
+for author in sorted(author_set):
+    authors += f'            "{author}",\n'
+
+authors = authors[:-2]
+
+with open('ci.yml') as handle:
+
+
+    new_ci = re.sub(
+        r'''
+            ^
+            # matches the entire file up to contains(fromJSON('[
+            ( .*? contains.fromJSON \( ' \[ \n )
+
+            # the list of authors (which is replaced)
+            .*?
+
+            # the remainder of the file, from the end of the list onwards
+            ( \s+ \] ' \), . github\.event\.pull_request\.user\.login .*? )
+            $
+        ''',
+        f'\\1{authors}\\2',
+        handle.read(),
+        flags=re.DOTALL | re.VERBOSE,
+    )
+
+with open('ci.yml', 'w') as handle:
+    handle.write(new_ci)