You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/09/19 09:18:37 UTC

[GitHub] [hudi] xushiyan commented on a diff in pull request #6575: [HUDI-4754] Add compliance check in github actions

xushiyan commented on code in PR #6575:
URL: https://github.com/apache/hudi/pull/6575#discussion_r974028630


##########
.github/workflows/pr_compliance.yml:
##########
@@ -0,0 +1,21 @@
+name: validate pr
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+    branches:
+      - master
+
+jobs:
+  validate-pr:
+    runs-on: ubuntu-latest
+    env:
+      REQUEST_BODY: ${{ github.event.pull_request.body }}
+      REQUEST_TITLE: ${{ github.event.pull_request.title }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - name: run script
+        run: python scripts/pr_compliance.py
+
+
+

Review Comment:
   unnecessary newlines



##########
scripts/pr_compliance.py:
##########
@@ -0,0 +1,487 @@
+import re
+import os
+
+def title_is_ok(title):
+    return len(re.findall('(\[HUDI\-[0-9]{1,}\]|\[MINOR\])',title)) == 1
+
+def test_title():
+    #test that position doesn't matter
+    assert title_is_ok("[HUDI-1324] my fake pr")
+    assert title_is_ok(" my [HUDI-1324] fake pr")
+    assert title_is_ok(" my fake pr [HUDI-1324]")
+    #test position doesn't matter for minor
+    assert title_is_ok("[MINOR] my fake pr")
+    assert title_is_ok(" my [MINOR] fake pr")
+    assert title_is_ok(" my fake pr [MINOR]")
+    #test that more than 4 nums is also ok
+    assert title_is_ok("[HUDI-12345] my fake pr")
+    #test that 1 nums is also ok
+    assert title_is_ok("[HUDI-1] my fake pr")
+
+    #no nums not ok
+    assert not title_is_ok("[HUDI-] my fake pr")
+    #no brackets not ok
+    assert not title_is_ok("HUDI-1234 my fake pr")
+    assert not title_is_ok("MINOR my fake pr")
+    #lowercase not ok
+    assert not title_is_ok("[hudi-1234] my fake pr")
+    assert not title_is_ok("[minor] my fake pr")
+    #duplicate not ok
+    assert not title_is_ok("[HUDI-1324][HUDI-1324] my fake pr")
+    #hudi and minor not ok
+    assert not title_is_ok("[HUDI-1324] my [MINOR]fake pr")
+
+
+
+
+
+def body_is_ok(body,PRINT_DEBUG=False):
+    state = "CHANGELOGS"
+    found = False
+    def print_error(message):
+        if PRINT_DEBUG:
+            print(f"ERROR:(state: {state}, found: {found}, message: {message}")
+    for line in body.splitlines():
+        # ignore empty lines
+        if len(line) == 0:
+            continue
+
+        if state == "CHANGELOGS":
+            # see if it matches template
+            if line == "### Change Logs":
+                # we found it so now we need to make sure there is additional text
+                if not found:
+                    found = True
+                else:
+                    print_error("duplicate \"### Change Logs\" ")
+                    return False
+            elif line == "### Impact" or line.startswith("**Risk level:") or line == "### Contributor's checklist":
+                print_error(f"Out of order section or missing description \"{line}\"")
+                return False
+
+            # make sure there is additional text before looking for impact
+            elif found and line != "_Describe context and summary for this change. Highlight if any code was copied._":
+                # this is the default so we want to see different text that is actually explaining things
+                state = "IMPACT"
+                found = False
+
+
+        elif state == "IMPACT":
+            # see if it matches template
+            if line == "### Impact":
+                # we found it so now we need to make sure there is additional text
+                if not found:
+                    found = True
+                else:
+                    print_error("duplicate \"### Impact\" ")
+                    return False
+            elif line == "### Change Logs" or line.startswith("**Risk level:") or line == "### Contributor's checklist":
+                print_error(f"Out of order section or missing description\"{line}\"")
+                return False
+            # make sure there is additional text before looking for risk level
+            elif found and line != "_Describe any public API or user-facing feature change or any performance impact._":
+                # this is the default so we want to see different text that is actually explaining things
+                state = "RISKLEVEL"
+                found = False
+
+        elif state == "RISKLEVEL":
+            if line.startswith("**Risk level:"):
+                #if we already saw this then they shouldn't have it again
+                if found:
+                    print_error("duplicate line starting with \"**Risk level:\" ")
+                    return False
+                #if it is this line then they never chose a risk level
+                if line == "**Risk level: none | low | medium | high**":
+                    print_error("risk level not chosen ")
+                    return False
+
+                # an explanation is not required for none or low
+                if "NONE" in line.upper() or "LOW" in line.upper():
+                    state = "CHECKLIST"
+                    found = False
+                elif "MEDIUM" in line.upper() or "HIGH" in line.upper():
+                    # an explanation is required so we don't change state
+                    found = True
+                else:
+                    #they put something weird in for risk level
+                    print_error("invalid choice for risk level")
+                    return False
+            elif line == "### Impact" or line == "### Change Logs" or line == "### Contributor's checklist":
+                print_error(f"Out of order section or missing description \"{line}\"")
+                return False
+
+            elif found and line != "_Choose one. If medium or high, explain what verification was done to mitigate the risks._":
+                #explanation found so we can change states
+                state = "CHECKLIST"
+                found = False
+
+
+        elif state == "CHECKLIST":
+            if line.startswith("**Risk level:"):
+                print_error("duplicate line starting with \"**Risk level:\" ")
+                return False
+            if line == "### Impact" or line == "### Change Logs" or line.startswith("**Risk level:"):
+                print_error(f"Out of order section \"{line}\"")
+                return False
+            if line == "### Contributor's checklist":
+                #this has passed the check
+                return True
+    #we didn't exit(0) so something is wrong with the template
+    print_error("template is not filled out properly")
+    return False
+
+def test_body():
+    debug_messages = True
+    just_the_template = [

Review Comment:
   you could load this from https://github.com/apache/hudi/blob/master/.github/PULL_REQUEST_TEMPLATE.md ? so that you don't need to duplicate it



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org