You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by xu...@apache.org on 2022/10/17 04:42:25 UTC
[hudi] branch master updated: [HUDI-5022] Make better error messages for pr compliance (#6934)

This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 1dfa1634cf [HUDI-5022] Make better error messages for pr compliance (#6934)
1dfa1634cf is described below

commit 1dfa1634cfb6c2b90cc9e4d3a228418822699410
Author: Jon Vexler <jo...@onehouse.ai>
AuthorDate: Mon Oct 17 00:42:17 2022 -0400

    [HUDI-5022] Make better error messages for pr compliance (#6934)
---
 .github/PULL_REQUEST_TEMPLATE.md    |   4 +-
 .github/workflows/pr_compliance.yml |   2 +-
 scripts/pr_compliance.py            | 211 ++++++++++++++++++------------------
 3 files changed, 110 insertions(+), 107 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 17ad995a97..b1902aab5f 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -6,9 +6,9 @@ _Describe context and summary for this change. Highlight if any code was copied.
 
 _Describe any public API or user-facing feature change or any performance impact._
 
-**Risk level: none | low | medium | high**
+### Risk level (write none, low medium or high below)
 
-_Choose one. If medium or high, explain what verification was done to mitigate the risks._
+_If medium or high, explain what verification was done to mitigate the risks._
 
 ### Documentation Update
 
diff --git a/.github/workflows/pr_compliance.yml b/.github/workflows/pr_compliance.yml
index 67affbb7b7..542a0a5467 100644
--- a/.github/workflows/pr_compliance.yml
+++ b/.github/workflows/pr_compliance.yml
@@ -15,7 +15,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v3
       - name: run script
-        run: python3 scripts/pr_compliance.py > test.log || { echo "::error::pr_compliance.py $(cat test.log)" && exit 1; }
+        run: python3 scripts/pr_compliance.py 
 
 
 
diff --git a/scripts/pr_compliance.py b/scripts/pr_compliance.py
index ea1e2ce3e2..5946a35587 100644
--- a/scripts/pr_compliance.py
+++ b/scripts/pr_compliance.py
@@ -18,7 +18,7 @@
 import re
 import os
 import sys
-
+import inspect
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #  
 #  _____ _ _   _       __     __    _ _     _       _   _                 #
 # |_   _(_) |_| | ___  \ \   / /_ _| (_) __| | __ _| |_(_) ___  _ __      #  
@@ -130,14 +130,15 @@ class Outcomes:
 # identifier: str - line that signifies the start of a section
 # linesAfter: set of str - default lines in the template that we ignore when 
 #                          verifying that the user filled out the section
-# nextSection: str - The name of the next section or "SUCCESS" if this is the 
-#                    last section
 class ParseSectionData:
-    def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
+    def __init__(self, name: str, identifier: str, linesAfter: str):
         self.name = name
         self.identifier = identifier
         self.linesAfter = linesAfter
-        self.nextSection = nextSection
+        self.prevSection = ""
+        self.nextSection = ""
+        
+        
     
     #returns true if line matches the identifier
     def identify(self, line: str):
@@ -151,8 +152,8 @@ class ParseSectionData:
 #Special holder of data for risk level because the identifier line is modified
 #by the user
 class RiskLevelData(ParseSectionData):
-    def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
-        super().__init__(name, identifier, linesAfter, nextSection)
+    def __init__(self, name: str, identifier: str, linesAfter):
+        super().__init__(name, identifier, linesAfter)
     
     #we check that the line start with the identifier because the identifier 
     #line will be modified when filled out correctly
@@ -165,8 +166,23 @@ class RiskLevelData(ParseSectionData):
 class ParseSections:
     def __init__(self, psd):
         self.sections = {}
-        for x in psd:
-            self.sections[x.name] = x
+        assert len(psd) > 0
+        for i in range(len(psd)):
+            prevI = i - 1
+            nextI = i + 1
+            if prevI < 0:
+                psd[i].prevSection = "START"
+            else:
+                psd[i].prevSection = psd[prevI].name
+            
+            if nextI >= len(psd):
+                psd[i].nextSection = "END"
+            else:
+                psd[i].nextSection = psd[nextI].name
+            
+            self.sections[psd[i].name] = psd[i]
+            
+
     
     #returns true if line is an identifier for a section that is not value
     #   PARAMS
@@ -179,6 +195,18 @@ class ParseSections:
                     return True
         return False    
     
+    #gets the name of the section identified in the line
+    #   PARAMS
+    # line: str - the line that we are parsing
+    #   RETURN
+    # string - name of the section if the identifier is found, else none
+    def getSectionName(self, line: str):
+        for name in self.sections:
+            if self.sections[name].identify(line):
+                return name
+        return None    
+
+
     #returns the ParseSectionData that is named name
     def get(self, name):
         return self.sections.get(name)
@@ -200,9 +228,13 @@ class ParseSection:
         self.sections = sections
 
     #prints error message if debug is set to true
-    def error(self, message):
+    def error(self, line: str, lineno: str, message: str):
         if self.debug:
-            print(f"ERROR:(state: {self.data.name}, found: {self.found}, message: {message}")
+            pyline = inspect.getframeinfo(inspect.stack()[1][0]).lineno
+            print(f"::error file=pr_compliance.py,line={pyline}::{message}")
+            if lineno != "" and line != "":
+                print(f"::error file=pr_compliance.py,line={pyline}::found on line {lineno}: {line}")
+            print(f"::debug::state: {self.data.name}, found: {self.found},")
 
     #returns the name of the next section
     def nextSection(self):
@@ -214,29 +246,55 @@ class ParseSection:
         return self.found and self.data.identifyAfter(line)
 
     #Decides what outcome occurs when the section identifier is found
-    def processIdentify(self, line):
+    def processIdentify(self, line, lineno):
         if self.found:
             #since we have already found the identifier, this means that we have
             #found a duplicate of the identifier
-            self.error(f"duplicate line \"{line}\"")
+            self.error(line, lineno, f"Duplicate {self.data.name} section found")
             return Outcomes.ERROR
         self.found = True
         return Outcomes.CONTINUE
+
+    def makeValidateOthersErrorMessage(self, line):
+        if self.found:
+            if self.nextSection() != "END" and self.sections.sections[self.nextSection()].identify(line):
+                #we found the next identifier but haven't found a description
+                #yet for this section
+                return f"Section {self.data.name} is missing a description"
+            #we found a section other than the next section
+            return f"Section {self.data.name} should be followed by section {self.data.nextSection}"
+
+        #section identifier has not been found yet
+        sectionFound = self.sections.getSectionName(line)
+        if sectionFound is None:
+            print("ERROR: none found even though validateOthers returned True")
+            exit(1)
+        elif sectionFound == self.data.prevSection:
+            #we have not found the current section identifier but we found the
+            #previous section identifier again
+            return f"Duplicate {self.data.prevSection} section found"
+         
+        if self.data.prevSection == "START":
+            return f"Section {self.data.name} should be the first section"
+        if sectionFound == self.data.nextSection:
+            return f"Missing section {self.data.name} between {self.data.prevSection} and {self.data.nextSection}"
+        return f"Section {self.data.name} was expected after section {self.data.prevSection}"
     
     #Decides the outcome state by processing line
-    def validateLine(self,line):
+    def validateLine(self,line,lineno):
         if self.data.identify(line):
             #we have found the identifier so we should decide what to do
-            return self.processIdentify(line)
+            return self.processIdentify(line,lineno)
         elif self.sections.validateOthers(line, self.data.name):
             #we have found the identifier for another section
-            self.error(f"Out of order section or missing description \"{line}\"")
+            #figure out what the error is
+            self.error(line,lineno,self.makeValidateOthersErrorMessage(line))
             return Outcomes.ERROR
         elif self.validateAfter(line):
             #the pr author has added new text to this section so we consider it
             #to be filled out
-            if self.nextSection() == "SUCCESS":
-                #if next section is "SUCCESS" then there are no more sections 
+            if self.nextSection() == "END":
+                #if next section is "END" then there are no more sections 
                 #to process
                 return Outcomes.SUCCESS
             return Outcomes.NEXTSECTION
@@ -250,47 +308,15 @@ class NoDataSection(ParseSection):
 
     #After finding the identifier we don't need to look for anything else so we
     #can just go to the next section or terminate if this is the last
-    def processIdentify(self, line):
-        o = super().processIdentify(line)
+    def processIdentify(self, line, lineno):
+        o = super().processIdentify(line, lineno)
         if o  == Outcomes.CONTINUE:
-            if self.nextSection() == "SUCCESS":
+            if self.nextSection() == "END":
                 return Outcomes.SUCCESS
             else:
                 return Outcomes.NEXTSECTION
         return o
 
-#Risk level has different processing because the pr author will modify the 
-#identifier and doesn't need to add description if risk is none or low
-class RiskLevelSection(ParseSection):
-    def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
-        super().__init__(data, sections, debug)
-
-    def processIdentify(self, line):
-        if self.found:
-            #since we have already found the identifier, this means that we have
-            #found a duplicate of the identifier
-            self.error(f"duplicate line starting with \"{self.identifier}\"")
-            return Outcomes.ERROR
-        if line == "**Risk level: none | low | medium | high**":
-            #the user has not modified this line so a risk level was not chosen
-            self.error("risk level not chosen")
-            return Outcomes.ERROR
-        if "NONE" in line.upper() or "LOW" in line.upper():
-            # an explanation is not required for none or low so we can just
-            # move on to the next section or terminate if this is the last
-            if self.nextSection() == "SUCCESS":
-                return Outcomes.SUCCESS
-            else:
-                return Outcomes.NEXTSECTION
-        elif "MEDIUM" in line.upper() or "HIGH" in line.upper():
-            # an explanation is required so we don't change state
-            self.found = True
-            return Outcomes.CONTINUE
-        else:
-            #the pr author put something weird in for risk level
-            self.error("invalid choice for risk level")
-            return Outcomes.ERROR
-
 #Class that orchestrates the validation of the entire body
 class ValidateBody:
     def __init__(self, body: str, firstSection: str, sections: ParseSections, debug=False):
@@ -319,13 +345,11 @@ class ValidateBody:
         #get the data for that section
         data = self.sections.get(sectionName)
         if data is None:
-            print(f"parse section {sectionName} not found")
+            print(f"ERROR with your parse section setup. Parse section {sectionName} not found")
             exit(-3)
         
         #create the section
-        if data.name == "RISKLEVEL":
-            self.section = RiskLevelSection(data=data, sections=self.sections, debug=self.debug)
-        elif data.name == "CHECKLIST":
+        if data.name == "CHECKLIST":
             self.section = NoDataSection(data=data, sections=self.sections, debug=self.debug)
         else:
             self.section = ParseSection(data=data, sections=self.sections, debug=self.debug)
@@ -336,13 +360,13 @@ class ValidateBody:
         self.nextSection()
 
         #validate each line
-        for line in self.body.splitlines():
+        for lineno, line in enumerate(self.body.splitlines(), 1):
             #ignore empty lines
             if len(line) == 0:
                 continue
 
             #run the parse section validation
-            o = self.section.validateLine(line)
+            o = self.section.validateLine(line, lineno)
             
             #decide what to do based on outcome
             if o == Outcomes.ERROR:
@@ -353,7 +377,13 @@ class ValidateBody:
                 self.nextSection()
         #if we get through all the lines without a success outcome, then the 
         #body does not comply
-        self.section.error("template is not filled out properly")
+        if self.section.data.nextSection == "END":
+            if self.section.found:
+                self.section.error("","",f"Section {self.section.data.name} is missing a description")
+                return False
+            self.section.error("","",f"Missing section {self.section.data.name} at the end")
+            return False
+        self.section.error("","", "Please make sure you have filled out the template correctly. You can find a blank template in /.github/PULL_REQUEST_TEMPLATE.md")
         return False
         
 #Generate the validator for the current template.
@@ -361,20 +391,16 @@ class ValidateBody:
 def make_default_validator(body, debug=False):
     changelogs = ParseSectionData("CHANGELOGS",
         "### Change Logs",
-        {"_Describe context and summary for this change. Highlight if any code was copied._"},
-        "IMPACT")
+        {"_Describe context and summary for this change. Highlight if any code was copied._"})
     impact = ParseSectionData("IMPACT",
         "### Impact",
-        {"_Describe any public API or user-facing feature change or any performance impact._"},
-        "RISKLEVEL")
+        {"_Describe any public API or user-facing feature change or any performance impact._"})
     risklevel = RiskLevelData("RISKLEVEL",
-        "**Risk level:",
-        {"_Choose one. If medium or high, explain what verification was done to mitigate the risks._"},
-        "CHECKLIST")
+        "### Risk level ",
+        {"_If medium or high, explain what verification was done to mitigate the risks._"})
     checklist = ParseSectionData("CHECKLIST",
         "### Contributor's checklist",
-        {},
-        "SUCCESS")
+        {})
     parseSections = ParseSections([changelogs, impact, risklevel, checklist])
 
     return ValidateBody(body, "CHANGELOGS", parseSections, debug)
@@ -431,18 +457,14 @@ def test_body():
     good_impact[1] = "impact description"
 
     template_risklevel = [
-        "**Risk level: none | low | medium | high**",
+        "### Risk level (write none, low medium or high below)",
         "",
-        "_Choose one. If medium or high, explain what verification was done to mitigate the risks._",
+        "_If medium or high, explain what verification was done to mitigate the risks._",
         ""
     ]
 
-    good_risklevel_none = template_risklevel.copy()
-    good_risklevel_none[0] = "**Risk level: none**"
-
-    good_risklevel_medium = template_risklevel.copy()
-    good_risklevel_medium[0] = "**Risk level: medium**"
-    good_risklevel_medium[1] = "risklevel description" 
+    good_risklevel = template_risklevel.copy()
+    good_risklevel[1] = "none"
 
     template_checklist = [
         "### Contributor's checklist",
@@ -454,13 +476,14 @@ def test_body():
     ]
 
     #list of sections that when combined form a valid body
-    good_sections = [good_changelogs, good_impact, good_risklevel_none, template_checklist]
+    good_sections = [good_changelogs, good_impact, good_risklevel, template_checklist]
 
     #list of sections that when combined form the template
     template_sections = [template_changelogs, template_impact, template_risklevel, template_checklist]
 
     tests_passed = True
     #Test section not filled out
+    #no need to test checklist section
     for i in range(len(good_sections)-1):
         test_sections = []
         for j in range(len(good_sections)):
@@ -480,13 +503,13 @@ def test_body():
         tests_passed = run_test(f"duplicate section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
     
     #Test out of order section
-    for i in range(len(good_sections)):
+    for i in range(len(good_sections)-1):
         test_sections = []
         for j in range(len(good_sections)):
             test_sections.append(good_sections[j].copy())
-        k = (i + 3) % len(good_sections)
-        test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
-        tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
+        for k in range(i+1,len(good_sections)):
+            test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
+            tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
 
     #Test missing section
     for i in range(len(good_sections)):
@@ -496,28 +519,8 @@ def test_body():
                 test_sections.append(good_sections[j].copy())
         tests_passed = run_test(f"Missing Section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
     
-    #Test good body with risk level of none:
-    tests_passed = run_test("good documentation. risk level none", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed
-
-    #Test good body with risk level of medium:
-    risk_level_index = 2
-    good_medium_risk_level = good_sections.copy()
-    good_medium_risk_level[risk_level_index] = good_risklevel_medium
-    tests_passed = run_test("good documentation. risk level medium", build_body(good_medium_risk_level), True, DEBUG_MESSAGES) and tests_passed
-
-    #Test good body except medium risk level and there is no description
-    bad_medium_risk_level = good_sections.copy()
-    bad_risklevel_medium = good_risklevel_medium.copy()
-    bad_risklevel_medium[1] = ""
-    bad_medium_risk_level[risk_level_index] = bad_risklevel_medium
-    tests_passed = run_test("medium risk level but no description", build_body(bad_medium_risk_level), False, DEBUG_MESSAGES) and tests_passed
-
-    #Test unknown risk level:
-    unknow_risk_level = good_sections.copy()
-    unknow_risk_level_section = template_risklevel.copy()
-    unknow_risk_level_section[0] = "**Risk level: unknown**"
-    unknow_risk_level[risk_level_index] = unknow_risk_level_section
-    tests_passed = run_test("unknown risk level", build_body(unknow_risk_level), False, DEBUG_MESSAGES) and tests_passed
+    #Test good body:
+    tests_passed = run_test("good documentation", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed
 
     print("*****")
     if tests_passed: