You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by xu...@apache.org on 2022/10/17 04:42:25 UTC
[hudi] branch master updated: [HUDI-5022] Make better error messages for pr compliance (#6934)
This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 1dfa1634cf [HUDI-5022] Make better error messages for pr compliance (#6934)
1dfa1634cf is described below
commit 1dfa1634cfb6c2b90cc9e4d3a228418822699410
Author: Jon Vexler <jo...@onehouse.ai>
AuthorDate: Mon Oct 17 00:42:17 2022 -0400
[HUDI-5022] Make better error messages for pr compliance (#6934)
---
.github/PULL_REQUEST_TEMPLATE.md | 4 +-
.github/workflows/pr_compliance.yml | 2 +-
scripts/pr_compliance.py | 211 ++++++++++++++++++------------------
3 files changed, 110 insertions(+), 107 deletions(-)
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 17ad995a97..b1902aab5f 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -6,9 +6,9 @@ _Describe context and summary for this change. Highlight if any code was copied.
_Describe any public API or user-facing feature change or any performance impact._
-**Risk level: none | low | medium | high**
+### Risk level (write none, low medium or high below)
-_Choose one. If medium or high, explain what verification was done to mitigate the risks._
+_If medium or high, explain what verification was done to mitigate the risks._
### Documentation Update
diff --git a/.github/workflows/pr_compliance.yml b/.github/workflows/pr_compliance.yml
index 67affbb7b7..542a0a5467 100644
--- a/.github/workflows/pr_compliance.yml
+++ b/.github/workflows/pr_compliance.yml
@@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- name: run script
- run: python3 scripts/pr_compliance.py > test.log || { echo "::error::pr_compliance.py $(cat test.log)" && exit 1; }
+ run: python3 scripts/pr_compliance.py
diff --git a/scripts/pr_compliance.py b/scripts/pr_compliance.py
index ea1e2ce3e2..5946a35587 100644
--- a/scripts/pr_compliance.py
+++ b/scripts/pr_compliance.py
@@ -18,7 +18,7 @@
import re
import os
import sys
-
+import inspect
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# _____ _ _ _ __ __ _ _ _ _ _ #
# |_ _(_) |_| | ___ \ \ / /_ _| (_) __| | __ _| |_(_) ___ _ __ #
@@ -130,14 +130,15 @@ class Outcomes:
# identifier: str - line that signifies the start of a section
# linesAfter: set of str - default lines in the template that we ignore when
# verifying that the user filled out the section
-# nextSection: str - The name of the next section or "SUCCESS" if this is the
-# last section
class ParseSectionData:
- def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
+ def __init__(self, name: str, identifier: str, linesAfter: str):
self.name = name
self.identifier = identifier
self.linesAfter = linesAfter
- self.nextSection = nextSection
+ self.prevSection = ""
+ self.nextSection = ""
+
+
#returns true if line matches the identifier
def identify(self, line: str):
@@ -151,8 +152,8 @@ class ParseSectionData:
#Special holder of data for risk level because the identifier line is modified
#by the user
class RiskLevelData(ParseSectionData):
- def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
- super().__init__(name, identifier, linesAfter, nextSection)
+ def __init__(self, name: str, identifier: str, linesAfter):
+ super().__init__(name, identifier, linesAfter)
#we check that the line start with the identifier because the identifier
#line will be modified when filled out correctly
@@ -165,8 +166,23 @@ class RiskLevelData(ParseSectionData):
class ParseSections:
def __init__(self, psd):
self.sections = {}
- for x in psd:
- self.sections[x.name] = x
+ assert len(psd) > 0
+ for i in range(len(psd)):
+ prevI = i - 1
+ nextI = i + 1
+ if prevI < 0:
+ psd[i].prevSection = "START"
+ else:
+ psd[i].prevSection = psd[prevI].name
+
+ if nextI >= len(psd):
+ psd[i].nextSection = "END"
+ else:
+ psd[i].nextSection = psd[nextI].name
+
+ self.sections[psd[i].name] = psd[i]
+
+
#returns true if line is an identifier for a section that is not value
# PARAMS
@@ -179,6 +195,18 @@ class ParseSections:
return True
return False
+ #gets the name of the section identified in the line
+ # PARAMS
+ # line: str - the line that we are parsing
+ # RETURN
+ # string - name of the section if the identifier is found, else none
+ def getSectionName(self, line: str):
+ for name in self.sections:
+ if self.sections[name].identify(line):
+ return name
+ return None
+
+
#returns the ParseSectionData that is named name
def get(self, name):
return self.sections.get(name)
@@ -200,9 +228,13 @@ class ParseSection:
self.sections = sections
#prints error message if debug is set to true
- def error(self, message):
+ def error(self, line: str, lineno: str, message: str):
if self.debug:
- print(f"ERROR:(state: {self.data.name}, found: {self.found}, message: {message}")
+ pyline = inspect.getframeinfo(inspect.stack()[1][0]).lineno
+ print(f"::error file=pr_compliance.py,line={pyline}::{message}")
+ if lineno != "" and line != "":
+ print(f"::error file=pr_compliance.py,line={pyline}::found on line {lineno}: {line}")
+ print(f"::debug::state: {self.data.name}, found: {self.found},")
#returns the name of the next section
def nextSection(self):
@@ -214,29 +246,55 @@ class ParseSection:
return self.found and self.data.identifyAfter(line)
#Decides what outcome occurs when the section identifier is found
- def processIdentify(self, line):
+ def processIdentify(self, line, lineno):
if self.found:
#since we have already found the identifier, this means that we have
#found a duplicate of the identifier
- self.error(f"duplicate line \"{line}\"")
+ self.error(line, lineno, f"Duplicate {self.data.name} section found")
return Outcomes.ERROR
self.found = True
return Outcomes.CONTINUE
+
+ def makeValidateOthersErrorMessage(self, line):
+ if self.found:
+ if self.nextSection() != "END" and self.sections.sections[self.nextSection()].identify(line):
+ #we found the next identifier but haven't found a description
+ #yet for this section
+ return f"Section {self.data.name} is missing a description"
+ #we found a section other than the next section
+ return f"Section {self.data.name} should be followed by section {self.data.nextSection}"
+
+ #section identifier has not been found yet
+ sectionFound = self.sections.getSectionName(line)
+ if sectionFound is None:
+ print("ERROR: none found even though validateOthers returned True")
+ exit(1)
+ elif sectionFound == self.data.prevSection:
+ #we have not found the current section identifier but we found the
+ #previous section identifier again
+ return f"Duplicate {self.data.prevSection} section found"
+
+ if self.data.prevSection == "START":
+ return f"Section {self.data.name} should be the first section"
+ if sectionFound == self.data.nextSection:
+ return f"Missing section {self.data.name} between {self.data.prevSection} and {self.data.nextSection}"
+ return f"Section {self.data.name} was expected after section {self.data.prevSection}"
#Decides the outcome state by processing line
- def validateLine(self,line):
+ def validateLine(self,line,lineno):
if self.data.identify(line):
#we have found the identifier so we should decide what to do
- return self.processIdentify(line)
+ return self.processIdentify(line,lineno)
elif self.sections.validateOthers(line, self.data.name):
#we have found the identifier for another section
- self.error(f"Out of order section or missing description \"{line}\"")
+ #figure out what the error is
+ self.error(line,lineno,self.makeValidateOthersErrorMessage(line))
return Outcomes.ERROR
elif self.validateAfter(line):
#the pr author has added new text to this section so we consider it
#to be filled out
- if self.nextSection() == "SUCCESS":
- #if next section is "SUCCESS" then there are no more sections
+ if self.nextSection() == "END":
+ #if next section is "END" then there are no more sections
#to process
return Outcomes.SUCCESS
return Outcomes.NEXTSECTION
@@ -250,47 +308,15 @@ class NoDataSection(ParseSection):
#After finding the identifier we don't need to look for anything else so we
#can just go to the next section or terminate if this is the last
- def processIdentify(self, line):
- o = super().processIdentify(line)
+ def processIdentify(self, line, lineno):
+ o = super().processIdentify(line, lineno)
if o == Outcomes.CONTINUE:
- if self.nextSection() == "SUCCESS":
+ if self.nextSection() == "END":
return Outcomes.SUCCESS
else:
return Outcomes.NEXTSECTION
return o
-#Risk level has different processing because the pr author will modify the
-#identifier and doesn't need to add description if risk is none or low
-class RiskLevelSection(ParseSection):
- def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
- super().__init__(data, sections, debug)
-
- def processIdentify(self, line):
- if self.found:
- #since we have already found the identifier, this means that we have
- #found a duplicate of the identifier
- self.error(f"duplicate line starting with \"{self.identifier}\"")
- return Outcomes.ERROR
- if line == "**Risk level: none | low | medium | high**":
- #the user has not modified this line so a risk level was not chosen
- self.error("risk level not chosen")
- return Outcomes.ERROR
- if "NONE" in line.upper() or "LOW" in line.upper():
- # an explanation is not required for none or low so we can just
- # move on to the next section or terminate if this is the last
- if self.nextSection() == "SUCCESS":
- return Outcomes.SUCCESS
- else:
- return Outcomes.NEXTSECTION
- elif "MEDIUM" in line.upper() or "HIGH" in line.upper():
- # an explanation is required so we don't change state
- self.found = True
- return Outcomes.CONTINUE
- else:
- #the pr author put something weird in for risk level
- self.error("invalid choice for risk level")
- return Outcomes.ERROR
-
#Class that orchestrates the validation of the entire body
class ValidateBody:
def __init__(self, body: str, firstSection: str, sections: ParseSections, debug=False):
@@ -319,13 +345,11 @@ class ValidateBody:
#get the data for that section
data = self.sections.get(sectionName)
if data is None:
- print(f"parse section {sectionName} not found")
+ print(f"ERROR with your parse section setup. Parse section {sectionName} not found")
exit(-3)
#create the section
- if data.name == "RISKLEVEL":
- self.section = RiskLevelSection(data=data, sections=self.sections, debug=self.debug)
- elif data.name == "CHECKLIST":
+ if data.name == "CHECKLIST":
self.section = NoDataSection(data=data, sections=self.sections, debug=self.debug)
else:
self.section = ParseSection(data=data, sections=self.sections, debug=self.debug)
@@ -336,13 +360,13 @@ class ValidateBody:
self.nextSection()
#validate each line
- for line in self.body.splitlines():
+ for lineno, line in enumerate(self.body.splitlines(), 1):
#ignore empty lines
if len(line) == 0:
continue
#run the parse section validation
- o = self.section.validateLine(line)
+ o = self.section.validateLine(line, lineno)
#decide what to do based on outcome
if o == Outcomes.ERROR:
@@ -353,7 +377,13 @@ class ValidateBody:
self.nextSection()
#if we get through all the lines without a success outcome, then the
#body does not comply
- self.section.error("template is not filled out properly")
+ if self.section.data.nextSection == "END":
+ if self.section.found:
+ self.section.error("","",f"Section {self.section.data.name} is missing a description")
+ return False
+ self.section.error("","",f"Missing section {self.section.data.name} at the end")
+ return False
+ self.section.error("","", "Please make sure you have filled out the template correctly. You can find a blank template in /.github/PULL_REQUEST_TEMPLATE.md")
return False
#Generate the validator for the current template.
@@ -361,20 +391,16 @@ class ValidateBody:
def make_default_validator(body, debug=False):
changelogs = ParseSectionData("CHANGELOGS",
"### Change Logs",
- {"_Describe context and summary for this change. Highlight if any code was copied._"},
- "IMPACT")
+ {"_Describe context and summary for this change. Highlight if any code was copied._"})
impact = ParseSectionData("IMPACT",
"### Impact",
- {"_Describe any public API or user-facing feature change or any performance impact._"},
- "RISKLEVEL")
+ {"_Describe any public API or user-facing feature change or any performance impact._"})
risklevel = RiskLevelData("RISKLEVEL",
- "**Risk level:",
- {"_Choose one. If medium or high, explain what verification was done to mitigate the risks._"},
- "CHECKLIST")
+ "### Risk level ",
+ {"_If medium or high, explain what verification was done to mitigate the risks._"})
checklist = ParseSectionData("CHECKLIST",
"### Contributor's checklist",
- {},
- "SUCCESS")
+ {})
parseSections = ParseSections([changelogs, impact, risklevel, checklist])
return ValidateBody(body, "CHANGELOGS", parseSections, debug)
@@ -431,18 +457,14 @@ def test_body():
good_impact[1] = "impact description"
template_risklevel = [
- "**Risk level: none | low | medium | high**",
+ "### Risk level (write none, low medium or high below)",
"",
- "_Choose one. If medium or high, explain what verification was done to mitigate the risks._",
+ "_If medium or high, explain what verification was done to mitigate the risks._",
""
]
- good_risklevel_none = template_risklevel.copy()
- good_risklevel_none[0] = "**Risk level: none**"
-
- good_risklevel_medium = template_risklevel.copy()
- good_risklevel_medium[0] = "**Risk level: medium**"
- good_risklevel_medium[1] = "risklevel description"
+ good_risklevel = template_risklevel.copy()
+ good_risklevel[1] = "none"
template_checklist = [
"### Contributor's checklist",
@@ -454,13 +476,14 @@ def test_body():
]
#list of sections that when combined form a valid body
- good_sections = [good_changelogs, good_impact, good_risklevel_none, template_checklist]
+ good_sections = [good_changelogs, good_impact, good_risklevel, template_checklist]
#list of sections that when combined form the template
template_sections = [template_changelogs, template_impact, template_risklevel, template_checklist]
tests_passed = True
#Test section not filled out
+ #no need to test checklist section
for i in range(len(good_sections)-1):
test_sections = []
for j in range(len(good_sections)):
@@ -480,13 +503,13 @@ def test_body():
tests_passed = run_test(f"duplicate section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
#Test out of order section
- for i in range(len(good_sections)):
+ for i in range(len(good_sections)-1):
test_sections = []
for j in range(len(good_sections)):
test_sections.append(good_sections[j].copy())
- k = (i + 3) % len(good_sections)
- test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
- tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
+ for k in range(i+1,len(good_sections)):
+ test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
+ tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
#Test missing section
for i in range(len(good_sections)):
@@ -496,28 +519,8 @@ def test_body():
test_sections.append(good_sections[j].copy())
tests_passed = run_test(f"Missing Section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
- #Test good body with risk level of none:
- tests_passed = run_test("good documentation. risk level none", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed
-
- #Test good body with risk level of medium:
- risk_level_index = 2
- good_medium_risk_level = good_sections.copy()
- good_medium_risk_level[risk_level_index] = good_risklevel_medium
- tests_passed = run_test("good documentation. risk level medium", build_body(good_medium_risk_level), True, DEBUG_MESSAGES) and tests_passed
-
- #Test good body except medium risk level and there is no description
- bad_medium_risk_level = good_sections.copy()
- bad_risklevel_medium = good_risklevel_medium.copy()
- bad_risklevel_medium[1] = ""
- bad_medium_risk_level[risk_level_index] = bad_risklevel_medium
- tests_passed = run_test("medium risk level but no description", build_body(bad_medium_risk_level), False, DEBUG_MESSAGES) and tests_passed
-
- #Test unknown risk level:
- unknow_risk_level = good_sections.copy()
- unknow_risk_level_section = template_risklevel.copy()
- unknow_risk_level_section[0] = "**Risk level: unknown**"
- unknow_risk_level[risk_level_index] = unknow_risk_level_section
- tests_passed = run_test("unknown risk level", build_body(unknow_risk_level), False, DEBUG_MESSAGES) and tests_passed
+ #Test good body:
+ tests_passed = run_test("good documentation", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed
print("*****")
if tests_passed: