You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2022/08/19 10:54:29 UTC

[GitHub] [airflow] ashb commented on a diff in pull request #25164: Common SQLCheckOperators Various Functionality Update

ashb commented on code in PR #25164:
URL: https://github.com/apache/airflow/pull/25164#discussion_r950065201


##########
airflow/providers/common/sql/operators/sql.py:
##########
@@ -273,38 +303,38 @@ def __init__(
 
         self.table = table
         self.checks = checks
+        self.partition_clause = partition_clause
         # OpenLineage needs a valid SQL query with the input/output table(s) to parse
         self.sql = f"SELECT * FROM {self.table};"
 
     def execute(self, context=None):
         hook = self.get_db_hook()
-
-        check_names = [*self.checks]
-        check_mins_sql = ",".join(
-            self.sql_min_template.replace("check_name", check_name) for check_name in check_names
-        )
-        checks_sql = ",".join(
+        checks_sql = " UNION ALL ".join(
             [
-                self.sql_check_template.replace("check_statement", value["check_statement"]).replace(
-                    "check_name", check_name
-                )
+                self.sql_check_template.replace("check_statement", value["check_statement"])
+                .replace("_check_name", check_name)
+                .replace("table", self.table)
                 for check_name, value in self.checks.items()
             ]
         )
+        partition_clause_statement = f"WHERE {self.partition_clause}" if self.partition_clause else ""
+        self.sql = f"SELECT check_name, check_result FROM ({checks_sql}) "
+        f"AS check_table {partition_clause_statement};"

Review Comment:
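   Whoops. Missing `+` here (or, better, parentheses around both f-strings so they are implicitly concatenated). As written, the second string is just a standalone literal expression, not part of `self.sql`, and thus the entire `partition_clause_statement` isn't used!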



##########
airflow/providers/common/sql/operators/sql.py:
##########
@@ -273,38 +303,38 @@ def __init__(
 
         self.table = table
         self.checks = checks
+        self.partition_clause = partition_clause
         # OpenLineage needs a valid SQL query with the input/output table(s) to parse
         self.sql = f"SELECT * FROM {self.table};"
 
     def execute(self, context=None):
         hook = self.get_db_hook()
-
-        check_names = [*self.checks]
-        check_mins_sql = ",".join(
-            self.sql_min_template.replace("check_name", check_name) for check_name in check_names
-        )
-        checks_sql = ",".join(
+        checks_sql = " UNION ALL ".join(
             [
-                self.sql_check_template.replace("check_statement", value["check_statement"]).replace(
-                    "check_name", check_name
-                )
+                self.sql_check_template.replace("check_statement", value["check_statement"])
+                .replace("_check_name", check_name)
+                .replace("table", self.table)
                 for check_name, value in self.checks.items()
             ]
         )
+        partition_clause_statement = f"WHERE {self.partition_clause}" if self.partition_clause else ""
+        self.sql = f"SELECT check_name, check_result FROM ({checks_sql}) "
+        f"AS check_table {partition_clause_statement};"
 
-        self.sql = f"SELECT {check_mins_sql} FROM (SELECT {checks_sql} FROM {self.table});"
-        records = hook.get_first(self.sql)
+        records = hook.get_pandas_df(self.sql)

Review Comment:
   Why did we change from getting records to getting this as a pandas dataframe? This now places a _hard_ requirement on using pandas for this operator, whereas previously pandas was almost entirely optional.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org