You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by zh...@apache.org on 2022/04/25 02:17:29 UTC

[dolphinscheduler] branch dev updated: [Improve][python] Support create table syntax and custom sql type param (#9673)

This is an automated email from the ASF dual-hosted git repository.

zhongjiajie pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler.git


The following commit(s) were added to refs/heads/dev by this push:
     new 8a8b63cd96 [Improve][python] Support create table syntax and custom sql type param (#9673)
8a8b63cd96 is described below

commit 8a8b63cd96ad0301322e25936f305ff595f317dc
Author: 陈家名 <13...@163.com>
AuthorDate: Mon Apr 25 10:17:20 2022 +0800

    [Improve][python] Support create table syntax and custom sql type param (#9673)
---
 .../src/pydolphinscheduler/tasks/sql.py            | 24 ++++++++++-
 .../pydolphinscheduler/tests/tasks/test_sql.py     | 46 +++++++++++++++-------
 2 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
index b5be3e45aa..c15170c448 100644
--- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
+++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
@@ -17,6 +17,7 @@
 
 """Task sql."""
 
+import logging
 import re
 from typing import Dict, Optional
 
@@ -24,6 +25,8 @@ from pydolphinscheduler.constants import TaskType
 from pydolphinscheduler.core.database import Database
 from pydolphinscheduler.core.task import Task
 
+log = logging.getLogger(__file__)
+
 
 class SqlType:
     """SQL type, for now it just contain `SELECT` and `NO_SELECT`."""
@@ -61,6 +64,7 @@ class Sql(Task):
         name: str,
         datasource_name: str,
         sql: str,
+        sql_type: Optional[int] = None,
         pre_statements: Optional[str] = None,
         post_statements: Optional[str] = None,
         display_rows: Optional[int] = 10,
@@ -69,6 +73,7 @@ class Sql(Task):
     ):
         super().__init__(name, TaskType.SQL, *args, **kwargs)
         self.sql = sql
+        self.param_sql_type = sql_type
         self.datasource_name = datasource_name
         self.pre_statements = pre_statements or []
         self.post_statements = post_statements or []
@@ -76,9 +81,24 @@ class Sql(Task):
 
     @property
     def sql_type(self) -> int:
-        """Judgement sql type, use regexp to check which type of the sql is."""
+        """Judgement sql type, it will return the SQL type for type `SELECT` or `NOT_SELECT`.
+
+        If `param_sql_type` dot not specific, will use regexp to check
+        which type of the SQL is. But if `param_sql_type` is specific
+        will use the parameter overwrites the regexp way
+        """
+        if (
+            self.param_sql_type == SqlType.SELECT
+            or self.param_sql_type == SqlType.NOT_SELECT
+        ):
+            log.info(
+                "The sql type is specified by a parameter, with value %s",
+                self.param_sql_type,
+            )
+            return self.param_sql_type
         pattern_select_str = (
-            "^(?!(.* |)insert |(.* |)delete |(.* |)drop |(.* |)update |(.* |)alter ).*"
+            "^(?!(.* |)insert |(.* |)delete |(.* |)drop "
+            "|(.* |)update |(.* |)alter |(.* |)create ).*"
         )
         pattern_select = re.compile(pattern_select_str, re.IGNORECASE)
         if pattern_select.match(self.sql) is None:
diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
index 3f8209c23e..74ac0f67ea 100644
--- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
+++ b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
@@ -26,24 +26,38 @@ from pydolphinscheduler.tasks.sql import Sql, SqlType
 
 
 @pytest.mark.parametrize(
-    "sql, sql_type",
+    "sql, param_sql_type, sql_type",
     [
-        ("select 1", SqlType.SELECT),
-        (" select 1", SqlType.SELECT),
-        (" select 1 ", SqlType.SELECT),
-        (" select 'insert' ", SqlType.SELECT),
-        (" select 'insert ' ", SqlType.SELECT),
-        ("with tmp as (select 1) select * from tmp ", SqlType.SELECT),
-        ("insert into table_name(col1, col2) value (val1, val2)", SqlType.NOT_SELECT),
+        ("select 1", None, SqlType.SELECT),
+        (" select 1", None, SqlType.SELECT),
+        (" select 1 ", None, SqlType.SELECT),
+        (" select 'insert' ", None, SqlType.SELECT),
+        (" select 'insert ' ", None, SqlType.SELECT),
+        ("with tmp as (select 1) select * from tmp ", None, SqlType.SELECT),
+        (
+            "insert into table_name(col1, col2) value (val1, val2)",
+            None,
+            SqlType.NOT_SELECT,
+        ),
         (
             "insert into table_name(select, col2) value ('select', val2)",
+            None,
+            SqlType.NOT_SELECT,
+        ),
+        ("update table_name SET col1=val1 where col1=val2", None, SqlType.NOT_SELECT),
+        (
+            "update table_name SET col1='select' where col1=val2",
+            None,
             SqlType.NOT_SELECT,
         ),
-        ("update table_name SET col1=val1 where col1=val2", SqlType.NOT_SELECT),
-        ("update table_name SET col1='select' where col1=val2", SqlType.NOT_SELECT),
-        ("delete from table_name where id < 10", SqlType.NOT_SELECT),
-        ("delete from table_name where id < 10", SqlType.NOT_SELECT),
-        ("alter table table_name add column col1 int", SqlType.NOT_SELECT),
+        ("delete from table_name where id < 10", None, SqlType.NOT_SELECT),
+        ("delete from table_name where id < 10", None, SqlType.NOT_SELECT),
+        ("alter table table_name add column col1 int", None, SqlType.NOT_SELECT),
+        ("create table table_name2 (col1 int)", None, SqlType.NOT_SELECT),
+        ("create table table_name2 (col1 int)", SqlType.SELECT, SqlType.SELECT),
+        ("select 1", SqlType.NOT_SELECT, SqlType.NOT_SELECT),
+        ("create table table_name2 (col1 int)", SqlType.NOT_SELECT, SqlType.NOT_SELECT),
+        ("select 1", SqlType.SELECT, SqlType.SELECT),
     ],
 )
 @patch(
@@ -54,11 +68,13 @@ from pydolphinscheduler.tasks.sql import Sql, SqlType
     "pydolphinscheduler.core.database.Database.get_database_info",
     return_value=({"id": 1, "type": "mock_type"}),
 )
-def test_get_sql_type(mock_datasource, mock_code_version, sql, sql_type):
+def test_get_sql_type(
+    mock_datasource, mock_code_version, sql, param_sql_type, sql_type
+):
     """Test property sql_type could return correct type."""
     name = "test_get_sql_type"
     datasource_name = "test_datasource"
-    task = Sql(name, datasource_name, sql)
+    task = Sql(name, datasource_name, sql, sql_type=param_sql_type)
     assert (
         sql_type == task.sql_type
     ), f"Sql {sql} expect sql type is {sql_type} but got {task.sql_type}"