You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by zh...@apache.org on 2022/04/25 02:17:29 UTC
[dolphinscheduler] branch dev updated: [Improve][python] Support create table syntax and custom sql type param (#9673)
This is an automated email from the ASF dual-hosted git repository.
zhongjiajie pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler.git
The following commit(s) were added to refs/heads/dev by this push:
new 8a8b63cd96 [Improve][python] Support create table syntax and custom sql type param (#9673)
8a8b63cd96 is described below
commit 8a8b63cd96ad0301322e25936f305ff595f317dc
Author: 陈家名 <13...@163.com>
AuthorDate: Mon Apr 25 10:17:20 2022 +0800
[Improve][python] Support create table syntax and custom sql type param (#9673)
---
.../src/pydolphinscheduler/tasks/sql.py | 24 ++++++++++-
.../pydolphinscheduler/tests/tasks/test_sql.py | 46 +++++++++++++++-------
2 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
index b5be3e45aa..c15170c448 100644
--- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
+++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
@@ -17,6 +17,7 @@
"""Task sql."""
+import logging
import re
from typing import Dict, Optional
@@ -24,6 +25,8 @@ from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.database import Database
from pydolphinscheduler.core.task import Task
+log = logging.getLogger(__file__)
+
class SqlType:
"""SQL type, for now it just contain `SELECT` and `NO_SELECT`."""
@@ -61,6 +64,7 @@ class Sql(Task):
name: str,
datasource_name: str,
sql: str,
+ sql_type: Optional[int] = None,
pre_statements: Optional[str] = None,
post_statements: Optional[str] = None,
display_rows: Optional[int] = 10,
@@ -69,6 +73,7 @@ class Sql(Task):
):
super().__init__(name, TaskType.SQL, *args, **kwargs)
self.sql = sql
+ self.param_sql_type = sql_type
self.datasource_name = datasource_name
self.pre_statements = pre_statements or []
self.post_statements = post_statements or []
@@ -76,9 +81,24 @@ class Sql(Task):
@property
def sql_type(self) -> int:
- """Judgement sql type, use regexp to check which type of the sql is."""
+ """Judgement sql type, it will return the SQL type for type `SELECT` or `NOT_SELECT`.
+
+ If `param_sql_type` dot not specific, will use regexp to check
+ which type of the SQL is. But if `param_sql_type` is specific
+ will use the parameter overwrites the regexp way
+ """
+ if (
+ self.param_sql_type == SqlType.SELECT
+ or self.param_sql_type == SqlType.NOT_SELECT
+ ):
+ log.info(
+ "The sql type is specified by a parameter, with value %s",
+ self.param_sql_type,
+ )
+ return self.param_sql_type
pattern_select_str = (
- "^(?!(.* |)insert |(.* |)delete |(.* |)drop |(.* |)update |(.* |)alter ).*"
+ "^(?!(.* |)insert |(.* |)delete |(.* |)drop "
+ "|(.* |)update |(.* |)alter |(.* |)create ).*"
)
pattern_select = re.compile(pattern_select_str, re.IGNORECASE)
if pattern_select.match(self.sql) is None:
diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
index 3f8209c23e..74ac0f67ea 100644
--- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
+++ b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
@@ -26,24 +26,38 @@ from pydolphinscheduler.tasks.sql import Sql, SqlType
@pytest.mark.parametrize(
- "sql, sql_type",
+ "sql, param_sql_type, sql_type",
[
- ("select 1", SqlType.SELECT),
- (" select 1", SqlType.SELECT),
- (" select 1 ", SqlType.SELECT),
- (" select 'insert' ", SqlType.SELECT),
- (" select 'insert ' ", SqlType.SELECT),
- ("with tmp as (select 1) select * from tmp ", SqlType.SELECT),
- ("insert into table_name(col1, col2) value (val1, val2)", SqlType.NOT_SELECT),
+ ("select 1", None, SqlType.SELECT),
+ (" select 1", None, SqlType.SELECT),
+ (" select 1 ", None, SqlType.SELECT),
+ (" select 'insert' ", None, SqlType.SELECT),
+ (" select 'insert ' ", None, SqlType.SELECT),
+ ("with tmp as (select 1) select * from tmp ", None, SqlType.SELECT),
+ (
+ "insert into table_name(col1, col2) value (val1, val2)",
+ None,
+ SqlType.NOT_SELECT,
+ ),
(
"insert into table_name(select, col2) value ('select', val2)",
+ None,
+ SqlType.NOT_SELECT,
+ ),
+ ("update table_name SET col1=val1 where col1=val2", None, SqlType.NOT_SELECT),
+ (
+ "update table_name SET col1='select' where col1=val2",
+ None,
SqlType.NOT_SELECT,
),
- ("update table_name SET col1=val1 where col1=val2", SqlType.NOT_SELECT),
- ("update table_name SET col1='select' where col1=val2", SqlType.NOT_SELECT),
- ("delete from table_name where id < 10", SqlType.NOT_SELECT),
- ("delete from table_name where id < 10", SqlType.NOT_SELECT),
- ("alter table table_name add column col1 int", SqlType.NOT_SELECT),
+ ("delete from table_name where id < 10", None, SqlType.NOT_SELECT),
+ ("delete from table_name where id < 10", None, SqlType.NOT_SELECT),
+ ("alter table table_name add column col1 int", None, SqlType.NOT_SELECT),
+ ("create table table_name2 (col1 int)", None, SqlType.NOT_SELECT),
+ ("create table table_name2 (col1 int)", SqlType.SELECT, SqlType.SELECT),
+ ("select 1", SqlType.NOT_SELECT, SqlType.NOT_SELECT),
+ ("create table table_name2 (col1 int)", SqlType.NOT_SELECT, SqlType.NOT_SELECT),
+ ("select 1", SqlType.SELECT, SqlType.SELECT),
],
)
@patch(
@@ -54,11 +68,13 @@ from pydolphinscheduler.tasks.sql import Sql, SqlType
"pydolphinscheduler.core.database.Database.get_database_info",
return_value=({"id": 1, "type": "mock_type"}),
)
-def test_get_sql_type(mock_datasource, mock_code_version, sql, sql_type):
+def test_get_sql_type(
+ mock_datasource, mock_code_version, sql, param_sql_type, sql_type
+):
"""Test property sql_type could return correct type."""
name = "test_get_sql_type"
datasource_name = "test_datasource"
- task = Sql(name, datasource_name, sql)
+ task = Sql(name, datasource_name, sql, sql_type=param_sql_type)
assert (
sql_type == task.sql_type
), f"Sql {sql} expect sql type is {sql_type} but got {task.sql_type}"