You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by ki...@apache.org on 2021/12/26 07:44:40 UTC

[dolphinscheduler] branch dev updated: [python] Fix task relation wrong hashable function (#7628)

This is an automated email from the ASF dual-hosted git repository.

kirs pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler.git


The following commit(s) were added to refs/heads/dev by this push:
     new b1afd99  [python] Fix task relation wrong hashable function (#7628)
b1afd99 is described below

commit b1afd99c9e4ba5f4fc3bb3ee3fbce5fe64cbc201
Author: Jiajie Zhong <zh...@hotmail.com>
AuthorDate: Sun Dec 26 15:44:30 2021 +0800

    [python] Fix task relation wrong hashable function (#7628)
    
    * [python] Fix task relation wrong hashable function
    
    Currently our :class:`TaskRelation` have wrong __hash__
    function, would mistake a object as same object. It failed
    our dependence and process definition. this patch fix it
    by adding `_KEY_ATTR` to overwrite TaskRelation's __eq__
    and correct __hash__ func
    
    close: #7627
    
    * Fix code format
---
 .../src/pydolphinscheduler/constants.py            |  1 +
 .../src/pydolphinscheduler/core/task.py            | 12 +++++-
 .../pydolphinscheduler/tests/core/test_task.py     | 46 +++++++++++++++++++++-
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py
index 940c749..84becbc 100644
--- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py
+++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py
@@ -104,6 +104,7 @@ class Delimiter(str):
     DASH = "/"
     COLON = ":"
     UNDERSCORE = "_"
+    DIRECTION = "->"
 
 
 class Time(str):
diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py
index 39cca9c..8a90efc 100644
--- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py
+++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py
@@ -21,6 +21,7 @@ import logging
 from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
 
 from pydolphinscheduler.constants import (
+    Delimiter,
     ProcessDefinitionDefault,
     TaskFlag,
     TaskPriority,
@@ -37,6 +38,13 @@ from pydolphinscheduler.java_gateway import launch_gateway
 class TaskRelation(Base):
     """TaskRelation object, describe the relation of exactly two tasks."""
 
+    # Add attr `_KEY_ATTR` to overwrite :func:`__eq__`, it is make set
+    # `Task.process_definition._task_relations` work correctly.
+    _KEY_ATTR = {
+        "pre_task_code",
+        "post_task_code",
+    }
+
     _DEFINE_ATTR = {
         "pre_task_code",
         "post_task_code",
@@ -61,7 +69,7 @@ class TaskRelation(Base):
         self.post_task_code = post_task_code
 
     def __hash__(self):
-        return hash(f"{self.post_task_code}, {self.post_task_code}")
+        return hash(f"{self.pre_task_code} {Delimiter.DIRECTION} {self.post_task_code}")
 
 
 class Task(Base):
@@ -219,6 +227,7 @@ class Task(Base):
                     task_relation = TaskRelation(
                         pre_task_code=task.code,
                         post_task_code=self.code,
+                        name=f"{task.name} {Delimiter.DIRECTION} {self.name}",
                     )
                     self.process_definition._task_relations.add(task_relation)
             else:
@@ -229,6 +238,7 @@ class Task(Base):
                     task_relation = TaskRelation(
                         pre_task_code=self.code,
                         post_task_code=task.code,
+                        name=f"{self.name} {Delimiter.DIRECTION} {task.name}",
                     )
                     self.process_definition._task_relations.add(task_relation)
 
diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py
index b551f07..6af731b 100644
--- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py
+++ b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py
@@ -24,6 +24,9 @@ import pytest
 from pydolphinscheduler.core.task import Task, TaskRelation
 from tests.testing.task import Task as testTask
 
+TEST_TASK_RELATION_SET = set()
+TEST_TASK_RELATION_SIZE = 0
+
 
 @pytest.mark.parametrize(
     "attr, expect",
@@ -66,6 +69,45 @@ def test_property_task_params(attr, expect):
     assert expect == task.task_params
 
 
+@pytest.mark.parametrize(
+    "pre_code, post_code, expect",
+    [
+        (123, 456, hash("123 -> 456")),
+        (12345678, 987654321, hash("12345678 -> 987654321")),
+    ],
+)
+def test_task_relation_hash_func(pre_code, post_code, expect):
+    """Test TaskRelation magic function :func:`__hash__`."""
+    task_param = TaskRelation(pre_task_code=pre_code, post_task_code=post_code)
+    assert hash(task_param) == expect
+
+
+@pytest.mark.parametrize(
+    "pre_code, post_code, size_add",
+    [
+        (123, 456, 1),
+        (123, 456, 0),
+        (456, 456, 1),
+        (123, 123, 1),
+        (456, 123, 1),
+        (0, 456, 1),
+        (123, 0, 1),
+    ],
+)
+def test_task_relation_add_to_set(pre_code, post_code, size_add):
+    """Test TaskRelation with different pre_code and post_code add to set behavior.
+
+    Here we use global variable to keep set of :class:`TaskRelation` instance and the number we expect
+    of the size when we add a new task relation to exists set.
+    """
+    task_relation = TaskRelation(pre_task_code=pre_code, post_task_code=post_code)
+    TEST_TASK_RELATION_SET.add(task_relation)
+    # hint python interpreter use global variable instead of local's
+    global TEST_TASK_RELATION_SIZE
+    TEST_TASK_RELATION_SIZE += size_add
+    assert len(TEST_TASK_RELATION_SET) == TEST_TASK_RELATION_SIZE
+
+
 def test_task_relation_to_dict():
     """Test TaskRelation object function to_dict."""
     pre_task_code = 123
@@ -79,10 +121,10 @@ def test_task_relation_to_dict():
         "conditionType": 0,
         "conditionParams": {},
     }
-    task_param = TaskRelation(
+    task_relation = TaskRelation(
         pre_task_code=pre_task_code, post_task_code=post_task_code
     )
-    assert task_param.get_define() == expect
+    assert task_relation.get_define() == expect
 
 
 def test_task_get_define():