You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2021/07/12 18:18:36 UTC

[systemds] branch master updated: [SYSTEMDS-3056] Python API replace operation

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 22b64f7  [SYSTEMDS-3056] Python API replace operation
22b64f7 is described below

commit 22b64f7bb2033d6095ecdb1c43ef0c64160d7e80
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Mon Jul 12 20:17:58 2021 +0200

    [SYSTEMDS-3056] Python API replace operation
---
 .../python/systemds/operator/algorithm/__init__.py |  2 +
 .../operator/algorithm/builtin/shortestPath.py     | 40 +++++++++++++
 src/main/python/systemds/operator/nodes/frame.py   | 10 +++-
 src/main/python/systemds/operator/nodes/matrix.py  | 12 ++++
 src/main/python/tests/frame/test_replace.py        | 68 ++++++++++++++++++++++
 src/main/python/tests/matrix/test_replace.py       | 61 +++++++++++++++++++
 6 files changed, 192 insertions(+), 1 deletion(-)

diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index 172e12b..377c248 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -98,6 +98,7 @@ from .builtin.scale import scale
 from .builtin.scaleApply import scaleApply 
 from .builtin.sherlock import sherlock 
 from .builtin.sherlockPredict import sherlockPredict 
+from .builtin.shortestPath import shortestPath 
 from .builtin.sigmoid import sigmoid 
 from .builtin.slicefinder import slicefinder 
 from .builtin.smote import smote 
@@ -192,6 +193,7 @@ __all__ = ['abstain',
  'scaleApply',
  'sherlock',
  'sherlockPredict',
+ 'shortestPath',
  'sigmoid',
  'slicefinder',
  'smote',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/shortestPath.py b/src/main/python/systemds/operator/algorithm/builtin/shortestPath.py
new file mode 100644
index 0000000..8625824
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/shortestPath.py
@@ -0,0 +1,40 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/shortestPath.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def shortestPath(G: Matrix,
+                 sourceNode: int,
+                 **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """Build a Matrix node for the 'shortestPath' operation from G, sourceNode and any extra named arguments."""
+    params_dict = {'G': G, 'sourceNode': sourceNode}
+    params_dict.update(kwargs)
+    return Matrix(G.sds_context,
+        'shortestPath',
+        named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/nodes/frame.py b/src/main/python/systemds/operator/nodes/frame.py
index 9c7d204..25f1b81 100644
--- a/src/main/python/systemds/operator/nodes/frame.py
+++ b/src/main/python/systemds/operator/nodes/frame.py
@@ -120,7 +120,15 @@ class Frame(OperationNode):
         """
         Column-wise frame concatenation, by concatenating the second frame as additional columns to the first frame. 
         :param: The other frame to bind to the right hand side.
-        :return: The OperationNode containing the concatenated frames.
+        :return: The Frame containing the concatenated frames.
         """
         return Frame(self.sds_context, "cbind", [self, other])
 
+    def replace(self, pattern:str, replacement:str) -> 'Frame':
+        """
+        Replace all instances of the pattern string with the replacement string.
+        :param: pattern the string to replace; it is passed on wrapped in single quotes (presumably it must not contain one itself -- TODO confirm)
+        :param: replacement the string to replace with; it is wrapped in single quotes the same way
+        :return: The Frame containing the replaced values
+        """
+        return Frame(self.sds_context, "replace", named_input_nodes={"target": self, "pattern": f"'{pattern}'", "replacement":f"'{replacement}'"})
diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py
index e1322d4..f205b4d 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -348,3 +348,15 @@ class Matrix(OperationNode):
         """
         return Matrix(self.sds_context, 'rev', [self])
 
+    def round(self) -> 'Matrix':
+        """ Round all values to the nearest integer.
+
+        :return: The Matrix representing the result of this operation
+        """
+        return Matrix(self.sds_context, "round", [self])
+    
+    def replace(self, pattern:VALID_INPUT_TYPES, replacement:VALID_INPUT_TYPES) -> 'Matrix':
+        """
+        Replace every value matching pattern with replacement and return the resulting Matrix node.
+        """
+        return Matrix(self.sds_context, "replace", named_input_nodes={"target": self, "pattern": pattern, "replacement":replacement})
\ No newline at end of file
diff --git a/src/main/python/tests/frame/test_replace.py b/src/main/python/tests/frame/test_replace.py
new file mode 100644
index 0000000..7be735c
--- /dev/null
+++ b/src/main/python/tests/frame/test_replace.py
@@ -0,0 +1,68 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import json
+import os
+import shutil
+import sys
+import unittest
+
+import numpy as np
+import pandas as pd
+from systemds.context import SystemDSContext
+
+
+class TestReplaceFrame(unittest.TestCase):
+    """Checks Frame.replace on the district column of the homes CSV dataset."""
+
+    sds: SystemDSContext = None
+    HOMES_PATH = "../../test/resources/datasets/homes/homes.csv"
+    HOMES_SCHEMA = '"int,string,int,int,double,int,boolean,int,int"'
+    JSPEC_PATH = "../../test/resources/datasets/homes/homes.tfspec_bin2.json"
+
+    @classmethod
+    def setUpClass(cls):
+        cls.sds = SystemDSContext()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.sds.close()
+
+    def test_replace_districts(self):
+        """Chained replace calls must turn every targeted district into "south"."""
+        F1 = self.sds.read(
+            self.HOMES_PATH,
+            data_type="frame",
+            schema=self.HOMES_SCHEMA,
+            format="csv",
+            header=True,
+        )
+        ret = F1.replace("north", "south").replace("west", "south").replace("east", "south").compute()
+        self.assertTrue(any(ret.district == "south"))
+        # Each replaced value must be gone, not only the first one in the chain.
+        for replaced in ("north", "west", "east"):
+            self.assertFalse(any(ret.district == replaced))
+
+
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
diff --git a/src/main/python/tests/matrix/test_replace.py b/src/main/python/tests/matrix/test_replace.py
new file mode 100644
index 0000000..85bc3c4
--- /dev/null
+++ b/src/main/python/tests/matrix/test_replace.py
@@ -0,0 +1,61 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import json
+import os
+import random
+import shutil
+import sys
+import unittest
+
+import numpy as np
+import pandas as pd
+from systemds.context import SystemDSContext
+
+np.random.seed(7)
+shape = (25, 25)
+
+
+class TestReplaceMatrix(unittest.TestCase):
+    """Exercises Matrix.replace on a rounded random matrix."""
+
+    sds: SystemDSContext = None
+
+    @classmethod
+    def setUpClass(cls):
+        cls.sds = SystemDSContext()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.sds.close()
+
+    def test_replace_01(self):
+        """After replace(1, 2) no 1 may remain, while 2 and 0 must both be present."""
+        n_rows, n_cols = shape
+        rounded = self.sds.rand(min=0, max=2, rows=n_rows, cols=n_cols, seed=14).round()
+        m = rounded.replace(1, 2).compute()
+        self.assertNotIn(1, m)
+        self.assertIn(2, m)
+        self.assertIn(0, m)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)